In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [25]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [75]:
import torch 
from transformers import RobertaTokenizer,RobertaModel,get_linear_schedule_with_warmup
import transformers 
import torch
import torch.nn as nn 
import pandas as pd 
import numpy as np 
from sklearn import model_selection
from sklearn import metrics
from transformers import AdamW
from sklearn.model_selection import StratifiedKFold
import os 
import random 

In [76]:
transformers.__version__

'4.25.1'

In [77]:
import torch

# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [78]:

df_train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Train.csv')
df_test = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Test.csv')

In [79]:
df_test.text = df_test.text.str.lower()
df_test.shape

(309, 2)

In [80]:
CONFIG = {
    'MAX_LEN':128,
    'TRAIN_BATCH_SIZE':16,
    'VALID_BATCH_SIZE':16,
    'EPOCHS':3,
    'TOKENIZER':RobertaTokenizer.from_pretrained('roberta-base',lowercase=True,truncation=True)
}

In [81]:
import random
from random import randint
import numpy as np
SEED_VAL  = 5000
# Set the seed value all over the place to make this reproducible.
def seed_all(SEED):
  random.seed(SEED_VAL)
  np.random.seed(SEED_VAL)
  torch.manual_seed(SEED_VAL)
  torch.cuda.manual_seed_all(SEED_VAL)
  os.environ['PYTHONHASHSEED'] = str(SEED_VAL)
  torch.backends.cudnn.deterministic = True

In [82]:
#Roberta Class 
class CustomRoberta(nn.Module):
    def __init__(self):
        super(CustomRoberta, self).__init__()
        self.num_labels = 4
        self.roberta = transformers.RobertaModel.from_pretrained("roberta-base", output_hidden_states=False, num_labels=self.num_labels)
        self.dropout = nn.Dropout(p=0.2)
        self.classifier = nn.Linear(768, self.num_labels)

    def forward(self,
                input_ids=None,
                attention_mask=None,
                position_ids=None,
                head_mask=None,
                inputs_embeds=None):

        _, o2 = self.roberta(input_ids,
                               attention_mask=attention_mask,
                               position_ids=position_ids,
                               head_mask=head_mask,
                               inputs_embeds=inputs_embeds,
                               return_dict=False)
        o2 = self.dropout(o2)
        logits = self.classifier(o2)       
        outputs = logits
        return outputs

In [83]:
#Dataset 

class RobertaDataset:
  def __init__(self,tweet,target=None,task='train'):
    self.tweet= tweet
    self.target = target
    self.tokenizer = CONFIG['TOKENIZER']
    self.max_len = CONFIG['MAX_LEN']
    self.task = task
  
  def __len__(self):
    return len(self.tweet)

  def __getitem__(self,item):
    tweet = str(self.tweet[item])
    tweet = ' '.join(tweet.split())


    inputs = self.tokenizer.encode_plus(tweet,
                                        max_length=self.max_len,
                                        pad_to_max_length=True,
                                        add_special_tokens=True,
                                        truncation=True)
    ids = inputs['input_ids']
    mask = inputs['attention_mask']
    


    to_return= {
        'ids':torch.tensor(ids,dtype=torch.long),
        'mask':torch.tensor(mask,dtype=torch.long),
    }
    if (self.task=='train'):

      to_return.update({'target':torch.tensor(self.target[item])})

    return to_return 


In [84]:
def loss_fn(outputs,targets):
  criterion =  nn.CrossEntropyLoss()
  return criterion(outputs,targets)

In [85]:
#Train 
def train_fn(data_loader,model,optimizer,device,sc=None):
  model.train()
  tot_loss = 0
  for bi, d in enumerate(data_loader):
    ids = d['ids']
    mask = d['mask']
    targets = d['target']

    #send them to cuda gpu 
    ids = ids.to(device,dtype=torch.long)
    mask = mask.to(device,dtype=torch.long)
   
    targets = targets.to(device,dtype=torch.long)
    
    optimizer.zero_grad()

    outputs = model(
        ids,
        mask,
    )
    
    loss = loss_fn(outputs,targets)
    tot_loss += loss.item()
    loss.backward()
    optimizer.step()
    if sc:
      sc.step()
  
  print("Training loss for this epoch: ",tot_loss/len(data_loader))


In [86]:
#evaluation function 
def eval_fn(data_loader,model,device):
  model.eval()
  fin_targets = []
  fin_outputs =[]
  tot_loss = 0
  with torch.no_grad():
    for bi, d in enumerate(data_loader):
      ids = d['ids']
      mask = d['mask']
      
      targets = d['target']

      #send them to cuda gpu 
      ids = ids.to(device,dtype=torch.long)
      mask = mask.to(device,dtype=torch.long)
      
     
      targets = targets.to(device,dtype=torch.long)
      
      

      outputs = model(
          ids,
          mask
      )

      loss = loss_fn(outputs,targets)
      tot_loss+=loss.item()
      fin_targets.extend(targets.cpu().detach().numpy())
      fin_outputs.extend(torch.nn.functional.softmax(outputs).cpu().detach().numpy())
  return fin_outputs,fin_targets,tot_loss/(len(data_loader))

In [87]:
#preparing test data
test = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Test.csv')
test.text = test.text.str.lower()

test_dataset = RobertaDataset(
    tweet=test.text.values,
    task='test'
)

test_data_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=CONFIG['TRAIN_BATCH_SIZE'],
    num_workers=4
)



In [88]:
#function to predict on the test 
def predict_fn(model):
  fin_outputs = []
  with torch.no_grad():
    for bi, d in enumerate(test_data_loader):
      ids = d['ids']
      mask = d['mask']
  
      #send them to cuda gpu 
      ids = ids.to(device,dtype=torch.long)
      mask = mask.to(device,dtype=torch.long)
      
     
      outputs = model(
          ids,
          mask
      )
      fin_outputs.append(torch.nn.functional.softmax(outputs).cpu().detach().numpy())
      alls = np.vstack(fin_outputs)

  return alls

In [91]:
#function to run 5 folds and average their predictions on the test 

def run_folds():
    total_folds=5
    all_preds = []
    losses = []
    seed_all(SEED_VAL)
    dfx = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Train.csv')
    #Convert texts to lowercase and remove duplicate texts
    dfx.text = dfx.text.str.lower()
    dfx = dfx.drop(dfx[dfx[["text", "label"]].duplicated()].index).reset_index(drop = True)

    dfx['label'] = dfx['label'].factorize()[0]
    fold=StratifiedKFold(n_splits=total_folds, shuffle=True)
    for i,(train_index, test_index) in enumerate(fold.split(dfx,dfx['label'])):
      print(f'FOLD {i+1}/{total_folds}')
      df_train = dfx.iloc[train_index]
      df_valid = dfx.iloc[test_index]

      train_dataset =RobertaDataset(
          tweet=df_train.text.values,
          target=df_train.label.values,
          task='train'
      )

      train_data_loader = torch.utils.data.DataLoader(
          train_dataset,
          batch_size=CONFIG['TRAIN_BATCH_SIZE'],
          num_workers=4
      )

      valid_dataset =RobertaDataset(
          tweet=df_valid.text.values,
          target=df_valid.label.values,
          task='train'
      )

      valid_data_loader = torch.utils.data.DataLoader(
          valid_dataset,
          batch_size=CONFIG['TRAIN_BATCH_SIZE'],
          num_workers=1
      )

      device = torch.device("cuda")
      model = CustomRoberta()
      model.to(device)
      
      param_optimizer = list(model.named_parameters())
      no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
      optimizer_parameters = [
          {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.001},
          {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
      ]

      num_train_steps = int(len(df_train) / CONFIG['TRAIN_BATCH_SIZE'] * CONFIG['EPOCHS'])
      optimizer = AdamW(optimizer_parameters, lr=5e-5)
      
      #scheduler = get_linear_schedule_with_warmup(optimizer,num_warmup_steps=0,num_training_steps=num_train_steps)


      best_accuracy = 0
      for epoch in range(CONFIG['EPOCHS']):
          print("----------------EPOCH "+str(epoch+1)+"---------------------")
          train_fn(train_data_loader, model, optimizer, device#scheduler
                  )
          outputs,targets,losss = eval_fn(valid_data_loader ,model, device)
          print("LOSS for this Epoc on val: ",losss)
      losses.append(losss)
      fold_preds = predict_fn(model)
      all_preds.append(fold_preds)
    print("mean losses over all folds: ",np.mean(losses))
    return  all_preds

In [92]:
preds = run_folds()

FOLD 1/5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


----------------EPOCH 1---------------------




Training loss for this epoch:  0.9665800213813782


  fin_outputs.extend(torch.nn.functional.softmax(outputs).cpu().detach().numpy())


LOSS for this Epoc on val:  0.6854684427380562
----------------EPOCH 2---------------------




Training loss for this epoch:  0.45283496951063473




LOSS for this Epoc on val:  0.5251188818365335
----------------EPOCH 3---------------------




Training loss for this epoch:  0.22918334032098453




LOSS for this Epoc on val:  0.5717229535803199


  fin_outputs.append(torch.nn.functional.softmax(outputs).cpu().detach().numpy())


FOLD 2/5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


----------------EPOCH 1---------------------




Training loss for this epoch:  1.026597249507904




LOSS for this Epoc on val:  0.5007008947432041
----------------EPOCH 2---------------------




Training loss for this epoch:  0.5215019911527634




LOSS for this Epoc on val:  0.19021576084196568
----------------EPOCH 3---------------------




Training loss for this epoch:  0.24822803493589163




LOSS for this Epoc on val:  0.22486791620031




FOLD 3/5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


----------------EPOCH 1---------------------




Training loss for this epoch:  0.9680180182059606




LOSS for this Epoc on val:  0.5505385361611843
----------------EPOCH 2---------------------




Training loss for this epoch:  0.41305947800477344




LOSS for this Epoc on val:  0.40908720158040524
----------------EPOCH 3---------------------




Training loss for this epoch:  0.3005720383177201




LOSS for this Epoc on val:  0.42389251850545406




FOLD 4/5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


----------------EPOCH 1---------------------




Training loss for this epoch:  0.9021575629711152




LOSS for this Epoc on val:  0.48960448801517487
----------------EPOCH 2---------------------




Training loss for this epoch:  0.34437436213095984




LOSS for this Epoc on val:  0.4361146166920662
----------------EPOCH 3---------------------




Training loss for this epoch:  0.2106118914981683




LOSS for this Epoc on val:  0.5771599970757961




FOLD 5/5


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


----------------EPOCH 1---------------------




Training loss for this epoch:  0.9680970092614491




LOSS for this Epoc on val:  0.5897038578987122
----------------EPOCH 2---------------------




Training loss for this epoch:  0.41861243893702826




LOSS for this Epoc on val:  0.4537631683051586
----------------EPOCH 3---------------------




Training loss for this epoch:  0.21708255025247733




LOSS for this Epoc on val:  0.44778005592525005




mean losses over all folds:  0.44908468825742603


In [94]:
preds_1 = np.mean(preds,axis=0)
preds_1

array([[0.4978481 , 0.01570824, 0.43842125, 0.0480224 ],
       [0.96831405, 0.00311518, 0.02459081, 0.00397992],
       [0.9799635 , 0.00210466, 0.01585909, 0.00207279],
       ...,
       [0.03233908, 0.04383009, 0.3717416 , 0.55208933],
       [0.00209358, 0.9647826 , 0.00308741, 0.03003637],
       [0.02094728, 0.02873447, 0.20975132, 0.7405669 ]], dtype=float32)

In [95]:
sub=pd.DataFrame()
sub['ID'] = test['ID']
sub['Depression'] = preds_1[:,0]
sub['Alcohol'] = preds_1[:,3]
sub['Suicide'] = preds_1[:,2]
sub['Drugs'] = preds_1[:,1]
sub.head()

Unnamed: 0,ID,Depression,Alcohol,Suicide,Drugs
0,02V56KMO,0.497848,0.048022,0.438421,0.015708
1,03BMGTOK,0.968314,0.00398,0.024591,0.003115
2,03LZVFM6,0.979963,0.002073,0.015859,0.002105
3,0EPULUM5,0.975782,0.002431,0.019574,0.002213
4,0GM4C5GD,0.002532,0.463164,0.004476,0.529829


In [96]:
sub.to_csv("Roberta_submission.csv",index=False)