In [None]:
!pip install torch torchvision --quiet
!pip install transformers  --quiet
!pip install pandas  --quiet
!pip install numpy  --quiet
!pip install sentencepiece  --quiet
!pip install sentence-splitter  --quiet
!pip install shap --quiet
!pip install optuna --quiet




[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m47.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.0/45.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.9/547.9 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.2/404.2 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Imports**

In [None]:
import random
import torch
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW,AutoModelForQuestionAnswering, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
from transformers import DebertaTokenizer, DebertaModel, BartTokenizer
import math

# **Model loading**

In [None]:
# Use a GPU if you have one available (Runtime -> Change runtime type -> GPU)
# `device` is read as a notebook-level global by the training and prediction code.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set seeds for reproducibility
# The same seed (26) is applied to Python's `random`, NumPy and PyTorch.
random.seed(26)
np.random.seed(26)
torch.manual_seed(26)

# Tokenizer matching the "microsoft/deberta-large" checkpoint; shared as a
# notebook-level global by the encoding and prediction helpers.
# NOTE(review): confirm that `do_lower_case=True` has any effect for this tokenizer.
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-large", do_lower_case=True)


Downloading (…)okenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/475 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

In [None]:
def get_optimizer_grouped_parameters(
    model, model_type,
    learning_rate, weight_decay,
    layerwise_learning_rate_decay
):
    """Build optimizer parameter groups with layer-wise learning-rate decay.

    The task head (every parameter whose name contains "classifier" or
    "pooler") is trained at ``learning_rate`` without weight decay. The
    backbone is walked from the last encoder layer down to the embeddings;
    each step multiplies the learning rate by ``layerwise_learning_rate_decay``
    so that layers further from the head receive smaller updates.

    Args:
        model: Module exposing ``named_parameters()`` and an attribute named
            ``model_type`` that has ``embeddings`` and ``encoder.layer``.
        model_type: Attribute name of the backbone on ``model``.
        learning_rate: Learning rate of the task head.
        weight_decay: Weight decay for backbone parameters whose names contain
            neither "bias" nor "LayerNorm.weight".
        layerwise_learning_rate_decay: Multiplicative factor applied once per
            backbone layer, starting with the layer closest to the head.

    Returns:
        A list of dicts with keys ``params``, ``weight_decay`` and ``lr``: one
        group for the head, then a decay / no-decay pair per backbone layer
        (a group may be empty when a layer has no matching parameters).

    Note:
        Parameters that are neither part of the head, the embeddings nor an
        entry of ``encoder.layer`` are not placed in any group.
    """
    no_decay = ["bias", "LayerNorm.weight"]

    def _is_no_decay(param_name):
        # Biases and LayerNorm weights are excluded from weight decay.
        return any(marker in param_name for marker in no_decay)

    # Task-specific head: base learning rate, no weight decay.
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if "classifier" in n or "pooler" in n],
            "weight_decay": 0.0,
            "lr": learning_rate,
        },
    ]

    # Backbone layers ordered from the one closest to the head to the embeddings.
    backbone = getattr(model, model_type)
    layers = [backbone.embeddings] + list(backbone.encoder.layer)
    layers.reverse()

    lr = learning_rate
    for layer in layers:
        lr *= layerwise_learning_rate_decay
        named_params = list(layer.named_parameters())
        optimizer_grouped_parameters += [
            {
                "params": [p for n, p in named_params if not _is_no_decay(n)],
                "weight_decay": weight_decay,
                "lr": lr,
            },
            {
                "params": [p for n, p in named_params if _is_no_decay(n)],
                "weight_decay": 0.0,
                "lr": lr,
            },
        ]
    return optimizer_grouped_parameters

In [None]:
def encode_data(tokenizer, passages,questions, max_length):
    """Encode the passage/question pairs into features that can be fed to the model.

    Each pair is tokenized as one two-segment sequence (passage first), truncated
    with the 'longest_first' strategy and padded to exactly ``max_length`` tokens.

    Args:
        tokenizer: Callable tokenizer returning a mapping with "input_ids" and
            "attention_mask" (e.g. a Hugging Face tokenizer).
        passages: Iterable of passage strings.
        questions: Iterable of question strings, paired with ``passages`` by
            position (extra items of the longer iterable are ignored by ``zip``).
        max_length: Length every encoded sequence is padded/truncated to.

    Returns:
        Tuple ``(input_ids, attention_masks)`` of NumPy arrays with one row per pair.
    """
    input_ids = []
    attention_masks = []

    for passage, question in zip(passages, questions):
        # `padding="max_length"` replaces the deprecated `pad_to_max_length=True`.
        encoded_data = tokenizer(
            passage,
            question,
            max_length=max_length,
            padding="max_length",
            truncation="longest_first",
        )
        input_ids.append(encoded_data["input_ids"])
        attention_masks.append(encoded_data["attention_mask"])

    return np.array(input_ids), np.array(attention_masks)

In [None]:
def predict(passage,question):
  """Return True when the model scores class 1 ("yes") at least as high as class 0.

  Uses the notebook-level globals ``tokenizer``, ``model`` and ``device``. The
  pair is encoded the same way as the training data (padded/truncated to 512
  tokens) and the attention mask is passed to the model so that padding
  positions are ignored, matching what the model saw during training.

  The function does not switch the model to eval mode; the caller is expected
  to have done so.

  Args:
    passage: Passage text (first segment).
    question: Question text (second segment).

  Returns:
    bool: True if P(class 1) >= P(class 0), otherwise False.
  """
  encoded = tokenizer(
    passage,
    question,
    max_length=512,
    padding="max_length",
    truncation="longest_first",
    return_tensors="pt",
  )
  input_ids = encoded["input_ids"].to(device)
  attention_mask = encoded["attention_mask"].to(device)

  # Inference only: no autograd graph is needed.
  with torch.no_grad():
    logits = model(input_ids, attention_mask=attention_mask)[0]

  probabilities = torch.softmax(logits, dim=1)[0].cpu().tolist()

  # Compare the raw probabilities: rounding them first (as was done before)
  # collapses near-ties such as 0.496 vs 0.504 into 0.5 vs 0.5 and flips
  # close "no" predictions to True.
  return probabilities[1] >= probabilities[0]






In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split



# Fine-tune the model on one train/dev split and return it with the weights of its best dev epoch
def train_and_evaluate(model,train_data_df,dev_data_df):
    """Fine-tune ``model`` and restore the weights of the best dev-accuracy epoch.

    Both frames are read through their ``Abstract`` (passage), ``questions``
    and ``AMES`` (label, cast to int) columns. Pairs are encoded with the
    notebook-level ``tokenizer`` via ``encode_data`` and batches are moved to
    the notebook-level ``device``.

    Args:
        model: Sequence-classification model already placed on ``device``.
            It is trained in place.
        train_data_df: DataFrame with the training rows.
        dev_data_df: DataFrame with the rows used to score every epoch.

    Returns:
        The same ``model`` object, left in eval mode and loaded with the
        weights of the first epoch that reached the highest dev accuracy.
    """
    passages_train = train_data_df.Abstract.values
    questions_train = train_data_df.questions.values
    answers_train = train_data_df.AMES.values.astype(int)

    passages_dev = dev_data_df.Abstract.values
    questions_dev = dev_data_df.questions.values
    answers_dev = dev_data_df.AMES.values.astype(int)

    # Encoding data
    max_seq_length = 512
    input_ids_train, attention_masks_train = encode_data(tokenizer, passages_train,questions_train, max_seq_length)
    input_ids_dev, attention_masks_dev = encode_data(tokenizer, passages_dev,questions_dev, max_seq_length)

    train_features = (input_ids_train, attention_masks_train, answers_train)
    dev_features = (input_ids_dev, attention_masks_dev, answers_dev)

    batch_size = 2
    train_features_tensors = [torch.tensor(feature, dtype=torch.long) for feature in train_features]
    dev_features_tensors = [torch.tensor(feature, dtype=torch.long) for feature in dev_features]

    train_dataset = TensorDataset(*train_features_tensors)
    dev_dataset = TensorDataset(*dev_features_tensors)

    train_dataloader = DataLoader(train_dataset, sampler=RandomSampler(train_dataset), batch_size=batch_size)
    dev_dataloader = DataLoader(dev_dataset, sampler=SequentialSampler(dev_dataset), batch_size=batch_size)

    # Optimizer params
    learning_rate = 5e-5
    adam_epsilon = 1e-6
    use_bertadam = False

    # Scheduler params
    num_epochs = 5
    num_warmup_steps = 0
    # The scheduler is stepped once per batch, so its horizon must be the total
    # number of optimizer steps. Using the epoch count here made the cosine
    # schedule reach zero after a handful of batches and then oscillate.
    num_training_steps = num_epochs * len(train_dataloader)

    optimizer = AdamW(model.parameters(),
        lr=learning_rate,
        eps=adam_epsilon,
        correct_bias=not use_bertadam
    )
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )

    print("Done setting up optimizer\n")

    best_dev_accuracy = None
    best_state = None  # CPU snapshot of the weights of the best epoch so far

    for i in tqdm(range(num_epochs), desc="Epoch"):

        # Training
        print("In epoch ", i, "\n")
        epoch_train_loss = 0  # Cumulative loss
        model.train()

        for batch in train_dataloader:
            input_ids = batch[0].to(device)
            attention_masks = batch[1].to(device)
            labels = batch[2].to(device)

            model.zero_grad()
            outputs = model(input_ids, token_type_ids=None, attention_mask=attention_masks, labels=labels)

            loss = outputs[0]
            epoch_train_loss += loss.item()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()

        epoch_train_loss = epoch_train_loss / max(len(train_dataloader), 1)
        print("Epoch loss is", epoch_train_loss)

        # Evaluation
        model.eval()
        num_correct = 0
        num_examples = 0

        for batch in dev_dataloader:
            input_ids = batch[0].to(device)
            attention_masks = batch[1].to(device)
            labels = batch[2]

            with torch.no_grad():
                outputs = model(input_ids, token_type_ids=None, attention_mask=attention_masks)

            predictions = outputs[0].argmax(dim=1).cpu()
            num_correct += (predictions == labels).sum().item()
            num_examples += labels.numel()

        # Accuracy over examples (not the mean of per-batch accuracies), so a
        # smaller final batch is not over-weighted.
        epoch_dev_accuracy = num_correct / num_examples if num_examples else 0.0
        print("Epoch accuracy is",epoch_dev_accuracy )

        # Snapshot the weights themselves: keeping a reference to `model` would
        # always point at the final epoch. Strict '>' keeps the earliest best epoch.
        if best_dev_accuracy is None or epoch_dev_accuracy > best_dev_accuracy:
            best_dev_accuracy = epoch_dev_accuracy
            best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }

    if best_state is not None:
        model.load_state_dict(best_state)
    return model



In [None]:
import pandas as pd

# Load the mutagenicity dataset from the working directory.
df = pd.read_csv('./new_data_mutagenicity.csv')

# Collapse the raw label strings to booleans: True when the value starts with "['True".
df['label'] = df['label'].map(lambda raw_label: raw_label.startswith("['True"))

# Use the column names the training code reads (`Abstract`, `AMES`).
new_column_names = {'text': 'Abstract', 'label': 'AMES'}
df = df.rename(columns=new_column_names)


In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
import pandas as pd
import gc
#df=pd.read_csv('./data.csv')

# 5-fold cross-validation: fine-tune a fresh model per fold, score it on the
# held-out fold and collect accuracy / macro-F1 / weighted-F1.
# NOTE(review): the held-out fold is also what `train_and_evaluate` scores each
# epoch on, so the reported metrics are not from fully unseen data.
kf = KFold(n_splits=5, random_state=42, shuffle=True)
CV_accuracy_array = []
CV_macro_avg_array = []
CV_weighted_avg_array = []
for fold, (train_index, test_index) in enumerate(kf.split(df), start=1):
    print("Fold", fold, "- train rows:", len(train_index), "test rows:", len(test_index))

    # KFold yields positional indices, so select rows by position (`iloc`);
    # `loc` is only equivalent while `df` has a default 0..n-1 index.
    train_data_df, dev_data_df = df.iloc[train_index], df.iloc[test_index]

    # `model` is a notebook-level global on purpose: `predict` reads it.
    model = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-large")
    model.to(device)
    model = train_and_evaluate(model, train_data_df, dev_data_df)

    preds = []
    scored_rows = []  # index labels of the rows that were predicted successfully
    for row_label in dev_data_df.index:
        try:
            passage = dev_data_df.Abstract[row_label]
            question = dev_data_df.questions[row_label]
            answer = predict(passage, question)
        except Exception as e:
            # Best effort: report the failing row and leave it out of the metrics.
            print("Skipping row", row_label, "-", e)
            continue
        preds.append(answer)
        scored_rows.append(row_label)

    # Keep only the rows that were scored, in the fold's order, so labels and
    # predictions stay aligned.
    dev = dev_data_df[dev_data_df.index.isin(scored_rows)]
    true_results = dev['AMES'].tolist()

    print(classification_report(true_results, preds))
    results = classification_report(true_results, preds, output_dict=True)
    CV_accuracy_array.append(results['accuracy'])
    CV_macro_avg_array.append(results['macro avg']['f1-score'])
    CV_weighted_avg_array.append(results['weighted avg']['f1-score'])

    # Free the fold's model before loading the next one.
    del model
    gc.collect()
    torch.cuda.empty_cache()



TRAIN: [   0    1    2 ... 1643 1644 1645]
TEST: [  15   23   29   30   32   43   44   49   51   56   59   63   65   67
   69   70   73   76   78   99  100  101  107  109  115  123  124  128
  135  141  148  162  163  168  170  173  175  184  185  192  198  199
  203  212  220  226  231  237  239  240  244  247  251  259  261  266
  270  271  274  275  289  297  298  300  303  306  309  316  324  331
  332  339  342  344  350  351  352  353  366  367  371  374  383  394
  398  405  408  411  413  414  415  416  420  422  425  426  433  438
  450  451  464  471  479  481  482  483  486  490  493  494  497  526
  527  529  534  538  543  551  552  554  560  561  567  575  582  584
  585  588  589  590  591  597  610  613  614  617  619  620  621  629
  651  654  668  669  674  679  680  682  694  706  707  720  724  727
  730  741  744  752  754  764  765  767  772  780  792  798  802  803
  809  810  813  816  818  838  842  844  845  847  855  861  865  867
  873  874  879  887  888  8

Downloading pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['pooler.dense.weight', 'classifier.bias', 'pooler.dense.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Done setting up optimizer





Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

In epoch  0 

Epoch loss is 0.7271762498425133
Epoch accuracy is 0.5454545454545454
In epoch  1 

Epoch loss is 0.7128254203040911
Epoch accuracy is 0.5454545454545454
In epoch  2 

Epoch loss is 0.6999627517077698
Epoch accuracy is 0.5454545454545454
In epoch  3 

Epoch loss is 0.6971901832289971
Epoch accuracy is 0.5454545454545454
In epoch  4 

Epoch loss is 0.6933660434734495
Epoch accuracy is 0.5454545454545454


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       False       0.00      0.00      0.00       150
        True       0.55      1.00      0.71       180

    accuracy                           0.55       330
   macro avg       0.27      0.50      0.35       330
weighted avg       0.30      0.55      0.39       330

TRAIN: [   0    1    2 ... 1643 1644 1645]
TEST: [  10   18   31   41   48   54   58   81   83   86   88   96  111  113
  126  129  131  140  142  147  155  156  158  164  174  178  179  181
  188  195  196  208  209  210  214  218  221  233  236  243  254  265
  277  282  285  286  287  290  291  292  294  296  308  310  312  322
  323  326  327  333  341  346  348  354  355  358  360  361  363  365
  370  375  377  380  381  382  390  410  419  423  427  428  429  430
  432  435  439  447  453  458  461  462  468  477  478  485  495  500
  506  513  514  516  518  522  528  530  532  535  544  548  555  566
  570  571  576  578  583  596  598  599  601  602  607 

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['pooler.dense.weight', 'classifier.bias', 'pooler.dense.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Done setting up optimizer





Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

In epoch  0 

Epoch loss is 0.7236438375955648
Epoch accuracy is 0.5848484848484848
In epoch  1 

Epoch loss is 0.7227977163954009
Epoch accuracy is 0.5848484848484848
In epoch  2 

Epoch loss is 0.7144184073656571
Epoch accuracy is 0.5848484848484848
In epoch  3 

Epoch loss is 0.7196700037068409
Epoch accuracy is 0.5848484848484848
In epoch  4 

Epoch loss is 0.7258954085692289
Epoch accuracy is 0.41515151515151516


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       False       0.00      0.00      0.00       137
        True       0.58      1.00      0.74       192

    accuracy                           0.58       329
   macro avg       0.29      0.50      0.37       329
weighted avg       0.34      0.58      0.43       329

TRAIN: [   1    4    7 ... 1642 1643 1645]
TEST: [   0    2    3    5    6    9   12   24   25   27   33   39   42   45
   47   52   55   60   62   66   68   71   72   74   77   80   82   84
   85   92   94   97  102  104  105  106  110  117  118  120  125  132
  136  137  138  139  145  165  167  171  182  183  193  194  204  211
  213  215  222  223  224  227  228  232  235  238  242  248  249  250
  256  258  260  267  272  273  280  281  299  302  305  307  311  314
  318  319  321  325  328  329  334  336  349  359  362  364  368  373
  376  388  393  404  409  421  424  434  436  440  442  445  446  448
  449  457  465  467  480  503  505  507  519  523  525 

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['pooler.dense.weight', 'classifier.bias', 'pooler.dense.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Done setting up optimizer





Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

In epoch  0 

Epoch loss is 0.7250624282090544
Epoch accuracy is 0.5666666666666667
In epoch  1 

Epoch loss is 0.7099103429870558
Epoch accuracy is 0.5666666666666667
In epoch  2 

Epoch loss is 0.7186921591642955
Epoch accuracy is 0.43333333333333335
In epoch  3 

Epoch loss is 0.7054008728003466
Epoch accuracy is 0.5666666666666667
In epoch  4 

Epoch loss is 0.7041155550069259
Epoch accuracy is 0.5666666666666667


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       False       0.00      0.00      0.00       142
        True       0.57      1.00      0.72       187

    accuracy                           0.57       329
   macro avg       0.28      0.50      0.36       329
weighted avg       0.32      0.57      0.41       329

TRAIN: [   0    1    2 ... 1641 1642 1644]
TEST: [   4    7   11   16   17   19   22   28   35   36   38   46   50   57
   61   75   79   89   90   93  108  114  116  119  127  133  144  149
  153  154  157  159  169  172  176  177  180  190  191  217  234  245
  255  257  263  264  268  278  283  284  301  304  313  320  335  338
  340  347  356  357  369  372  386  389  395  396  399  407  412  417
  431  443  444  454  456  460  470  473  475  476  487  489  491  496
  498  499  501  504  511  512  515  517  521  537  539  546  557  559
  568  569  574  580  587  593  595  604  606  616  625  633  635  652
  653  655  656  657  658  662  667  671  675  684  685 

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['pooler.dense.weight', 'classifier.bias', 'pooler.dense.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Done setting up optimizer





Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

In epoch  0 

Epoch loss is 0.7292584691631957
Epoch accuracy is 0.5606060606060606
In epoch  1 

Epoch loss is 0.6967355104697492
Epoch accuracy is 0.4393939393939394
In epoch  2 

Epoch loss is 0.6964789382966408
Epoch accuracy is 0.5606060606060606
In epoch  3 

Epoch loss is 0.6958787131472314
Epoch accuracy is 0.5606060606060606
In epoch  4 

Epoch loss is 0.695475692949816
Epoch accuracy is 0.5606060606060606


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       False       0.00      0.00      0.00       145
        True       0.56      1.00      0.72       184

    accuracy                           0.56       329
   macro avg       0.28      0.50      0.36       329
weighted avg       0.31      0.56      0.40       329

TRAIN: [   0    2    3 ... 1643 1644 1645]
TEST: [   1    8   13   14   20   21   26   34   37   40   53   64   87   91
   95   98  103  112  121  122  130  134  143  146  150  151  152  160
  161  166  186  187  189  197  200  201  202  205  206  207  216  219
  225  229  230  241  246  252  253  262  269  276  279  288  293  295
  315  317  330  337  343  345  378  379  384  385  387  391  392  397
  400  401  402  403  406  418  437  441  452  455  459  463  466  469
  472  474  484  488  492  502  508  509  510  520  524  540  550  556
  562  563  564  565  577  586  592  600  608  612  623  627  632  639
  640  641  642  645  646  647  648  659  663  681  683 

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['pooler.dense.weight', 'classifier.bias', 'pooler.dense.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Done setting up optimizer





Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

In epoch  0 

Epoch loss is 0.708608222930312
Epoch accuracy is 0.5272727272727272
In epoch  1 

Epoch loss is 0.6993126750263712
Epoch accuracy is 0.5272727272727272
In epoch  2 

Epoch loss is 0.712444632657562
Epoch accuracy is 0.5272727272727272
In epoch  3 

Epoch loss is 0.6951574403707825
Epoch accuracy is 0.5272727272727272
In epoch  4 

Epoch loss is 0.7095212600503417
Epoch accuracy is 0.5272727272727272


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       False       0.00      0.00      0.00       156
        True       0.53      1.00      0.69       173

    accuracy                           0.53       329
   macro avg       0.26      0.50      0.34       329
weighted avg       0.28      0.53      0.36       329



In [None]:
# Mean accuracy across the cross-validation folds.
print(f"The mean accuracy score is {np.mean(CV_accuracy_array)}")

The mean accuracy score is 0.5565073224647692


In [None]:
# Spread of the per-fold accuracy (np.std's default, population standard deviation).
print(f"The standard deviation for accuracy is {np.std(CV_accuracy_array)}")

The standard deviation for accuracy is 0.019723091383869008


In [None]:
# Mean macro-averaged F1 across the cross-validation folds.
print(f"The mean macro avg score is {np.mean(CV_macro_avg_array)}")

The mean macro avg score is 0.35743246561285746


In [None]:
# Spread of the per-fold macro-averaged F1.
print(f"The standard deviation for macro avg score is {np.std(CV_macro_avg_array)}")

The standard deviation for macro avg score is 0.008167267905088729


In [None]:
# Mean weighted-average F1 across the cross-validation folds.
print(f"The mean weighted avg score is {np.mean(CV_weighted_avg_array)}")

The mean weighted avg score is 0.3981497137038236


In [None]:
# Spread of the per-fold weighted-average F1.
print(f"The standard deviation for weighted avg score is {np.std(CV_weighted_avg_array)}")

The standard deviation for weighted avg score is 0.023113605978325475


In [None]:
# Release the Colab runtime now that the notebook has finished
# (presumably to stop consuming compute -- confirm this is intended before "Run all").
from google.colab import runtime
runtime.unassign()
