<a href="https://colab.research.google.com/github/gjorgjinac/food_chem/blob/master/food_chem_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install -U simpletransformers==0.60.8 transformers==4.3.2 import-ipynb shap

Requirement already up-to-date: simpletransformers==0.60.8 in /usr/local/lib/python3.7/dist-packages (0.60.8)
Requirement already up-to-date: transformers==4.3.2 in /usr/local/lib/python3.7/dist-packages (4.3.2)
Requirement already up-to-date: import-ipynb in /usr/local/lib/python3.7/dist-packages (0.1.3)
Requirement already up-to-date: shap in /usr/local/lib/python3.7/dist-packages (0.39.0)


In [1]:
# Mount Google Drive at /content/drive so the project files stored on Drive can be read/written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import pandas as pd

In [3]:
# Root folder of the project on the mounted Drive; every helper below resolves paths against it.
# NOTE(review): this is a *relative* path while Drive is mounted at '/content/drive', so it
# presumably relies on the working directory being '/content' — confirm.
project_dir = os.path.join('drive', 'My Drive', 'projects', 'food_drug')

def save_report_to_file(report, file_name):
  """Print a classification report and store it as a LaTeX table.

  Args:
    report: mapping accepted by `pd.DataFrame.from_dict` (the training cells pass the
      dict returned by `classification_report(..., output_dict=True)`).
    file_name: name of the file to create inside `<project_dir>/results`.

  Side effects:
    Prints the report as a DataFrame and (over)writes `<project_dir>/results/<file_name>`.
  """
  ret = pd.DataFrame.from_dict(report)
  ltx = ret.to_latex(label="tab:results", caption="Results")
  print(ret)
  results_dir = os.path.join(project_dir, 'results')
  # Create the folder on demand: without it the write fails with FileNotFoundError,
  # which would only surface after a whole fold has already been trained.
  os.makedirs(results_dir, exist_ok=True)
  with open(os.path.join(results_dir, file_name), "w") as f_out:
    f_out.write(f"{ltx}\n")


def read_df_from_project_dir(file_name):
  """Load a CSV stored under `project_dir`, using its first column as the index.

  Args:
    file_name: path of the CSV relative to `project_dir`.

  Returns:
    The DataFrame produced by `pd.read_csv`.
  """
  csv_path = os.path.join(project_dir, file_name)
  frame = pd.read_csv(csv_path, index_col=[0])
  return frame

def get_path_from_project_dir(file_name):
  """Return `file_name` joined onto the module-level `project_dir`."""
  full_path = os.path.join(project_dir, file_name)
  return full_path

In [4]:
import re
class FoodChemProcessor():
    """Masks the two entity mentions of a candidate relation inside its sentence.

    `extract_relation` rewrites a row's sentence so that the two terms are replaced by the
    placeholders ``XXX`` / ``YYY`` and stores the result under the ``'text'`` key.
    """

    def replace_ignore_case(self, string, substring, replacement):
        """Replace every case-insensitive occurrence of `substring` in `string`.

        `substring` is matched literally: it is escaped with `re.escape`, so characters
        that are regex syntax (``+ . * ? | ( ) [ ]`` ...) cannot change the match
        or raise `re.error`.

        Args:
            string: text to search in.
            substring: literal text to look for; an empty value leaves `string` unchanged.
            replacement: text to insert (interpreted by `re.sub` as a replacement template).

        Returns:
            The text with all matches replaced.
        """
        if not substring:
            # An empty pattern matches between every character; treat it as "nothing to mask".
            return string
        return re.sub(re.escape(substring), replacement, string, flags=re.IGNORECASE)

    def extract_relation(self, row):
        """Add a ``'text'`` entry to `row` holding the sentence with both terms masked.

        Args:
            row: mapping / Series with ``'sentence'`` and either ``'term1'``/``'term2'``
                or, when those are absent, ``'text_x'``/``'text_y'``.

        Returns:
            The same `row` object, with ``row['text']`` set.
        """
        term1_text = row['term1'] if 'term1' in row.keys() else row['text_x']
        term2_text = row['term2'] if 'term2' in row.keys() else row['text_y']
        sentence = row['sentence']
        # NOTE(review): the strictly longer term always ends up as XXX, but in the `else`
        # branch the (shorter or equal) term1 is still substituted first. Kept as is because
        # the intended rule cannot be determined from this file — confirm.
        if len(term1_text) > len(term2_text):
            sentence = self.replace_ignore_case(sentence, term1_text, 'XXX')
            sentence = self.replace_ignore_case(sentence, term2_text, 'YYY')
        else:
            sentence = self.replace_ignore_case(sentence, term1_text, 'YYY')
            sentence = self.replace_ignore_case(sentence, term2_text, 'XXX')
        if 'XXX' not in sentence or 'YYY' not in sentence:
            # Only one placeholder (or none) could be inserted: collapse whichever is present
            # into the combined marker. A single pass is required — chaining two
            # str.replace calls would re-expand the 'YYY' just inserted ('XXXXXXYYY').
            sentence = re.sub('XXX|YYY', 'XXXYYY', sentence)
        row['text'] = sentence
        return row

In [13]:
processor = FoodChemProcessor()

# Sentences of fold 0's train/val/test splits; augmentation rows whose sentence appears in
# any of them are discarded below.
# NOTE(review): this presumably covers the whole annotated corpus (each fold being a
# re-partition of the same data) — confirm.
p1,p2,p3 = [read_df_from_project_dir(f'food_chemical_10_folds/0/{split_name}.csv') for split_name in ['train','val','test']]
# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
whole = pd.concat([p1, p2, p3], sort=False)
sentences_in_whole=whole['sentence'].values

# Augmentation mode read by the training cell: 'aug' appends all augmentation rows,
# 'aug_bal' tops both classes up to the same size.
augmentation_setting='aug_bal'

aug_positive = read_df_from_project_dir('food_chem_augmentation_data/positive.csv')
aug_positive['labels']=1
aug_negative = read_df_from_project_dir('food_chem_augmentation_data/negative.csv')
aug_negative['labels']=0
print(aug_positive.shape)
print(aug_negative.shape)

# Drop augmentation sentences already present in the fold data, then mask the two terms.
aug_positive=aug_positive[~aug_positive['sentence'].isin(sentences_in_whole)]
aug_negative=aug_negative[~aug_negative['sentence'].isin(sentences_in_whole)]
aug_positive, aug_negative = [a.apply(processor.extract_relation, axis=1) for a in [aug_positive, aug_negative] ]

aug = pd.concat([aug_positive, aug_negative], sort=False)
print(aug.columns)

(493, 23)
Index(['sentence_index', 'sentence', 'extractor_x', 'supported_by',
       'file_name', 'support', 'end_char_x', 'text_x', 'data_from_extractor',
       'start_char_x', 'entity_type_x', 'entity_id_x', 'dataset',
       'start_char_y', 'end_char_y', 'entity_type_y', 'entity_id_y', 'text_y',
       'sentence_y', 'extractor_y', 'prediction', 'relation_label', 'labels'],
      dtype='object')
(1396, 28)
(3949, 28)
Index(['start_char_x', 'support', 'file_name', 'sentence_index', 'end_char_x',
       'data_from_extractor', 'term1', 'sentence', 'supported_by',
       'extractor_x', 'entity_type_x', 'entity_id_x', 'dataset_x',
       'start_char_y', 'end_char_y', 'entity_type_y', 'entity_id_y', 'term2',
       'sentence_y', 'extractor_y', 'dataset_y', 'prediction',
       'relation_candidates', 'bert', 'roberta', 'biobert', 'classifier_sum',
       'labels'],
      dtype='object')
(5345, 10)
Index(['start_char_x', 'support', 'file_name', 'sentence_index', 'end_char_x',
       'data_f

In [None]:
from simpletransformers.classification import (ClassificationModel, ClassificationArgs)
from sklearn.metrics import classification_report
import numpy as np
from sklearn.model_selection import train_test_split
import shutil

# Key into bert_model_config selecting the pretrained checkpoint to fine-tune.
bert_model_name='roberta'
bert_model_config={'biobert':'dmis-lab/biobert-v1.1', 'roberta': 'roberta-base', 'bert':'bert-base-cased'}

# 10-fold evaluation: for every fold, fine-tune on (optionally augmented) train, early-stop
# on val, then score on test and store the report as a LaTeX table.
for fold_number in range(0,10):
  print(f'FOLD: {fold_number}')
  train, val, test = [read_df_from_project_dir(f'food_chemical_10_folds/{fold_number}/{split_name}.csv') for split_name in ['train', 'val', 'test']]
  train, val, test = [df.apply(processor.extract_relation, axis=1) for df in [train,val,test]]
  if augmentation_setting=='aug':
    # pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
    train = pd.concat([train, aug], sort=False)
  if augmentation_setting=='aug_bal':
    train_positive_count=train[train['labels']==1].shape[0]
    train_negative_count=train[train['labels']==0].shape[0]
    # Largest per-class size that both classes can reach with the available augmentation rows.
    wanted_samples_per_class=min(train_positive_count+aug_positive.shape[0], train_negative_count+aug_negative.shape[0])
    print('W:')
    print(wanted_samples_per_class)
    print(train_positive_count)
    print(train_negative_count)
    # Clamp at 0: when a class already exceeds the target the difference is negative, and
    # DataFrame.head(-k) would return "all but the last k" rows instead of no rows.
    extra_positive_count=max(0, wanted_samples_per_class-train_positive_count)
    extra_negative_count=max(0, wanted_samples_per_class-train_negative_count)
    train = pd.concat([train, aug_positive.head(extra_positive_count), aug_negative.head(extra_negative_count)], sort=False)

  # No output_dir / best_model_dir is passed here, so the library's default locations are used
  # (presumably the local 'outputs' folder cleaned below — confirm).
  model_args = ClassificationArgs(num_train_epochs=10, do_lower_case = False,
                                    save_model_every_epoch =False, save_eval_checkpoints = False,save_steps = -1,
                                    evaluate_during_training_verbose=True,evaluate_during_training=True, 
                                    early_stopping_consider_epochs = True, use_early_stopping=True, early_stopping_patience = 2, early_stopping_delta=5e-3)

  print(train.groupby('labels').count()['sentence'])
  print(test.groupby('labels').count()['sentence'])
  # Remove training rows whose sentence also occurs in this fold's val/test split.
  test_val_text = list(test['sentence'].values) + list(val['sentence'].values)
  print(train.shape)
  train=train[~train['sentence'].isin(test_val_text)]
  print(train.shape)

  # Keep only the model inputs (masked text + label).
  train, val, test = [d[['text','labels']].dropna().drop_duplicates() for d in [train,val,test]]

  model = ClassificationModel(
      bert_model_name if bert_model_name!='biobert' else 'bert',
      bert_model_config[bert_model_name],
      num_labels=len(set(train['labels'].values)), args=model_args)
  # Clear the previous fold's output folder; unlike `rm -r`, this is silent when it is absent.
  shutil.rmtree('outputs', ignore_errors=True)
  model.train_model(train, eval_df=val)

  # Make predictions with the model
  predictions, raw_outputs = model.predict(list(test['text'].values))
  report = classification_report(test['labels'], predictions, output_dict=True)
  report_file_name = f"{bert_model_name}_{fold_number}_{augmentation_setting}.txt"
  print(report)
  save_report_to_file (report=report, file_name=report_file_name)

FOLD: 0
W:
1690
294
149
labels
0    1690
1    1690
Name: sentence, dtype: int64
labels
0    11
1    24
Name: sentence, dtype: int64
(3380, 33)
(3380, 33)


Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'0': {'precision': 0.6923076923076923, 'recall': 0.8181818181818182, 'f1-score': 0.7500000000000001, 'support': 11}, '1': {'precision': 0.9090909090909091, 'recall': 0.8333333333333334, 'f1-score': 0.8695652173913043, 'support': 24}, 'accuracy': 0.8285714285714286, 'macro avg': {'precision': 0.8006993006993006, 'recall': 0.8257575757575758, 'f1-score': 0.8097826086956522, 'support': 35}, 'weighted avg': {'precision': 0.840959040959041, 'recall': 0.8285714285714286, 'f1-score': 0.8319875776397515, 'support': 35}}
                   0          1  accuracy  macro avg  weighted avg
precision   0.692308   0.909091  0.828571   0.800699      0.840959
recall      0.818182   0.833333  0.828571   0.825758      0.828571
f1-score    0.750000   0.869565  0.828571   0.809783      0.831988
support    11.000000  24.000000  0.828571  35.000000     35.000000
FOLD: 1
W:
1690
294
149
labels
0    1690
1    1690
Name: sentence, dtype: int64
labels
0    11
1    24
Name: sentence, dtype: int64
(3380, 33)
(33

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'0': {'precision': 0.8461538461538461, 'recall': 1.0, 'f1-score': 0.9166666666666666, 'support': 11}, '1': {'precision': 1.0, 'recall': 0.9166666666666666, 'f1-score': 0.9565217391304348, 'support': 24}, 'accuracy': 0.9428571428571428, 'macro avg': {'precision': 0.9230769230769231, 'recall': 0.9583333333333333, 'f1-score': 0.9365942028985508, 'support': 35}, 'weighted avg': {'precision': 0.9516483516483516, 'recall': 0.9428571428571428, 'f1-score': 0.9439958592132505, 'support': 35}}
                   0          1  accuracy  macro avg  weighted avg
precision   0.846154   1.000000  0.942857   0.923077      0.951648
recall      1.000000   0.916667  0.942857   0.958333      0.942857
f1-score    0.916667   0.956522  0.942857   0.936594      0.943996
support    11.000000  24.000000  0.942857  35.000000     35.000000
FOLD: 2
W:
1690
294
149
labels
0    1690
1    1690
Name: sentence, dtype: int64
labels
0    12
1    23
Name: sentence, dtype: int64
(3380, 33)
(3380, 33)


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'0': {'precision': 0.6666666666666666, 'recall': 0.8333333333333334, 'f1-score': 0.7407407407407408, 'support': 12}, '1': {'precision': 0.9, 'recall': 0.782608695652174, 'f1-score': 0.8372093023255814, 'support': 23}, 'accuracy': 0.8, 'macro avg': {'precision': 0.7833333333333333, 'recall': 0.8079710144927537, 'f1-score': 0.7889750215331611, 'support': 35}, 'weighted avg': {'precision': 0.82, 'recall': 0.8, 'f1-score': 0.8041343669250647, 'support': 35}}
                   0          1  accuracy  macro avg  weighted avg
precision   0.666667   0.900000       0.8   0.783333      0.820000
recall      0.833333   0.782609       0.8   0.807971      0.800000
f1-score    0.740741   0.837209       0.8   0.788975      0.804134
support    12.000000  23.000000       0.8  35.000000     35.000000
FOLD: 3
W:
1690
294
150
labels
0    1690
1    1690
Name: sentence, dtype: int64
labels
0    12
1    22
Name: sentence, dtype: int64
(3380, 33)
(3380, 33)


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'0': {'precision': 1.0, 'recall': 0.8333333333333334, 'f1-score': 0.9090909090909091, 'support': 12}, '1': {'precision': 0.9166666666666666, 'recall': 1.0, 'f1-score': 0.9565217391304348, 'support': 22}, 'accuracy': 0.9411764705882353, 'macro avg': {'precision': 0.9583333333333333, 'recall': 0.9166666666666667, 'f1-score': 0.9328063241106719, 'support': 34}, 'weighted avg': {'precision': 0.946078431372549, 'recall': 0.9411764705882353, 'f1-score': 0.9397814461753081, 'support': 34}}
                   0          1  accuracy  macro avg  weighted avg
precision   1.000000   0.916667  0.941176   0.958333      0.946078
recall      0.833333   1.000000  0.941176   0.916667      0.941176
f1-score    0.909091   0.956522  0.941176   0.932806      0.939781
support    12.000000  22.000000  0.941176  34.000000     34.000000
FOLD: 4
W:
1690
294
150
labels
0    1690
1    1690
Name: sentence, dtype: int64
labels
0    13
1    21
Name: sentence, dtype: int64
(3380, 33)
(3380, 33)


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'0': {'precision': 1.0, 'recall': 0.6153846153846154, 'f1-score': 0.761904761904762, 'support': 13}, '1': {'precision': 0.8076923076923077, 'recall': 1.0, 'f1-score': 0.8936170212765957, 'support': 21}, 'accuracy': 0.8529411764705882, 'macro avg': {'precision': 0.9038461538461539, 'recall': 0.8076923076923077, 'f1-score': 0.8277608915906789, 'support': 34}, 'weighted avg': {'precision': 0.8812217194570137, 'recall': 0.8529411764705882, 'f1-score': 0.843256451516777, 'support': 34}}
                   0          1  accuracy  macro avg  weighted avg
precision   1.000000   0.807692  0.852941   0.903846      0.881222
recall      0.615385   1.000000  0.852941   0.807692      0.852941
f1-score    0.761905   0.893617  0.852941   0.827761      0.843256
support    13.000000  21.000000  0.852941  34.000000     34.000000
FOLD: 5
W:
1690
294
150
labels
0    1690
1    1690
Name: sentence, dtype: int64
labels
0    11
1    23
Name: sentence, dtype: int64
(3380, 33)
(3380, 33)


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/422 [00:00<?, ?it/s]



Running Epoch 1 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'0': {'precision': 0.8333333333333334, 'recall': 0.9090909090909091, 'f1-score': 0.8695652173913043, 'support': 11}, '1': {'precision': 0.9545454545454546, 'recall': 0.9130434782608695, 'f1-score': 0.9333333333333332, 'support': 23}, 'accuracy': 0.9117647058823529, 'macro avg': {'precision': 0.893939393939394, 'recall': 0.9110671936758893, 'f1-score': 0.9014492753623188, 'support': 34}, 'weighted avg': {'precision': 0.9153297682709448, 'recall': 0.9117647058823529, 'f1-score': 0.9127024722932651, 'support': 34}}
                   0          1  accuracy  macro avg  weighted avg
precision   0.833333   0.954545  0.911765   0.893939      0.915330
recall      0.909091   0.913043  0.911765   0.911067      0.911765
f1-score    0.869565   0.933333  0.911765   0.901449      0.912702
support    11.000000  23.000000  0.911765  34.000000     34.000000
FOLD: 6
W:
1690
294
150
labels
0    1690
1    1690
Name: sentence, dtype: int64
labels
0    11
1    23
Name: sentence, dtype: int64
(3380, 33)
(33

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'0': {'precision': 0.8571428571428571, 'recall': 0.5454545454545454, 'f1-score': 0.6666666666666665, 'support': 11}, '1': {'precision': 0.8148148148148148, 'recall': 0.9565217391304348, 'f1-score': 0.8800000000000001, 'support': 23}, 'accuracy': 0.8235294117647058, 'macro avg': {'precision': 0.8359788359788359, 'recall': 0.7509881422924901, 'f1-score': 0.7733333333333333, 'support': 34}, 'weighted avg': {'precision': 0.828509181450358, 'recall': 0.8235294117647058, 'f1-score': 0.8109803921568628, 'support': 34}}
                   0          1  accuracy  macro avg  weighted avg
precision   0.857143   0.814815  0.823529   0.835979      0.828509
recall      0.545455   0.956522  0.823529   0.750988      0.823529
f1-score    0.666667   0.880000  0.823529   0.773333      0.810980
support    11.000000  23.000000  0.823529  34.000000     34.000000
FOLD: 7
W:
1691
295
149
labels
0    1691
1    1691
Name: sentence, dtype: int64
labels
0    11
1    23
Name: sentence, dtype: int64
(3382, 33)
(33

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'0': {'precision': 0.75, 'recall': 0.8181818181818182, 'f1-score': 0.7826086956521738, 'support': 11}, '1': {'precision': 0.9090909090909091, 'recall': 0.8695652173913043, 'f1-score': 0.888888888888889, 'support': 23}, 'accuracy': 0.8529411764705882, 'macro avg': {'precision': 0.8295454545454546, 'recall': 0.8438735177865613, 'f1-score': 0.8357487922705313, 'support': 34}, 'weighted avg': {'precision': 0.857620320855615, 'recall': 0.8529411764705882, 'f1-score': 0.8545041204887752, 'support': 34}}
                   0          1  accuracy  macro avg  weighted avg
precision   0.750000   0.909091  0.852941   0.829545      0.857620
recall      0.818182   0.869565  0.852941   0.843874      0.852941
f1-score    0.782609   0.888889  0.852941   0.835749      0.854504
support    11.000000  23.000000  0.852941  34.000000     34.000000
FOLD: 8
W:
1691
295
149
labels
0    1691
1    1691
Name: sentence, dtype: int64
labels
0    12
1    22
Name: sentence, dtype: int64
(3382, 33)
(3382, 33)


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'0': {'precision': 0.8181818181818182, 'recall': 0.75, 'f1-score': 0.7826086956521738, 'support': 12}, '1': {'precision': 0.8695652173913043, 'recall': 0.9090909090909091, 'f1-score': 0.888888888888889, 'support': 22}, 'accuracy': 0.8529411764705882, 'macro avg': {'precision': 0.8438735177865613, 'recall': 0.8295454545454546, 'f1-score': 0.8357487922705313, 'support': 34}, 'weighted avg': {'precision': 0.8514299000232505, 'recall': 0.8529411764705882, 'f1-score': 0.8513782324524013, 'support': 34}}
                   0          1  accuracy  macro avg  weighted avg
precision   0.818182   0.869565  0.852941   0.843874      0.851430
recall      0.750000   0.909091  0.852941   0.829545      0.852941
f1-score    0.782609   0.888889  0.852941   0.835749      0.851378
support    12.000000  22.000000  0.852941  34.000000     34.000000
FOLD: 9
W:
1691
295
149
labels
0    1691
1    1691
Name: sentence, dtype: int64
labels
0    10
1    24
Name: sentence, dtype: int64
(3382, 33)
(3382, 33)


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

{'0': {'precision': 0.8181818181818182, 'recall': 0.9, 'f1-score': 0.8571428571428572, 'support': 10}, '1': {'precision': 0.9565217391304348, 'recall': 0.9166666666666666, 'f1-score': 0.9361702127659574, 'support': 24}, 'accuracy': 0.9117647058823529, 'macro avg': {'precision': 0.8873517786561265, 'recall': 0.9083333333333333, 'f1-score': 0.8966565349544073, 'support': 34}, 'weighted avg': {'precision': 0.915833527086724, 'recall': 0.9117647058823529, 'f1-score': 0.9129268728768103, 'support': 34}}
                   0          1  accuracy  macro avg  weighted avg
precision   0.818182   0.956522  0.911765   0.887352      0.915834
recall      0.900000   0.916667  0.911765   0.908333      0.911765
f1-score    0.857143   0.936170  0.911765   0.896657      0.912927
support    10.000000  24.000000  0.911765  34.000000     34.000000


In [None]:
from simpletransformers.classification import (ClassificationModel, ClassificationArgs)
from sklearn.metrics import classification_report
import numpy as np
from sklearn.model_selection import train_test_split
import shutil


# Key into bert_model_config selecting the pretrained checkpoint to fine-tune.
bert_model_name='roberta'
bert_model_config={'biobert':'dmis-lab/biobert-v1.1', 'roberta': 'roberta-base', 'bert':'bert-base-cased'}

# Train one model on fold 0 using the raw (unmasked) sentence as input text and store it
# under <project_dir>/trained_models/<bert_model_name>.
for fold_number in range(0,1):
  print(f'FOLD: {fold_number}')
  train_fold, val, test = [read_df_from_project_dir(f'food_chemical_10_folds/{fold_number}/{split_name}.csv').rename(columns={'sentence':'text'}) for split_name in ['train', 'val', 'test']]

  model_args = ClassificationArgs(num_train_epochs=10, do_lower_case = False,
                                    overwrite_output_dir=True, output_dir=get_path_from_project_dir(f'trained_models/{bert_model_name}'), 
                                    best_model_dir =  get_path_from_project_dir(f'trained_models/{bert_model_name}/best'), 
                                    save_model_every_epoch =False, save_eval_checkpoints = False,save_steps = -1,
                                    evaluate_during_training_verbose=True,evaluate_during_training=True, 
                                    early_stopping_consider_epochs = True, use_early_stopping=True, early_stopping_patience = 2, early_stopping_delta=5e-3)

  for (augmentation_setting, train) in [('none', train_fold)]: 
    print()
    print(augmentation_setting)
  
    train, val, test = [d[['text','labels']].dropna().drop_duplicates() for d in [train,val,test]]
    # Train on train+val. pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    train = pd.concat([train, val], sort=False)
    # NOTE(review): the test split becomes the evaluation set used during training, so early
    # stopping / best-model selection sees test data — confirm this is intended.
    val=test
    print(train.shape)
    print(train.groupby('labels').count())
    print(train.shape)
    model = ClassificationModel(
        bert_model_name if bert_model_name!='biobert' else 'bert',
        bert_model_config[bert_model_name],
        num_labels=len(set(train['labels'].values)), args=model_args)
    # Remove a stale local 'outputs' folder if one exists; unlike `rm -r`, this does not
    # emit an error when the folder is absent.
    shutil.rmtree('outputs', ignore_errors=True)
    model.train_model(train, eval_df=val)


FOLD: 0

none
(458, 2)
        text
labels      
0        155
1        303
(458, 2)


Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

rm: cannot remove 'outputs': No such file or directory


  0%|          | 0/1 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/58 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/58 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/58 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/58 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/58 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/58 [00:00<?, ?it/s]