# Evaluation of BERT Models

## Setup

In [0]:
# Mount Google Drive at /content/gdrive (Colab only); the corpus and experiment
# config paths used further down all live below this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

In [0]:
import pandas as pd
import time
import numpy as np
import math
from sklearn.model_selection import train_test_split, KFold

In [0]:
# Clone the FARM framework from GitHub, install its requirements, then install
# the package itself from the cloned directory.
# NOTE(review): the clone is not pinned to a tag or commit, so a fresh run may
# install a FARM version different from the one this notebook was written for.
!git clone https://github.com/deepset-ai/FARM.git
!pip install -r FARM/requirements.txt
!pip install FARM/

In [0]:
import torch
from farm.modeling.tokenization import BertTokenizer
from farm.data_handler.processor import TextClassificationProcessor
from farm.data_handler.data_silo import DataSilo
from farm.modeling.language_model import Bert
from farm.modeling.prediction_head import TextClassificationHead
from farm.modeling.adaptive_model import AdaptiveModel
from farm.modeling.optimization import initialize_optimizer
from farm.train import Trainer
from farm.data_handler.processor import NERProcessor
from farm.modeling.prediction_head import TokenClassificationHead
from farm.infer import Inferencer
from pprint import PrettyPrinter

# Load the pretrained language model. The same model name is used again when
# the tokenizer is created, so the two must be kept in sync.
MODEL_NAME_OR_PATH = "bert-base-german-cased"
language_model = Bert.load(MODEL_NAME_OR_PATH)

In [0]:
# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
print(f"Devices available: {device}")

### Data

In [0]:
# Location of the annotated corpus on the mounted Google Drive.
dataset_path = (
    "/content/gdrive/My Drive/Colab Notebooks/Corpus/"
    "full_annotations_v10.csv"
)

In [0]:
# Load the annotated corpus and collect the distinct review identifiers,
# ordered by the integer that follows the first six characters of each id.
df = pd.read_csv(dataset_path)
reviews = sorted(set(df['Review'].values), key=lambda review_id: int(review_id[6:]))

## Component Identification & Classification combined

In [0]:
# Build the label column for the combined identification + classification task.
# Work on a copy so the raw annotation frame `df` is not modified as a side
# effect of preparing this experiment.
df_new = df.copy()

# A 'B' row opens a component and takes its type from 'Ann_Ann'; an 'I' row
# continues the type of the most recent 'B' row; every other row is labelled
# 'B-O' and resets the running type.
current_type = 'O'
combined_labels = list()
for bio_tag, annotation in zip(df_new['BIO'], df_new['Ann_Ann']):
  if bio_tag == 'B':
    current_type = annotation
    combined_labels.append('B-' + current_type)
  elif bio_tag == 'I':
    combined_labels.append('I-' + current_type)
  else:
    current_type = 'O'
    combined_labels.append('B-' + current_type)

df_new['combined_labels'] = combined_labels

# Split on review ids (not on single tokens) so that all tokens of a review
# end up in the same partition; 20% of the reviews are held out for testing.
reviews_train, reviews_test = train_test_split(reviews, test_size=0.2, random_state=42)
df_new_train = df_new[df_new['Review'].isin(reviews_train)]
df_new_test = df_new[df_new['Review'].isin(reviews_test)]

print(df_new.head(10))

In [0]:
# Write the training split to train.txt: one "token<TAB>label" line per token
# and an empty line after each review. The context manager guarantees the file
# is closed (and its buffer flushed) even if a write fails part-way through.
with open('train.txt', 'w+') as f_train:
  for rev in reviews_train:
    df_relevant = df_new_train[df_new_train['Review'] == rev]
    for token, label in zip(df_relevant['Tokens'], df_relevant['combined_labels']):
      f_train.write(str(token) + '\t' + str(label) + '\n')
    f_train.write('\n')

In [0]:
# Write the test split to test.txt: one "token<TAB>label" line per token and an
# empty line after each review. The context manager guarantees the file is
# closed (and its buffer flushed) even if a write fails part-way through.
with open('test.txt', 'w+') as f_test:
  for rev in reviews_test:
    df_relevant = df_new_test[df_new_test['Review'] == rev]
    for token, label in zip(df_relevant['Tokens'], df_relevant['combined_labels']):
      f_test.write(str(token) + '\t' + str(label) + '\n')
    f_test.write('\n')

#### Processing and Training

In [0]:
# Initialize the tokenizer that is used for preprocessing text.
# BERT tokenizes with WordPiece sub-word units (not byte pair encoding).
# The tokenizer is loaded from MODEL_NAME_OR_PATH, the same name the language
# model was loaded from, so vocabulary and model cannot drift apart.

tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name_or_path=MODEL_NAME_OR_PATH,
    do_lower_case=False)

In [0]:
# Labels the token classifier can emit for the combined task.
# NOTE(review): 'X' and '[PAD]' are presumably FARM's special labels for
# sub-word continuation pieces and padding -- confirm against the FARM docs.
ner_labels = ['B-Claim', 'I-Claim', 'B-Premise', 'I-Premise', 'B-O', 'X', '[PAD]']

# Processor that preprocesses the data in CoNLL03-style NER format.
# It reads train.txt from the working directory and is given no dev file
# (the dev_split=0.1 option is intentionally left disabled).
ner_processor = NERProcessor(
    tokenizer=tokenizer,
    max_seq_len=512,
    data_dir="",
    train_filename='train.txt',
    dev_filename=None,
)

# Register the task under the name "ner" with the "seq_f1" metric.
ner_processor.add_task("ner", "seq_f1", ner_labels)

In [0]:
# This prediction head is a feed forward neural network that expects one
# vector per token in the input sequence and generates a set of logits
# for each input token.
# The output width is derived from the label list so it cannot fall out of
# sync when labels are added or removed.
# NOTE(review): 768 is presumably the hidden size of the loaded BERT model --
# confirm if a different language model is used.

LAYER_DIMS = [768, len(ner_labels)]

ner_prediction_head = TokenClassificationHead(layer_dims=LAYER_DIMS)

In [0]:
# Wire the NER processor, the language model and the token-level prediction
# head together, then create the optimizer and the trainer for fine-tuning.

# Hyperparameters for this run; each one is passed by keyword to the
# constructors below.
BATCH_SIZE = 8
EMBEDS_DROPOUT_PROB = 0.1
LEARNING_RATE = 2e-5
WARMUP_PROPORTION = 0.1
N_EPOCHS = 3
N_GPU = 1

# Data container built from the NER processor; its "train" loader is used
# below to determine the number of batches per epoch.
data_silo = DataSilo(
    processor=ner_processor,
    batch_size=BATCH_SIZE)

# Combine the language model with the single prediction head.
# NOTE(review): "per_token" presumably selects one language-model output
# vector per token for the head -- confirm against the FARM docs.
model = AdaptiveModel(
    language_model=language_model,
    prediction_heads=[ner_prediction_head],
    embeds_dropout_prob=EMBEDS_DROPOUT_PROB,
    lm_output_types=["per_token"],
    device=device)

# Returns both the optimizer and the warmup schedule object that the trainer
# takes as `warmup_linear`.
optimizer, warmup_linear = initialize_optimizer(
    model=model,
    learning_rate=LEARNING_RATE,
    warmup_proportion=WARMUP_PROPORTION,
    n_batches=len(data_silo.loaders["train"]),
    n_epochs=N_EPOCHS)

trainer = Trainer(
    optimizer=optimizer,
    data_silo=data_silo,
    epochs=N_EPOCHS,
    n_gpu=N_GPU,
    warmup_linear=warmup_linear,
    device=device,
)

In [0]:
# Run the fine-tuning; the value returned by train() is bound back to `model`.
model = trainer.train(model)

## Model Evaluation Direct Classification

### Data handling

In [0]:
# Build the label column for the direct classification setup.
# Work on a copy so the raw annotation frame `df` is not modified as a side
# effect of preparing this experiment.
df_new = df.copy()

# A 'B' row opens a component and takes its type from 'Ann_Ann' (label
# 'B-B<type>'); an 'I' row continues the type of the most recent 'B' row
# (label 'B-I<type>'); every other row is labelled 'B-O' and resets the
# running type. Note that every label in this variant carries a 'B-' prefix.
current_type = 'O'
combined_labels = list()
for bio_tag, annotation in zip(df_new['BIO'], df_new['Ann_Ann']):
  if bio_tag == 'B':
    current_type = annotation
    combined_labels.append('B-B' + current_type)
  elif bio_tag == 'I':
    combined_labels.append('B-I' + current_type)
  else:
    current_type = 'O'
    combined_labels.append('B-' + current_type)

df_new['combined_labels'] = combined_labels

# Split on review ids (not on single tokens) so that all tokens of a review
# end up in the same partition; 20% of the reviews are held out for testing.
reviews_train, reviews_test = train_test_split(reviews, test_size=0.2, random_state=42)
df_new_train = df_new[df_new['Review'].isin(reviews_train)]
df_new_test = df_new[df_new['Review'].isin(reviews_test)]

print(df_new.head(10))

In [0]:
#def write_inputfiles_txt(df=[df_new_train, df_new_dev, df_new_test], reviews=[reviews_train, reviews_dev, reviews_test], part=['train', 'dev', 'test']):
def write_inputfiles_txt(df=None, reviews=None, part=None):
  """Write one tab-separated token/label file per data partition.

  For every partition a file named '<part>.txt' is created. Each token of a
  review becomes one "token<TAB>label" line, and an empty line is written
  after every review.

  Args:
    df: list of DataFrames with 'Review', 'Tokens' and 'combined_labels'
      columns, one per partition. Defaults to [df_new_train, df_new_test].
    reviews: list of review-id sequences, one per partition, giving the order
      in which reviews are written. Defaults to [reviews_train, reviews_test].
    part: list of output file stems, one per partition.
      Defaults to ['train', 'test'].

  Raises:
    ValueError: if the three lists do not have the same length.
  """
  # Resolve the defaults at call time instead of definition time, so that
  # re-running the split cell is picked up without redefining this function.
  if df is None:
    df = [df_new_train, df_new_test]
  if reviews is None:
    reviews = [reviews_train, reviews_test]
  if part is None:
    part = ['train', 'test']

  if not (len(df) == len(reviews) == len(part)):
    raise ValueError('df, reviews and part must have the same number of entries')

  for df_i, reviews_i, part_i in zip(df, reviews, part):
    # The context manager closes the file even if a write fails part-way.
    with open('{}.txt'.format(part_i), 'w+') as f:
      for rev in reviews_i:
        df_relevant = df_i[df_i['Review'] == rev]
        for token, label in zip(df_relevant['Tokens'], df_relevant['combined_labels']):
          f.write(str(token) + '\t' + str(label) + '\n')
        f.write('\n')

In [0]:
# Called without arguments, so the defaults declared on the function are used:
# the train and test partitions are written to train.txt and test.txt.
write_inputfiles_txt()

In [0]:
from google.colab import files

# Hand the generated input files over to Colab's download helper.
for filename in ('train.txt', 'test.txt'):
  files.download(filename)

In [0]:
import time
from farm.experiment import run_experiment, load_experiments

# Load every experiment configuration from the JSON file on Google Drive.
experiment_path = "/content/gdrive/My Drive/Colab Notebooks/Corpus/experiments/Direct/arguEval_direct.json"
experiments = load_experiments(experiment_path)

# Run the experiments one after another and log the wall-clock runtime of
# each. The context manager plus the explicit flush make sure the runtimes
# recorded so far survive on disk if a later experiment crashes.
with open('logbook.txt', 'w+') as f:
  for i, experiment in enumerate(experiments):
    start = time.time()
    run_experiment(experiment)
    end = time.time()
    f.write('Experiment {}: runtime: {}'.format(i, end-start))
    f.write('\n')
    f.flush()
files.download('logbook.txt')

## Experiment

### Data handling

In [0]:
# Build the label column for the hyperparameter experiment.
# Work on a copy so the raw annotation frame `df` is not modified as a side
# effect of preparing this experiment.
df_new = df.copy()

# A 'B' row opens a component and takes its type from 'Ann_Ann' (label
# 'B-B<type>'); an 'I' row continues the type of the most recent 'B' row
# (label 'B-I<type>'); every other row is labelled 'B-O' and resets the
# running type. Note that every label in this variant carries a 'B-' prefix.
current_type = 'O'
combined_labels = list()
for bio_tag, annotation in zip(df_new['BIO'], df_new['Ann_Ann']):
  if bio_tag == 'B':
    current_type = annotation
    combined_labels.append('B-B' + current_type)
  elif bio_tag == 'I':
    combined_labels.append('B-I' + current_type)
  else:
    current_type = 'O'
    combined_labels.append('B-' + current_type)

df_new['combined_labels'] = combined_labels

# Split on review ids (not on single tokens) so that all tokens of a review
# end up in the same partition. 30% of the reviews are held out first; one
# third of that hold-out becomes the test set and the rest the dev set.
reviews_train, reviews_dev_test = train_test_split(reviews, test_size=0.3, random_state=42)
reviews_dev, reviews_test = train_test_split(reviews_dev_test, test_size=(1/3), random_state=42)
df_new_train = df_new[df_new['Review'].isin(reviews_train)]
df_new_dev = df_new[df_new['Review'].isin(reviews_dev)]
df_new_test = df_new[df_new['Review'].isin(reviews_test)]

print(df_new.head(10))

In [0]:
def write_inputfiles_txt(df=None, reviews=None, part=None):
  """Write one tab-separated token/label file per data partition.

  For every partition a file named '<part>.txt' is created. Each token of a
  review becomes one "token<TAB>label" line, and an empty line is written
  after every review.

  Args:
    df: list of DataFrames with 'Review', 'Tokens' and 'combined_labels'
      columns, one per partition.
      Defaults to [df_new_train, df_new_dev, df_new_test].
    reviews: list of review-id sequences, one per partition, giving the order
      in which reviews are written.
      Defaults to [reviews_train, reviews_dev, reviews_test].
    part: list of output file stems, one per partition.
      Defaults to ['train1', 'dev1', 'test1'].

  Raises:
    ValueError: if the three lists do not have the same length.
  """
  # Resolve the defaults at call time instead of definition time, so that
  # re-running the split cell is picked up without redefining this function.
  if df is None:
    df = [df_new_train, df_new_dev, df_new_test]
  if reviews is None:
    reviews = [reviews_train, reviews_dev, reviews_test]
  if part is None:
    part = ['train1', 'dev1', 'test1']

  if not (len(df) == len(reviews) == len(part)):
    raise ValueError('df, reviews and part must have the same number of entries')

  for df_i, reviews_i, part_i in zip(df, reviews, part):
    # The context manager closes the file even if a write fails part-way.
    with open('{}.txt'.format(part_i), 'w+') as f:
      for rev in reviews_i:
        df_relevant = df_i[df_i['Review'] == rev]
        for token, label in zip(df_relevant['Tokens'], df_relevant['combined_labels']):
          f.write(str(token) + '\t' + str(label) + '\n')
        f.write('\n')

In [0]:
# Called without arguments, so the defaults declared on the function are used:
# the train, dev and test partitions go to train1.txt, dev1.txt and test1.txt.
write_inputfiles_txt()

In [0]:
#new writing approach --> NOT WORKING
# The draft below is wrapped in a string literal, so it is never executed.
# It takes a single frame / review list / file stem, but its body still reads
# `reviews[r]` and `df_i`, and neither `r` nor `df_i` is defined in this
# signature. It also writes the blank separator line only once, after all
# reviews, instead of after each review.
"""
def write_inputfiles_txt(df, reviews, part):
  f = open('{}.txt'.format(part), 'w+')
  for rev in reviews[r]:
    df_relevant = df_i[df_i['Review'] == rev]
    for i in range(len(df_relevant)):
      f.write(str(df_relevant['Tokens'].iloc[i]) + '\t' + str(df_relevant['combined_labels'].iloc[i]))
      f.write('\n')
  f.write('\n')
  f.close()"""

In [0]:
# Disabled: these calls target the single-partition draft of
# write_inputfiles_txt that is marked NOT WORKING, so they are kept for
# reference only. (The cell previously ended in four quote characters, which
# left an unterminated string and made the cell raise a SyntaxError.)
# write_inputfiles_txt(df_new_train, reviews_train, 'train')
# write_inputfiles_txt(df_new_dev, reviews_dev, 'dev')
# write_inputfiles_txt(df_new_test, reviews_test, 'test')

In [0]:
from google.colab import files

# Download the files written by write_inputfiles_txt() for this split. Its
# defaults write train1.txt, dev1.txt and test1.txt; the un-suffixed names
# requested before are not produced in this section (dev.txt is never
# created at all), so the download failed or fetched stale data.
files.download('train1.txt')
files.download('dev1.txt')
files.download('test1.txt')

### run experiment

In [0]:

from farm.experiment import run_experiment, load_experiments

# Load all experiment configurations from the JSON file and run them in order.
experiment_path = "/content/gdrive/My Drive/Colab Notebooks/Corpus/experiments/Direct/arguEval_direct_new_new.json"
experiments = load_experiments(experiment_path)

for experiment in experiments:
  run_experiment(experiment)


## New Best Model
learning_rate = 5e-5;

warmup_proportion = 0.1;

embedding_dropout_prob = 0.15


In [0]:
from farm.experiment import run_experiment, load_experiments

# Load the final-model configuration and run its first (index 0) experiment.
final_model_path = "/content/gdrive/My Drive/Colab Notebooks/Corpus/experiments/Direct/final_model.json"
final_model = load_experiments(final_model_path)
final_experiment = final_model[0]

run_experiment(final_experiment)

In [0]:
from farm.experiment import run_experiment, load_experiments

# Load the premise/claim final-model configuration and run its first
# (index 0) experiment.
final_model_path = "/content/gdrive/My Drive/Colab Notebooks/Corpus/experiments/Direct/final_model_premise_claim.json"
final_model = load_experiments(final_model_path)
final_experiment = final_model[0]

run_experiment(final_experiment)

### Data handling

In [0]:
# Build the label column with plain BIO labels.
# Work on a copy so the raw annotation frame `df` is not modified as a side
# effect of preparing this experiment.
df_new = df.copy()

# A 'B' row opens a component and takes its type from 'Ann_Ann' (label
# 'B-<type>'); an 'I' row continues the type of the most recent 'B' row
# (label 'I-<type>'); every other row is labelled plain 'O' and resets the
# running type.
current_type = 'O'
combined_labels = list()
for bio_tag, annotation in zip(df_new['BIO'], df_new['Ann_Ann']):
  if bio_tag == 'B':
    current_type = annotation
    combined_labels.append('B-' + current_type)
  elif bio_tag == 'I':
    combined_labels.append('I-' + current_type)
  else:
    current_type = 'O'
    combined_labels.append(current_type)

df_new['combined_labels'] = combined_labels

# Split on review ids (not on single tokens) so that all tokens of a review
# end up in the same partition. 30% of the reviews are held out first; one
# third of that hold-out becomes the test set and the rest the dev set.
reviews_train, reviews_dev_test = train_test_split(reviews, test_size=0.3, random_state=42)
reviews_dev, reviews_test = train_test_split(reviews_dev_test, test_size=(1/3), random_state=42)
df_new_train = df_new[df_new['Review'].isin(reviews_train)]
df_new_dev = df_new[df_new['Review'].isin(reviews_dev)]
df_new_test = df_new[df_new['Review'].isin(reviews_test)]

print(df_new.head(10))

In [0]:
def write_inputfiles_txt(df=None, reviews=None, part=None):
  """Write one tab-separated token/label file per data partition.

  For every partition a file named '<part>.txt' is created. Each token of a
  review becomes one "token<TAB>label" line, and an empty line is written
  after every review.

  Args:
    df: list of DataFrames with 'Review', 'Tokens' and 'combined_labels'
      columns, one per partition.
      Defaults to [df_new_train, df_new_dev, df_new_test].
    reviews: list of review-id sequences, one per partition, giving the order
      in which reviews are written.
      Defaults to [reviews_train, reviews_dev, reviews_test].
    part: list of output file stems, one per partition.
      Defaults to ['train1', 'dev1', 'test1'].

  Raises:
    ValueError: if the three lists do not have the same length.
  """
  # Resolve the defaults at call time instead of definition time, so that
  # re-running the split cell is picked up without redefining this function.
  if df is None:
    df = [df_new_train, df_new_dev, df_new_test]
  if reviews is None:
    reviews = [reviews_train, reviews_dev, reviews_test]
  if part is None:
    part = ['train1', 'dev1', 'test1']

  if not (len(df) == len(reviews) == len(part)):
    raise ValueError('df, reviews and part must have the same number of entries')

  for df_i, reviews_i, part_i in zip(df, reviews, part):
    # The context manager closes the file even if a write fails part-way.
    with open('{}.txt'.format(part_i), 'w+') as f:
      for rev in reviews_i:
        df_relevant = df_i[df_i['Review'] == rev]
        for token, label in zip(df_relevant['Tokens'], df_relevant['combined_labels']):
          f.write(str(token) + '\t' + str(label) + '\n')
        f.write('\n')

In [0]:
# Called without arguments, so the defaults declared on the function are used:
# the train, dev and test partitions go to train1.txt, dev1.txt and test1.txt.
write_inputfiles_txt()

In [0]:
from google.colab import files

# Hand the three generated input files over to Colab's download helper.
for filename in ('train1.txt', 'dev1.txt', 'test1.txt'):
  files.download(filename)

In [0]:
from farm.experiment import run_experiment, load_experiments

# Load the premise/claim final-model configuration and run its first
# (index 0) experiment on the freshly written input files.
final_model_path = "/content/gdrive/My Drive/Colab Notebooks/Corpus/experiments/Direct/final_model_premise_claim.json"
final_model = load_experiments(final_model_path)
final_experiment = final_model[0]

run_experiment(final_experiment)