# User Input

In [None]:
# Path of the saved sentiment classification model (loaded later with torch.load).
model_loc = '/content/drive/MyDrive/roberta_model'

# Path of the saved tokenizer (loaded later with torch.load).
tokenizer_loc = '/content/drive/MyDrive/roberta_tokenizer'

# Folder holding the evaluation datasets. Every regular file in this folder is read
# as a CSV, so make sure it contains only evaluation files.
eval_datasets_folder_loc = '/content/drive/MyDrive/datasets/evaluation datasets'

# Runtime type: 'GPU' runs the model on the GPU; any other value (e.g. 'CPU')
# runs it on the CPU.
runtime_type = 'CPU'

# Folder where the resulting time-evaluation CSV is saved.
time_evaluation_df_loc = '/content/drive/MyDrive/'

# Time Evaluation

In [None]:
!pip install transformers

In [None]:
import os
from os import listdir
from os.path import isfile, join
import pandas as pd

def extract_file_name(file_loc):
  """Return the file name of a '/'-separated path, cut at its first dot.

  Everything after the first '.' in the last path component is dropped, so
  'dir/data.tar.gz' yields 'data'.
  """
  _, _, last_component = file_loc.rpartition('/')
  stem, _, _ = last_component.partition('.')
  return stem

def extract_datasets_map(datasets_location):
  """Read every file in a folder as a CSV and return the datasets keyed by file name.

  Args:
    datasets_location: folder containing the dataset files; a trailing
      separator is optional.

  Returns:
    dict mapping each file's name (as produced by extract_file_name) to the
    pandas DataFrame read from that file.

  Raises:
    FileNotFoundError: if the folder does not exist or holds no regular files.
  """
  # join(..., '') appends a trailing separator only when one is missing.
  location = join(datasets_location, '')

  print(f'Reading datasets from: {location} ...')

  # listdir order is arbitrary; sorting keeps the returned dict's order reproducible.
  files_locations = sorted(
    join(location, f) for f in listdir(location) if isfile(join(location, f))
  )

  # Check emptiness via truthiness: comparing the list itself with 0 is always
  # unequal, so such a check can never fire on an empty folder.
  if not files_locations:
    raise FileNotFoundError('No files found in the provided location')

  datasets_map = {}
  for f in files_locations:
    print(f'Reading dataset: {f} ...')
    datasets_map[extract_file_name(f)] = pd.read_csv(f)

  print('Reading datasets successfully finished ...')

  return datasets_map


def create_results_folder(loc):
  """Ensure a 'results' folder exists next to `loc` and return its path.

  The folder is placed in the parent directory of `loc`, i.e. as a sibling
  of `loc`.

  Args:
    loc: path whose parent directory should contain the 'results' folder.

  Returns:
    Absolute path of the 'results' folder, as a string.

  Raises:
    FileExistsError: if something that is not a directory already occupies
      the results path.
  """
  parent_location = os.path.abspath(os.path.join(loc, os.pardir))
  results_location = os.path.join(parent_location, 'results')

  # exist_ok=True keeps repeated calls harmless and avoids the race between a
  # separate existence check and the directory creation.
  os.makedirs(results_location, exist_ok=True)

  print(f'Created results dataset on location: {results_location} ...')

  return results_location

In [None]:
from transformers import pipeline
import torch

# Load the saved model and tokenizer. For a GPU runtime the stored device
# placement is kept (map_location=None is torch.load's default); otherwise
# everything is remapped onto the CPU.
# NOTE(review): torch.load unpickles the files - only load them from a trusted source.
_map_location = None if runtime_type == 'GPU' else torch.device('cpu')
model = torch.load(model_loc, map_location=_map_location)
tokenizer = torch.load(tokenizer_loc, map_location=_map_location)

In [None]:
from transformers import pipeline
import timeit

def calc_time():
  """Run the global pipeline `md` over the global `sentences` and return its output.

  Takes no arguments so it can be handed directly to timeit as the callable
  to measure.
  """
  predictions = md(sentences)
  return predictions

# Load all evaluation CSVs up front so file reading is not part of the timed section.
eval_datasets_map = extract_datasets_map(eval_datasets_folder_loc)

# Build the sentiment pipeline; device=0 is only passed for GPU runs, otherwise
# the pipeline's default device is used.
pipeline_kwargs = {'device': 0} if runtime_type == 'GPU' else {}
md = pipeline('sentiment-analysis', model, tokenizer=tokenizer, return_all_scores=True, **pipeline_kwargs)
# NOTE(review): confirm the pipeline actually reads this instance attribute -
# function_to_apply is documented as a call/constructor argument.
md.function_to_apply = 'sigmoid'

# Loop-invariant settings: number of timed repetitions, result columns, output file.
num_times = 10
cols = ['Source', 'Eval Dataset', 'Sentences No.', 'Time in s']
# join() avoids a doubled separator when the folder already ends with '/'.
transformer_time_df_loc = join(
  time_evaluation_df_loc,
  f'{runtime_type}_roberta_model_time_evaluation_average_{num_times}_times.csv',
)

rows = []

for eval_name, eval_df in eval_datasets_map.items():
  # `sentences` stays a global on purpose: calc_time() reads it.
  sentences = list(eval_df.text.values)

  # timeit returns the total time of all repetitions; store the mean per run.
  execution_time = timeit.timeit(calc_time, number=num_times)
  duration = execution_time / num_times

  rows.append([f'RoBERTa {runtime_type}', eval_name, len(sentences), duration])

  # The CSV is rewritten after every dataset so partial results survive an
  # interrupted run.
  transformer_time_df = pd.DataFrame(rows, columns=cols)
  transformer_time_df.to_csv(transformer_time_df_loc, index=False)