### Table of Contents
* [1 Imports](#chapter1)
* [2 Experiment](#chapter2)
    * [2.1 Traditional methods](#section_2_1)
    * [2.2 XLM-R](#section_2_2)

# Imports <a class="anchor" id="chapter1"></a>
* Import the necessary libraries and load the data

In [1]:
#libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from utils import configs
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from statistics import mean
from statistics import stdev
import ktrain
from ktrain import text
from utils.experiment_models import RepeatedBaselines
from utils.experiment_models import RandomClassifier as RC
from utils.experiment_models import RepeatedBERT
from utils import utils
import numpy as np

# Dataset: read the CSV, using its first column as the DataFrame index.
finalized_dataset = pd.read_csv(
    '../../data/arabic_dataset.csv',
    index_col=0,
)

# Seed taken from the project's configs module; reused below when building
# the models and the repeated-experiment runners.
RANDOM_SEED = configs.RANDOM_SEED

# Experiment <a class="anchor" id="chapter2"></a>

In [2]:
# Rename some columns so that they match the names used in the other dataset.
finalized_dataset = finalized_dataset.rename(
    columns={
        'Number of Question': 'Question_Nr',
        'label': 'Labels',
        'Responses': 'Response',
    }
)
finalized_dataset.head(5)


Unnamed: 0,Question,Right_Answer,Grade,Number,Response,Question_Nr,Labels
0,عرف مصطلح الجريمة الإلكترونية,هي كل سلوك غير قانوني يتم باستخدام الأجهزة ال...,3.0,[1],هي سلوك غير أخلاقي يتم عن طريق وسائل الكترونية...,1,1
1,عرف مصطلح الجريمة الإلكترونية,هي كل سلوك غير قانوني يتم باستخدام الأجهزة ال...,5.0,[2],هي كل سلوك غير أخلاقي يتم بواسطة الاجهزة الالك...,1,1
2,عرف مصطلح الجريمة الإلكترونية,هي كل سلوك غير قانوني يتم باستخدام الأجهزة ال...,2.625,[3],هي سلوك غير قانوني يحمل باستعمال الأجهزة الالك...,1,1
3,عرف مصطلح الجريمة الإلكترونية,هي كل سلوك غير قانوني يتم باستخدام الأجهزة ال...,4.0,[4],هي سلوك غير قانوني تستخدم الوسائل الالكترونية ...,1,1
4,عرف مصطلح الجريمة الإلكترونية,هي كل سلوك غير قانوني يتم باستخدام الأجهزة ال...,3.5,[5],هي كل سلوك غير أخلاقي يتم باستخدام الوسائل الا...,1,1


In [3]:
# --- Global parameters ---
TEST_SIZE = 0.15    # passed as test_size to repeated_split
VALID_SIZE = 0.15   # passed as valid_size to repeated_split
ITERATIONS = 5      # passed as iterations to the repeated-experiment runners
METRICS = [
    'average_precision',
    'roc_auc',
    'spearman',
    'averaged_classification_report',
]

# --- Global data ---
responses = finalized_dataset['Response']
labels = finalized_dataset['Labels']
corr_data = finalized_dataset['Grade']   # passed as correlation_data to fit_predict

## 2.1 Traditional methods  <a class="anchor" id="section_2_1"></a>

In [4]:
# Text representation: TF-IDF over unigrams, capped at 50000 features.
vectorizer = TfidfVectorizer(ngram_range=(1, 1), max_features=50000)

# Baseline models. RANDOM_SEED is passed wherever the constructor takes a seed.
regressor = LogisticRegression(random_state=RANDOM_SEED, max_iter=400)
rf = RandomForestClassifier(random_state=RANDOM_SEED)
one_NN = KNeighborsClassifier(n_neighbors=1)
three_NN = KNeighborsClassifier(n_neighbors=3)
rc = RC.RandomClassifier(change_state=True, random_state=RANDOM_SEED)

# Display name -> model instance, handed to the repeated hold-out runner.
MODELS_MAP = {
    'Logistic Regressor': regressor,
    'Random Forest': rf,
    '1-NN': one_NN,
    '3-NN': three_NN,
    'Random Classifier': rc,
}

# Repeated hold-out runner used for the baseline experiments.
rho = RepeatedBaselines.RepeatedBaselines(
    models=MODELS_MAP,
    metrics=METRICS,
    iterations=ITERATIONS,
    random_state=RANDOM_SEED,
)

#FOR PRINTING RESULTS
def print_results(results):
    """Print averaged metrics for every baseline model.

    Parameters
    ----------
    results : dict
        Maps a model name to that model's result dict. Each result dict is
        read under the keys 'average_precision', 'roc_auc', 'spearman' and
        'averaged_classification_report'.

    Notes
    -----
    The iteration count shown is the notebook-level ``ITERATIONS`` constant.
    NaN Spearman values are dropped before averaging. If a metric has no
    usable values it is reported as nan; if it has a single value the
    standard deviation is reported as 0 (``statistics.stdev`` needs at least
    two data points and ``statistics.mean`` needs at least one).
    """

    def _mean_and_stdev(values):
        """Return (mean, sample standard deviation) without raising on short input."""
        values = list(values)
        if not values:
            nan = float('nan')
            return nan, nan
        if len(values) == 1:
            return mean(values), 0.0
        return mean(values), stdev(values)

    for model, result in results.items():
        # Every 'average_precision' entry unpacks into a (neg, pos) pair;
        # element [1] of each half is the value that gets averaged.
        neg_label_scores = []
        pos_label_scores = []
        for neg, pos in result['average_precision']:
            neg_label_scores.append(neg[1])
            pos_label_scores.append(pos[1])

        # Every 'spearman' entry is a pair whose first element is the
        # correlation; NaN correlations are excluded from the statistics.
        spearmans = [spearman for spearman, _ in result['spearman'] if not np.isnan(spearman)]

        print('Model: {}. Iterations: {}'.format(model, ITERATIONS))
        print('Average AUPRC (Positive: 0): {:0.2f}. Standard deviation: {:0.3f}.'.format(*_mean_and_stdev(neg_label_scores)))
        print('Average AUPRC (Positive: 1): {:0.2f}. Standard deviation: {:0.3f}.'.format(*_mean_and_stdev(pos_label_scores)))
        print('Average ROC AUC: {:0.2f}. Standard deviation: {:0.3f}.'.format(*_mean_and_stdev(result['roc_auc'])))
        print('Average Spearman correlation: {:0.2f}. Standard deviation: {:0.3f}.'.format(*_mean_and_stdev(spearmans)))

        report = result['averaged_classification_report']
        formated_string = utils.format_report(report)
        print(formated_string + '\n\n')

In [5]:
# Generate the repeated splits, stratified on the labels.
repeated_splits = rho.repeated_split(
    X=responses,
    y=labels,
    test_size=TEST_SIZE,
    valid_size=VALID_SIZE,
    stratify=labels,
)

# Every split unpacks into eight items. The two dev items are not used by the
# baselines, so only train/test data and the two trailing index items are
# kept. A comprehension is used so the per-split variables do not linger in
# the kernel namespace after the cell has run.
split_list = [
    [x_train, x_test, y_train, y_test, idx1, idx2]
    for x_train, y_train, _x_dev, _y_dev, x_test, y_test, idx1, idx2 in repeated_splits
]

# Vectorise the text, fit and score every model on every split, then report.
conv_repeated_splits = rho.convert_data(split_list=split_list, representation=vectorizer)
results = rho.fit_predict(split_list=conv_repeated_splits, correlation_data=corr_data)
print_results(results)

Model: Logistic Regressor. Iterations: 5
Average AUPRC (Positive: 0): 0.54. Standard deviation: 0.059.
Average AUPRC (Positive: 1): 0.83. Standard deviation: 0.029.
Average ROC AUC: 0.71. Standard deviation: 0.040.
Average Spearman correlation: 0.41. Standard deviation: 0.062.
0-> precision: 0.62, recall: 0.09, f1-score: 0.16, support: 108.40, 
1-> precision: 0.68, recall: 0.97, f1-score: 0.80, support: 211.60, 
accuracy-> 0.67
macro avg-> precision: 0.65, recall: 0.53, f1-score: 0.48, support: 320.00, 
weighted avg-> precision: 0.66, recall: 0.67, f1-score: 0.58, support: 320.00, 



Model: Random Forest. Iterations: 5
Average AUPRC (Positive: 0): 0.52. Standard deviation: 0.059.
Average AUPRC (Positive: 1): 0.81. Standard deviation: 0.041.
Average ROC AUC: 0.69. Standard deviation: 0.059.
Average Spearman correlation: 0.39. Standard deviation: 0.110.
0-> precision: 0.62, recall: 0.21, f1-score: 0.31, support: 108.40, 
1-> precision: 0.70, recall: 0.94, f1-score: 0.80, support: 211.60

## 2.2 XLM-R <a class="anchor" id="section_2_2"></a>

In [None]:
#FOR PRINTING RESULTS
def print_results_bert(results):
    """Print averaged metrics for the XLM-R experiment.

    Parameters
    ----------
    results : dict
        Result dict read under the keys 'average_precision', 'roc_auc',
        'spearman' and 'averaged_classification_report'.

    Notes
    -----
    The iteration count shown is the notebook-level ``ITERATIONS`` constant.
    NaN Spearman values are dropped before averaging, matching the baseline
    printer. If a metric has no usable values it is reported as nan; if it
    has a single value the standard deviation is reported as 0
    (``statistics.stdev`` needs at least two data points).
    """

    def _mean_and_stdev(values):
        """Return (mean, sample standard deviation) without raising on short input."""
        values = list(values)
        if not values:
            nan = float('nan')
            return nan, nan
        if len(values) == 1:
            return mean(values), 0.0
        return mean(values), stdev(values)

    # Every 'spearman' entry is a pair whose first element is the correlation.
    spearmans = [spearman for spearman, _ in results['spearman'] if not np.isnan(spearman)]

    # Every 'average_precision' entry unpacks into a (neg, pos) pair;
    # element [1] of each half is the value that gets averaged.
    neg_label_scores = []
    pos_label_scores = []
    for neg, pos in results['average_precision']:
        neg_label_scores.append(neg[1])
        pos_label_scores.append(pos[1])

    print('Model: XLM-R. Iterations: {}'.format(ITERATIONS))
    print('Average AUPRC (Positive: 0): {:0.2f}. Standard deviation: {:0.3f}.'.format(*_mean_and_stdev(neg_label_scores)))
    print('Average AUPRC (Positive: 1): {:0.2f}. Standard deviation: {:0.3f}.'.format(*_mean_and_stdev(pos_label_scores)))
    print('Average ROC AUC: {:0.2f}. Standard deviation: {:0.3f}.'.format(*_mean_and_stdev(results['roc_auc'])))
    print('Average Spearman correlation: {:0.2f}. Standard deviation: {:0.3f}.'.format(*_mean_and_stdev(spearmans)))

    report = results['averaged_classification_report']
    formated_string = utils.format_report(report)
    print(formated_string + '\n\n')

# XLM-R parameters
MODEL_NAME = "xlm-roberta-base"
BATCH_SIZE = 6
MAX_LEN = 200
CLASS_NAMES = [0,1]
EPOCHS = 30
LR = 1e-5

# XLM-R transformer wrapper. maxlen is taken from MAX_LEN (previously a
# duplicated literal 200) so that changing the constant actually takes effect.
t = text.Transformer(MODEL_NAME, maxlen=MAX_LEN, class_names=CLASS_NAMES)

In [None]:
# Repeated hold-out experiment with XLM-R.
# NOTE(review): this cell was originally labelled "scrambled merged experiment";
# nothing in this notebook scrambles or merges data, so the label looks carried
# over from another notebook - confirm.
repeated_bert = RepeatedBERT.RepeatedBERT(
    transformer=t,
    metrics=METRICS,
    iterations=ITERATIONS,
    random_state=RANDOM_SEED,
)

# Build the list of splits, stratified on the labels.
split_list = repeated_bert.repeated_split(
    X=responses,
    y=labels,
    test_size=TEST_SIZE,
    valid_size=VALID_SIZE,
    stratify=labels,
)

# Fit and predict on every split.
bert_results = repeated_bert.fit_predict(
    split_list=split_list,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    lr=LR,
    correlation_data=corr_data,
)

# Report the averaged metrics.
print_results_bert(bert_results)

In [None]:
# This code was run on Google Colab; these were the results:
# Model: XLM-R. Iterations: 5
# Average AUPRC (Positive: 0): 0.60. Standard deviation: 0.026.
# Average AUPRC (Positive: 1): 0.85. Standard deviation: 0.024.
# Average ROC AUC: 0.75. Standard deviation: 0.030.
# Average Spearman correlation: 0.48. Standard deviation: 0.040.
# 0-> precision: 0.64, recall: 0.45, f1-score: 0.52, support: 108.40,
# 1-> precision: 0.76, recall: 0.87, f1-score: 0.81, support: 211.60,
# accuracy-> 0.73
# macro avg-> precision: 0.70, recall: 0.66, f1-score: 0.66, support: 320.00,
# weighted avg-> precision: 0.72, recall: 0.73, f1-score: 0.71, support: 320.00,