In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sentence_transformers import SentenceTransformer
import warnings
from sklearn.svm import SVC
from sklearn.naive_bayes import BernoulliNB
warnings.filterwarnings("ignore")

# from yellowbrick.classifier import ClassPredictionError
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix, recall_score, precision_score

In [2]:
model = SentenceTransformer('all-mpnet-base-v2')
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
)

In [3]:
df = pd.read_csv('p3_dataset.csv')
print(df.shape)
df = df.drop_duplicates()
df

(11858, 2)


Unnamed: 0,text,class
0,latest headlin cnn busi tl dr u govern expect ...,ctrl
1,china want take victori lap handl coronaviru o...,ctrl
2,coronaviru disinform creat challeng china gove...,ctrl
3,china coronaviru eat wild anim made illeg end ...,ctrl
4,china economi could shrink first time decad co...,ctrl
...,...,...
11853,today world concern authoritarian racism natio...,instruct_gpt
11854,keep space station clean astronaut must practi...,instruct_gpt
11855,citi requir tenant pay first month rent someti...,instruct_gpt
11856,it’ easi brows customis app get news way keep ...,instruct_gpt


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11696 entries, 0 to 11857
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    11696 non-null  object
 1   class   11696 non-null  object
dtypes: object(2)
memory usage: 274.1+ KB


In [5]:
sentence_embeddings = model.encode(list(df['text']))
len(sentence_embeddings)

11696

In [6]:
df_emb = pd.DataFrame(sentence_embeddings)
df_emb

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,758,759,760,761,762,763,764,765,766,767
0,-0.008837,0.117042,-0.007996,-0.040766,0.003559,0.019983,0.032282,0.059083,0.079155,-0.026660,...,0.054842,0.004712,0.026940,0.034705,-0.011543,0.026539,-0.001364,-0.013121,-0.058519,-0.031064
1,0.030233,0.062680,-0.003427,-0.052569,0.000292,-0.037695,0.017265,0.010402,0.062493,-0.009577,...,0.001277,-0.041046,-0.001989,0.037358,-0.040020,0.024194,0.003996,0.003367,-0.019739,-0.061357
2,0.026796,0.028957,0.001495,-0.020572,0.021135,-0.025137,0.012322,0.014289,0.084895,0.004267,...,-0.016816,-0.035568,-0.016539,0.042458,-0.051677,0.003247,0.004773,-0.027866,0.003004,-0.054703
3,0.038854,0.124226,0.013936,-0.065665,-0.007498,0.001471,0.024547,0.053904,0.081720,-0.035615,...,0.029420,-0.011628,0.012565,0.003950,0.008221,0.061832,0.001783,-0.009111,-0.016666,-0.026918
4,0.008881,0.103756,-0.006652,-0.017220,0.028548,-0.023966,0.029187,0.011400,0.047449,-0.005421,...,0.007830,-0.023047,-0.005750,0.007509,-0.020088,0.030981,0.016948,-0.021072,0.008384,-0.055183
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11691,-0.026308,0.073993,0.007851,-0.021422,-0.030106,0.025697,0.020187,0.011591,0.040143,-0.040310,...,-0.036719,-0.026492,-0.054840,0.015334,0.003916,0.041460,-0.018680,-0.017782,-0.038553,-0.014994
11692,0.024650,0.009710,-0.024212,0.004624,-0.010820,-0.047083,0.013590,-0.017171,0.016600,0.002337,...,-0.033346,-0.081403,-0.011098,0.021980,-0.000797,-0.002005,-0.009574,-0.000511,0.025943,-0.005953
11693,-0.037773,-0.011640,-0.019491,-0.047768,0.020417,0.030706,0.085535,0.044130,0.059152,0.006162,...,-0.029339,0.027068,0.042737,-0.009913,-0.030611,0.012892,0.050776,0.004717,-0.040093,-0.004678
11694,0.007704,-0.021367,-0.006286,0.004926,-0.012738,-0.000665,0.018413,0.040275,0.055009,0.003289,...,0.065696,0.016113,0.010010,0.026507,0.008702,0.003356,0.000737,0.060205,-0.057147,-0.011639


### Splitting the data

In [8]:
X_train, X_test, y_train, y_test = train_test_split(df_emb,
                                                    df['class'],
                                                    stratify = df['class'],
                                                    test_size = 0.2,
                                                    random_state = 1234)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(9356, 768) (2340, 768) (9356,) (2340,)


In [9]:
y_train.value_counts()

gpt3            960
xlnet           853
gpt             853
fair            853
ctrl            853
gpt2            852
xlm             852
grover          852
instruct_gpt    851
human           792
pplm            785
Name: class, dtype: int64

In [10]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,758,759,760,761,762,763,764,765,766,767
10792,0.063163,0.020489,-0.005989,0.003281,-0.035063,-0.033848,0.002709,0.010068,-0.047196,0.004272,...,0.032946,-0.036872,0.034326,0.046818,0.010348,-0.031301,-0.002422,0.012020,-0.026906,-0.006574
5651,0.052671,0.038410,-0.018567,0.032686,0.037928,0.007734,0.008404,0.039482,0.041146,-0.024742,...,0.043965,0.010870,0.007974,-0.000729,-0.001360,0.017888,0.019582,0.038117,-0.007196,-0.025581
7962,0.036301,0.055222,-0.017431,0.027567,-0.014467,-0.002205,0.012505,0.070554,0.026014,-0.000574,...,0.001012,-0.013430,-0.015578,0.047647,-0.015474,-0.006416,0.006525,-0.030009,-0.014820,-0.043506
2504,0.018964,-0.010613,0.030587,-0.017238,0.002933,0.022028,0.049659,0.038238,0.013699,0.047704,...,-0.005205,-0.000225,0.004268,-0.008584,-0.003743,-0.065424,0.002164,0.015474,0.010478,-0.006202
6991,0.001491,0.079875,-0.021308,0.016338,0.034196,0.023870,0.044254,0.056388,0.021583,0.015146,...,-0.034605,-0.009073,-0.019389,0.041988,-0.009859,-0.017702,0.027600,-0.046783,0.002607,-0.033521
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5763,0.001401,0.005015,-0.022702,-0.014137,-0.006517,-0.020133,0.011409,0.008336,-0.028118,0.031970,...,0.024870,-0.041380,0.030007,0.010715,-0.017241,0.018799,-0.025948,0.026815,0.008391,-0.060845
6544,0.006986,0.106473,0.010655,0.018763,-0.020198,0.049001,0.008855,0.067513,0.036054,-0.046657,...,-0.012811,0.048908,-0.007339,0.027816,-0.007680,0.035326,-0.021071,-0.024930,-0.044101,-0.011001
9711,0.012206,0.058847,-0.009724,0.032277,-0.036247,-0.033877,0.004720,-0.009572,-0.024258,0.009916,...,-0.110826,-0.010336,0.047740,-0.003086,-0.016764,-0.006482,-0.054531,0.025817,-0.031273,-0.031522
129,0.073733,0.069292,0.005147,-0.024776,0.022127,-0.005185,0.001671,0.026837,-0.018658,-0.068894,...,0.034972,0.023586,0.040013,0.011976,0.030617,0.085325,-0.014491,0.010556,-0.017676,-0.023043


### Model Training

In [13]:
# def visualizer(clf):
#     visualizer = ClassPredictionError(clf, classes= [0, 1])

#     # Fit the training data to the visualizer
#     visualizer.fit(X_train, y_train)

#     # Evaluate the model on the test data
#     visualizer.score(X_test, y_test)

#     # Draw visualization
#     visualizer.show()
    
def metrics(pred):
    classes = ['ctrl', 'gpt', 'gpt2', 'grover','xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt']
    matrix = confusion_matrix(y_test, pred, labels = classes)
    mat = matrix.diagonal()/matrix.sum(axis=1)
    print(classification_report(y_test, pred, labels = classes,
                                digits=4))

    print('confusion matrix: ', mat)

    Accuracy = accuracy_score(y_test,pred)
    F1 = f1_score(y_test, pred, average='macro')
    print("Accuracy:", Accuracy)

    rec = recall_score(y_test, pred, average='macro')
    print('Recall: ', rec)
    prec = precision_score(y_test, pred, average='macro')
    print('Precision: ', prec)

    print('F1:', F1)
    
def get_predictions(tpr, fpr, threshold, ypred):
    #If tpr is hight & fpr is low : (tpr*(1-fpr)) is maximum
    actual_ypred = []

    thres = threshold[np.argmax(tpr * (1 - fpr))]
    for value in ypred:
        if value < thres:
            actual_ypred.append(0)
        else:
            actual_ypred.append(1)
    return actual_ypred
    
def get_roc_curve(model_name, classifier):
    
    Ypred_train = classifier.predict(X_train)
    fpr_train, tpr_train, threshold_train = roc_curve(y_train, Ypred_train)
    
    Ypred_test = classifier.predict(X_test)
    fpr_test, tpr_test, threshold_test = roc_curve(y_test, Ypred_test)

    auc_train = round(auc(fpr_train, tpr_train), 4)
    auc_test = round(auc(fpr_test, tpr_test), 4)

    plt.rcParams["figure.figsize"] = [5, 4]
    plt.plot(fpr_train, tpr_train, label = f'train AUC : {auc_train}')
    plt.plot(fpr_test, tpr_test, label = f'test AUC : {auc_test}')
    plt.legend()
    plt.grid()
    plt.title(f'{model_name} ROC Curve')
    plt.xlabel('parameter') 
    plt.ylabel('AUC')
    plt.show()

    roc_results = {
    'ypred_train' : Ypred_train,
    'ypred_test' : Ypred_test,
    'fpr_train' : fpr_train,
    'fpr_test' : fpr_test,
    'tpr_train' : tpr_train,
    'tpr_test' : tpr_test,
    'threshold_train' : threshold_train,
    'threshold_test' : threshold_test
    }
    return roc_results

def get_confusion_matrix(model_name, roc_results):
    actual_ypred_train = get_predictions(roc_results['tpr_train'],
                                         roc_results['fpr_train'],
                                         roc_results['threshold_train'],
                                         roc_results['ypred_train'])

    matrix_train = confusion_matrix(y_train, actual_ypred_train)

    actual_ypred_test = get_predictions(roc_results['tpr_test'],
                                        roc_results['fpr_test'],
                                        roc_results['threshold_test'],
                                        roc_results['ypred_test'])

    matrix_test = confusion_matrix(y_test, actual_ypred_test)

    plt.rcParams["figure.figsize"] = [12, 5]
    plt.rcParams["figure.autolayout"] = True
    f, axes = plt.subplots(1, 2)
    
    axes[0].title.set_text(f'{model_name} Training Confusion Matrix')
    sns.heatmap(matrix_train, annot = True, ax = axes[0], fmt = "d")


    sns.heatmap(matrix_test, annot = True, ax = axes[1], fmt = "d")
    axes[1].title.set_text(f'{model_name} Testing Confusion Matrix')

    return actual_ypred_test, actual_ypred_train

### Random Forest Classifier

In [12]:
randomforest_model = RandomForestClassifier()

parameters = { 'max_depth' : [10, 20, 30],
               'n_estimators' : [90, 150, 180],
               'max_samples' : [0.6, 0.8]
 }

cross_validation = 3
scoring_metric = "f1"
randomforest_model_cv = GridSearchCV(randomforest_model, 
                                     parameters,
                                     cv = cross_validation,
                                     scoring = scoring_metric,
                                     return_train_score=True)

randomforest_model_cv.fit(X_train, y_train)
print('Best Params ', randomforest_model_cv.best_params_)

Best Params  {'max_depth': 10, 'max_samples': 0.6, 'n_estimators': 90}


In [17]:
parameters = {
     'max_depth' : 10,
     'n_estimators' : 180,
     'max_samples' : 0.8
}
randomforest_model = RandomForestClassifier(max_depth = parameters['max_depth'],
                                            max_samples = parameters['max_samples'],
                                            n_estimators = parameters['n_estimators'])

randomforest_model.fit(X_train, y_train)
pred = randomforest_model.predict(X_test)

In [19]:
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix
matrix = confusion_matrix(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover', 'xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'])
mat = matrix.diagonal()/matrix.sum(axis=1)
print(classification_report(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover','xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'],
                            digits=4))
print('confusion matrix: ', mat)

Accuracy = accuracy_score(y_test,pred)
F1 = f1_score(y_test, pred, average='macro')
print("Accuracy:", Accuracy)

rec = recall_score(y_test, pred, average='macro')
print('Recall: ', rec)
prec = precision_score(y_test, pred, average='macro')
print('Precision: ', prec)

print('F1:', F1)

              precision    recall  f1-score   support

        ctrl     0.4982    0.6338    0.5579       213
         gpt     0.8041    0.9249    0.8603       213
        gpt2     0.2973    0.1549    0.2037       213
      grover     0.3632    0.3411    0.3518       214
         xlm     0.7759    0.8738    0.8220       214
       xlnet     0.7449    0.8498    0.7939       213
        pplm     0.4415    0.4235    0.4323       196
       human     0.2933    0.1111    0.1612       198
        fair     0.2690    0.2160    0.2396       213
        gpt3     0.3630    0.6458    0.4648       240
instruct_gpt     0.3533    0.2770    0.3105       213

    accuracy                         0.5004      2340
   macro avg     0.4731    0.4956    0.4725      2340
weighted avg     0.4733    0.5004    0.4748      2340

confusion matrix:  [0.63380282 0.92488263 0.15492958 0.3411215  0.87383178 0.84976526
 0.42346939 0.11111111 0.21596244 0.64583333 0.27699531]
Accuracy: 0.5004273504273504
Recall:  0.4956

### Logistic Regression

In [18]:
logistic_model = LogisticRegression()
parameters = { 'C' : [0.01, 0.1, 1, 3, 10],
               'penalty' : ['l2', 'elasticnet']
             }

cross_validation = 3
scoring_metric = "f1"

logistic_model_cv = GridSearchCV(logistic_model,
                                 parameters,
                                 cv = cross_validation,
                                 scoring = scoring_metric,
                                 return_train_score=True)

logistic_model_cv.fit(X_train, y_train)
print('Best Params ', logistic_model_cv.best_params_)

Best Params  {'C': 10, 'penalty': 'l2'}


In [20]:
parameters = { 'C' : 10,
               'penalty' : 'l2'
 }

logistic_model = LogisticRegression(C = parameters['C'], penalty = parameters['penalty'])
logistic_model.fit(X_train, y_train)
pred = logistic_model.predict(X_test)

In [21]:
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix
matrix = confusion_matrix(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover', 'xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'])
mat = matrix.diagonal()/matrix.sum(axis=1)
print(classification_report(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover','xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'],
                            digits=4))
print('confusion matrix: ', mat)

Accuracy = accuracy_score(y_test,pred)
F1 = f1_score(y_test, pred, average='macro')
print("Accuracy:", Accuracy)

rec = recall_score(y_test, pred, average='macro')
print('Recall: ', rec)
prec = precision_score(y_test, pred, average='macro')
print('Precision: ', prec)

print('F1:', F1)

              precision    recall  f1-score   support

        ctrl     0.8142    0.8638    0.8383       213
         gpt     0.9585    0.9765    0.9674       213
        gpt2     0.5561    0.5352    0.5455       213
      grover     0.5041    0.5748    0.5371       214
         xlm     0.9361    0.9579    0.9469       214
       xlnet     0.9000    0.8873    0.8936       213
        pplm     0.6173    0.6173    0.6173       196
       human     0.4235    0.3636    0.3913       198
        fair     0.4040    0.3756    0.3893       213
        gpt3     0.5532    0.5417    0.5474       240
instruct_gpt     0.5364    0.5540    0.5450       213

    accuracy                         0.6598      2340
   macro avg     0.6549    0.6589    0.6563      2340
weighted avg     0.6555    0.6598    0.6571      2340

confusion matrix:  [0.86384977 0.97652582 0.53521127 0.57476636 0.95794393 0.88732394
 0.61734694 0.36363636 0.37558685 0.54166667 0.55399061]
Accuracy: 0.6598290598290598
Recall:  0.6588

### XG Boost

In [23]:
import xgboost

xgboost_model = xgboost.XGBClassifier()



parameters = { 'max_depth' : [10, 20, 30],
               'n_estimators' : [90, 150, 180],
               'min_child_weight' : [1, 5, 10 ]
 }

cross_validation = 3
scoring_metric = "f1"
xgboost_model_cv = GridSearchCV(xgboost_model, 
                                parameters,
                                cv = cross_validation,
                                scoring = scoring_metric,
                                return_train_score=True)

xgboost_model_cv.fit(X_train, y_train)
print('Best Params ', xgboost_model_cv.best_params_)

Best Params  {'max_depth': 10, 'min_child_weight': 10, 'n_estimators': 180}


In [32]:
from sklearn.preprocessing import LabelEncoder
import xgboost
# encode class labels into integers
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

parameters = { 'max_depth' : 10,
               'min_child_weight' : 10,
               'n_estimators' : 180
 }

xgboost_model = xgboost.XGBClassifier(max_depth = parameters['max_depth'],
                                      min_child_weight = parameters['min_child_weight'],
                                      n_estimators = parameters['n_estimators'])
xgboost_model.fit(X_train, y_train)
pred = xgboost_model.predict(X_test)

In [34]:
class_labels = encoder.classes_
class_to_int = dict(zip(class_labels, range(len(class_labels))))
print(class_to_int)

{'ctrl': 0, 'fair': 1, 'gpt': 2, 'gpt2': 3, 'gpt3': 4, 'grover': 5, 'human': 6, 'instruct_gpt': 7, 'pplm': 8, 'xlm': 9, 'xlnet': 10}


In [35]:
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix
matrix = confusion_matrix(y_test, pred, labels = list(class_to_int.values()))
mat = matrix.diagonal()/matrix.sum(axis=1)
print(classification_report(y_test, pred, labels = list(class_to_int.values()),
                            digits=4))
print('confusion matrix: ', mat)

Accuracy = accuracy_score(y_test,pred)
F1 = f1_score(y_test, pred, average='macro')
print("Accuracy:", Accuracy)

rec = recall_score(y_test, pred, average='macro')
print('Recall: ', rec)
prec = precision_score(y_test, pred, average='macro')
print('Precision: ', prec)

print('F1:', F1)

              precision    recall  f1-score   support

           0     0.7054    0.7418    0.7231       213
           1     0.3053    0.2723    0.2878       213
           2     0.9095    0.9437    0.9263       213
           3     0.4467    0.4131    0.4293       213
           4     0.4888    0.5458    0.5157       240
           5     0.4369    0.4206    0.4286       214
           6     0.4038    0.3182    0.3559       198
           7     0.4315    0.4883    0.4581       213
           8     0.5377    0.5459    0.5418       196
           9     0.8744    0.9112    0.8924       214
          10     0.8651    0.8732    0.8692       213

    accuracy                         0.5902      2340
   macro avg     0.5823    0.5886    0.5844      2340
weighted avg     0.5827    0.5902    0.5854      2340

confusion matrix:  [0.74178404 0.27230047 0.94366197 0.41314554 0.54583333 0.42056075
 0.31818182 0.48826291 0.54591837 0.91121495 0.87323944]
Accuracy: 0.5901709401709402
Recall:  0.5885

### SVM

In [30]:
svm_model = SVC()

parameters = { 'kernel' : ['poly', 'rbf', 'sigmoid'],
               'max_iter' : [20, 50, 100]
 }

cross_validation = 3
scoring_metric = "f1"
svm_model_cv = GridSearchCV(svm_model, 
                            parameters,
                            cv = cross_validation,
                            scoring = scoring_metric,
                            return_train_score=True)

svm_model_cv.fit(X_train, y_train)
print('Best Params ', svm_model_cv.best_params_)

Best Params  {'kernel': 'rbf', 'max_iter': 100}


In [28]:
parameters = {
         'kernel' : 'rbf',
         'max_iter' : 100,
        }

svm_model = SVC(kernel = parameters['kernel'],
max_iter = parameters['max_iter'], probability = True)

svm_model.fit(X_train, y_train)
pred = svm_model.predict(X_test)

In [29]:
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix
matrix = confusion_matrix(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover', 'xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'])
mat = matrix.diagonal()/matrix.sum(axis=1)
print(classification_report(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover','xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'],
                            digits=4))
print('confusion matrix: ', mat)

Accuracy = accuracy_score(y_test,pred)
F1 = f1_score(y_test, pred, average='macro')
print("Accuracy:", Accuracy)

rec = recall_score(y_test, pred, average='macro')
print('Recall: ', rec)
prec = precision_score(y_test, pred, average='macro')
print('Precision: ', prec)

print('F1:', F1)

              precision    recall  f1-score   support

        ctrl     0.7961    0.7700    0.7828       213
         gpt     0.9324    0.9718    0.9517       213
        gpt2     0.4213    0.3897    0.4049       213
      grover     0.4043    0.2664    0.3211       214
         xlm     0.9148    0.9533    0.9336       214
       xlnet     0.9043    0.8873    0.8957       213
        pplm     0.4402    0.5255    0.4791       196
       human     0.3904    0.4495    0.4178       198
        fair     0.2978    0.3146    0.3059       213
        gpt3     0.5025    0.4167    0.4556       240
instruct_gpt     0.4805    0.5775    0.5245       213

    accuracy                         0.5923      2340
   macro avg     0.5895    0.5929    0.5884      2340
weighted avg     0.5909    0.5923    0.5888      2340

confusion matrix:  [0.76995305 0.97183099 0.38967136 0.26635514 0.95327103 0.88732394
 0.5255102  0.44949495 0.31455399 0.41666667 0.57746479]
Accuracy: 0.5923076923076923
Recall:  0.5929

### Multinomial Naive Bayes

In [30]:
bnb_model = BernoulliNB()
bnb_model.fit(X_train, y_train)
pred = bnb_model.predict(X_test)

In [31]:
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix
matrix = confusion_matrix(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover', 'xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'])
mat = matrix.diagonal()/matrix.sum(axis=1)
print(classification_report(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover','xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'],
                            digits=4))
print('confusion matrix: ', mat)

Accuracy = accuracy_score(y_test,pred)
F1 = f1_score(y_test, pred, average='macro')
print("Accuracy:", Accuracy)

rec = recall_score(y_test, pred, average='macro')
print('Recall: ', rec)
prec = precision_score(y_test, pred, average='macro')
print('Precision: ', prec)

print('F1:', F1)

              precision    recall  f1-score   support

        ctrl     0.4496    0.5023    0.4745       213
         gpt     0.8122    0.8732    0.8416       213
        gpt2     0.3571    0.1878    0.2462       213
      grover     0.2960    0.3084    0.3021       214
         xlm     0.7066    0.7991    0.7500       214
       xlnet     0.7200    0.7606    0.7397       213
        pplm     0.3518    0.4541    0.3964       196
       human     0.2113    0.1515    0.1765       198
        fair     0.2973    0.2066    0.2438       213
        gpt3     0.4018    0.5625    0.4688       240
instruct_gpt     0.3385    0.3052    0.3210       213

    accuracy                         0.4679      2340
   macro avg     0.4493    0.4647    0.4510      2340
weighted avg     0.4510    0.4679    0.4534      2340

confusion matrix:  [0.50234742 0.87323944 0.18779343 0.30841121 0.79906542 0.76056338
 0.45408163 0.15151515 0.20657277 0.5625     0.30516432]
Accuracy: 0.46794871794871795
Recall:  0.464

In [36]:
import tabulate
print('Task P3 : Sentence Transformers - Model all-mpnet-base-v2')
conclusion = [['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score'],
              ['Logistic Regression', 65.9, 65.4, 65.8, 65.6],
              ['XGBoost Classifier', 59.1, 58.2, 58.8, 58.4, ],
              ['Random Forest Classifier', 50.0, 47.3, 49.5, 47.25 ],
              ['SVM Classifier', 59.2, 58.9, 59.2, 58.8],
             ['Naive Bayes Classifier',  46.7, 45.2, 46.4, 45.1],   
             ]
print(tabulate.tabulate(conclusion, tablefmt='fancy_grid'))

Task P3 : Sentence Transformers - Model all-mpnet-base-v2
╒══════════════════════════╤══════════╤═══════════╤════════╤══════════╕
│ Model                    │ Accuracy │ Precision │ Recall │ F1 Score │
├──────────────────────────┼──────────┼───────────┼────────┼──────────┤
│ Logistic Regression      │ 65.9     │ 65.4      │ 65.8   │ 65.6     │
├──────────────────────────┼──────────┼───────────┼────────┼──────────┤
│ XGBoost Classifier       │ 59.1     │ 58.2      │ 58.8   │ 58.4     │
├──────────────────────────┼──────────┼───────────┼────────┼──────────┤
│ Random Forest Classifier │ 50.0     │ 47.3      │ 49.5   │ 47.25    │
├──────────────────────────┼──────────┼───────────┼────────┼──────────┤
│ SVM Classifier           │ 59.2     │ 58.9      │ 59.2   │ 58.8     │
├──────────────────────────┼──────────┼───────────┼────────┼──────────┤
│ Naive Bayes Classifier   │ 46.7     │ 45.2      │ 46.4   │ 45.1     │
╘══════════════════════════╧══════════╧═══════════╧════════╧══════════╛
