In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import BernoulliNB

import warnings
warnings.filterwarnings("ignore")

# from yellowbrick.classifier import ClassPredictionError
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix, recall_score, precision_score

In [7]:
df = pd.read_csv('open_ai_p3_embeddings.csv')
print(df.shape)
df = df.drop_duplicates()
df

(11858, 3)


Unnamed: 0,text,class,ada_embedding
0,latest headlin cnn busi tl dr u govern expect ...,ctrl,"[-0.0244539026170969, -0.015060365200042725, 0..."
1,china want take victori lap handl coronaviru o...,ctrl,"[0.0019263223512098193, -0.003812055103480816,..."
2,coronaviru disinform creat challeng china gove...,ctrl,"[-0.004948284476995468, 0.002945489715784788, ..."
3,china coronaviru eat wild anim made illeg end ...,ctrl,"[0.005060594528913498, -0.01757677085697651, 0..."
4,china economi could shrink first time decad co...,ctrl,"[0.0027951218653470278, -0.009340660646557808,..."
...,...,...,...
11853,today world concern authoritarian racism natio...,instruct_gpt,"[-0.004592906218022108, -0.01598070189356804, ..."
11854,keep space station clean astronaut must practi...,instruct_gpt,"[0.02663489431142807, 0.0020218866411596537, 0..."
11855,citi requir tenant pay first month rent someti...,instruct_gpt,"[-0.007772138807922602, 0.010608934797346592, ..."
11856,it’ easi brows customis app get news way keep ...,instruct_gpt,"[-0.02405986562371254, 0.02182905748486519, 0...."


In [8]:
def convert_to_list(emb):
    return eval(emb)

df['ada_embedding'] = df['ada_embedding'].apply(convert_to_list)
df

Unnamed: 0,text,class,ada_embedding
0,latest headlin cnn busi tl dr u govern expect ...,ctrl,"[-0.0244539026170969, -0.015060365200042725, 0..."
1,china want take victori lap handl coronaviru o...,ctrl,"[0.0019263223512098193, -0.003812055103480816,..."
2,coronaviru disinform creat challeng china gove...,ctrl,"[-0.004948284476995468, 0.002945489715784788, ..."
3,china coronaviru eat wild anim made illeg end ...,ctrl,"[0.005060594528913498, -0.01757677085697651, 0..."
4,china economi could shrink first time decad co...,ctrl,"[0.0027951218653470278, -0.009340660646557808,..."
...,...,...,...
11853,today world concern authoritarian racism natio...,instruct_gpt,"[-0.004592906218022108, -0.01598070189356804, ..."
11854,keep space station clean astronaut must practi...,instruct_gpt,"[0.02663489431142807, 0.0020218866411596537, 0..."
11855,citi requir tenant pay first month rent someti...,instruct_gpt,"[-0.007772138807922602, 0.010608934797346592, ..."
11856,it’ easi brows customis app get news way keep ...,instruct_gpt,"[-0.02405986562371254, 0.02182905748486519, 0...."


In [9]:
for idx, doc in df.iterrows():
    if len(doc['ada_embedding']) == 0:
        print(idx)

7610
8254


In [10]:
df = df.drop([7610,8254])

for idx, doc in df.iterrows():
    if len(doc['ada_embedding']) == 0:
        print(idx)

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11695 entries, 0 to 11857
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   text           11695 non-null  object
 1   class          11695 non-null  object
 2   ada_embedding  11695 non-null  object
dtypes: object(3)
memory usage: 365.5+ KB


### Splitting the data

In [50]:
X_train, X_test, y_train, y_test = train_test_split(df['ada_embedding'],
                                                    df['class'],
                                                    stratify = df['class'],
                                                    test_size = 0.2,
                                                    random_state = 1234)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(9356,) (2339,) (9356,) (2339,)


In [51]:
y_train.value_counts()

gpt3            960
xlnet           853
gpt2            853
gpt             853
fair            853
ctrl            853
xlm             853
grover          853
instruct_gpt    851
human           790
pplm            784
Name: class, dtype: int64

In [52]:
X_train = pd.DataFrame(X_train.tolist())
X_test = pd.DataFrame(X_test.tolist())
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(9356, 1536) (2339, 1536) (9356,) (2339,)


In [53]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,-0.006242,-0.001003,-0.000267,-0.033027,-0.016240,0.011234,0.002790,-0.000490,-0.030405,-0.006389,...,0.019450,-0.003674,0.020147,-0.005187,-0.021335,0.002739,-0.002652,0.011152,0.003348,-0.021089
1,-0.002220,-0.021099,0.024682,-0.027978,-0.014343,-0.009869,-0.003150,0.008908,-0.006920,-0.034407,...,0.022747,0.012640,0.032119,-0.026616,-0.023129,0.001225,0.004342,-0.019519,-0.003610,-0.016168
2,0.000682,-0.018135,-0.008832,-0.010734,-0.013994,0.024743,-0.034742,-0.017224,-0.038824,0.006498,...,0.014464,-0.028252,-0.005136,-0.013759,-0.031365,-0.008296,-0.004901,-0.009735,0.010771,-0.000556
3,0.013058,0.002242,0.017919,-0.042972,0.027818,0.024863,-0.018640,-0.009075,-0.027886,-0.027709,...,0.028757,-0.006427,0.031399,-0.029111,-0.016612,0.031644,-0.012806,-0.025571,0.004156,-0.020479
4,0.012985,0.007758,0.004444,-0.021076,0.005179,0.000529,-0.014795,-0.015530,-0.015585,-0.039240,...,0.020771,0.006954,0.011169,-0.016500,-0.017568,0.014725,-0.003688,0.003142,0.025804,-0.024529
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9351,-0.015980,0.002475,0.017436,-0.030970,0.008134,0.003645,-0.006138,-0.019992,-0.008594,-0.014894,...,0.019593,0.007004,0.027027,-0.045892,-0.024636,-0.018755,-0.008691,-0.012394,0.005166,-0.039764
9352,-0.016601,-0.021492,0.003462,-0.026522,-0.011612,-0.011410,-0.010634,-0.005859,-0.033453,-0.020528,...,0.012528,0.018711,0.018669,-0.043543,-0.036025,0.006533,-0.017188,-0.010746,0.001604,-0.026467
9353,0.001744,-0.014199,0.021983,-0.012368,0.005380,0.038053,-0.042719,-0.022553,-0.029917,-0.018403,...,0.012565,-0.008856,0.040359,-0.010144,-0.004119,0.010815,0.001682,-0.013243,0.007215,-0.018796
9354,-0.005976,-0.011918,-0.001191,-0.016154,0.015759,0.019648,-0.002475,-0.005099,-0.008240,-0.020532,...,-0.017894,-0.004813,-0.000516,-0.015039,-0.016276,0.023768,-0.006833,-0.004321,0.018397,-0.017323


### Model Training

In [19]:
# def visualizer(clf):
#     visualizer = ClassPredictionError(clf, classes= [0, 1])

#     # Fit the training data to the visualizer
#     visualizer.fit(X_train, y_train)

#     # Evaluate the model on the test data
#     visualizer.score(X_test, y_test)

#     # Draw visualization
#     visualizer.show()
    
def metrics(pred):
    classes = ['ctrl', 'gpt', 'gpt2', 'grover','xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt']
    matrix = confusion_matrix(y_test, pred, labels = classes)
    mat = matrix.diagonal()/matrix.sum(axis=1)
    print(classification_report(y_test, pred, labels = classes,
                                digits=4))

    print('confusion matrix: ', mat)

    Accuracy = accuracy_score(y_test,pred)
    F1 = f1_score(y_test, pred, average='macro')
    print("Accuracy:", Accuracy)

    rec = recall_score(y_test, pred, average='macro')
    print('Recall: ', rec)
    prec = precision_score(y_test, pred, average='macro')
    print('Precision: ', prec)

    print('F1:', F1)
    
def get_predictions(tpr, fpr, threshold, ypred):
    #If tpr is hight & fpr is low : (tpr*(1-fpr)) is maximum
    actual_ypred = []

    thres = threshold[np.argmax(tpr * (1 - fpr))]
    for value in ypred:
        if value < thres:
            actual_ypred.append(0)
        else:
            actual_ypred.append(1)
    return actual_ypred
    
def get_roc_curve(model_name, classifier):
    
    Ypred_train = classifier.predict(X_train)
    fpr_train, tpr_train, threshold_train = roc_curve(y_train, Ypred_train)
    
    Ypred_test = classifier.predict(X_test)
    fpr_test, tpr_test, threshold_test = roc_curve(y_test, Ypred_test)

    auc_train = round(auc(fpr_train, tpr_train), 4)
    auc_test = round(auc(fpr_test, tpr_test), 4)

    plt.rcParams["figure.figsize"] = [5, 4]
    plt.plot(fpr_train, tpr_train, label = f'train AUC : {auc_train}')
    plt.plot(fpr_test, tpr_test, label = f'test AUC : {auc_test}')
    plt.legend()
    plt.grid()
    plt.title(f'{model_name} ROC Curve')
    plt.xlabel('parameter') 
    plt.ylabel('AUC')
    plt.show()

    roc_results = {
    'ypred_train' : Ypred_train,
    'ypred_test' : Ypred_test,
    'fpr_train' : fpr_train,
    'fpr_test' : fpr_test,
    'tpr_train' : tpr_train,
    'tpr_test' : tpr_test,
    'threshold_train' : threshold_train,
    'threshold_test' : threshold_test
    }
    return roc_results

def get_confusion_matrix(model_name, roc_results):
    actual_ypred_train = get_predictions(roc_results['tpr_train'],
                                         roc_results['fpr_train'],
                                         roc_results['threshold_train'],
                                         roc_results['ypred_train'])

    matrix_train = confusion_matrix(y_train, actual_ypred_train)

    actual_ypred_test = get_predictions(roc_results['tpr_test'],
                                        roc_results['fpr_test'],
                                        roc_results['threshold_test'],
                                        roc_results['ypred_test'])

    matrix_test = confusion_matrix(y_test, actual_ypred_test)

    plt.rcParams["figure.figsize"] = [12, 5]
    plt.rcParams["figure.autolayout"] = True
    f, axes = plt.subplots(1, 2)
    
    axes[0].title.set_text(f'{model_name} Training Confusion Matrix')
    sns.heatmap(matrix_train, annot = True, ax = axes[0], fmt = "d")


    sns.heatmap(matrix_test, annot = True, ax = axes[1], fmt = "d")
    axes[1].title.set_text(f'{model_name} Testing Confusion Matrix')

    return actual_ypred_test, actual_ypred_train

### Random Forest Classifier

In [17]:
randomforest_model = RandomForestClassifier()

parameters = { 'max_depth' : [10, 20, 30],
               'n_estimators' : [90, 150, 180],
               'max_samples' : [0.6, 0.8]
 }

cross_validation = 3
scoring_metric = "f1"
randomforest_model_cv = GridSearchCV(randomforest_model, 
                                     parameters,
                                     cv = cross_validation,
                                     scoring = scoring_metric,
                                     return_train_score=True)

randomforest_model_cv.fit(X_train, y_train)
print('Best Params ', randomforest_model_cv.best_params_)

Best Params  {'max_depth': 10, 'max_samples': 0.6, 'n_estimators': 90}


In [27]:
parameters = {
     'max_depth' : 10,
     'n_estimators' : 180,
     'max_samples' : 0.8
}
randomforest_model = RandomForestClassifier(max_depth = parameters['max_depth'],
                                            max_samples = parameters['max_samples'],
                                            n_estimators = parameters['n_estimators'])

randomforest_model.fit(X_train, y_train)
pred = randomforest_model.predict(X_test)

In [28]:
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix
matrix = confusion_matrix(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover', 'xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'])
mat = matrix.diagonal()/matrix.sum(axis=1)
print(classification_report(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover','xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'],
                            digits=4))
print('confusion matrix: ', mat)

Accuracy = accuracy_score(y_test,pred)
F1 = f1_score(y_test, pred, average='macro')
print("Accuracy:", Accuracy)

rec = recall_score(y_test, pred, average='macro')
print('Recall: ', rec)
prec = precision_score(y_test, pred, average='macro')
print('Precision: ', prec)

print('F1:', F1)

              precision    recall  f1-score   support

        ctrl     0.5257    0.6714    0.5897       213
         gpt     0.8039    0.9624    0.8761       213
        gpt2     0.4091    0.2958    0.3433       213
      grover     0.3289    0.2347    0.2740       213
         xlm     0.8795    0.9249    0.9016       213
       xlnet     0.7244    0.8638    0.7880       213
        pplm     0.6531    0.3249    0.4339       197
       human     0.5046    0.2778    0.3583       198
        fair     0.3282    0.3005    0.3137       213
        gpt3     0.3981    0.6917    0.5053       240
instruct_gpt     0.4354    0.4272    0.4313       213

    accuracy                         0.5481      2339
   macro avg     0.5446    0.5432    0.5287      2339
weighted avg     0.5425    0.5481    0.5301      2339

confusion matrix:  [0.6713615  0.96244131 0.29577465 0.23474178 0.92488263 0.86384977
 0.3248731  0.27777778 0.30046948 0.69166667 0.42723005]
Accuracy: 0.5480974775545104
Recall:  0.5431

### Logistic Regression

In [45]:
logistic_model = LogisticRegression()
parameters = { 'C' : [0.01, 0.1, 1, 3, 10],
               'penalty' : ['l2', 'elasticnet']
             }

cross_validation = 3
scoring_metric = "f1"

logistic_model_cv = GridSearchCV(logistic_model,
                                 parameters,
                                 cv = cross_validation,
                                 scoring = scoring_metric,
                                 return_train_score=True)

logistic_model_cv.fit(X_train, y_train)
print('Best Params ', logistic_model_cv.best_params_)

Best Params  {'C': 10, 'penalty': 'l2'}


In [25]:
parameters = { 'C' : 10,
               'penalty' : 'l2'
 }

logistic_model = LogisticRegression(C = parameters['C'], penalty = parameters['penalty'])
logistic_model.fit(X_train, y_train)
pred = logistic_model.predict(X_test)

In [26]:
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix
matrix = confusion_matrix(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover', 'xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'])
mat = matrix.diagonal()/matrix.sum(axis=1)
print(classification_report(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover','xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'],
                            digits=4))
print('confusion matrix: ', mat)

Accuracy = accuracy_score(y_test,pred)
F1 = f1_score(y_test, pred, average='macro')
print("Accuracy:", Accuracy)

rec = recall_score(y_test, pred, average='macro')
print('Recall: ', rec)
prec = precision_score(y_test, pred, average='macro')
print('Precision: ', prec)

print('F1:', F1)

              precision    recall  f1-score   support

        ctrl     0.9393    0.9437    0.9415       213
         gpt     0.9593    0.9953    0.9770       213
        gpt2     0.6650    0.6150    0.6390       213
      grover     0.6538    0.6385    0.6461       213
         xlm     0.9858    0.9765    0.9811       213
       xlnet     0.9671    0.9671    0.9671       213
        pplm     0.7143    0.6599    0.6860       197
       human     0.7473    0.6869    0.7158       198
        fair     0.5652    0.6714    0.6137       213
        gpt3     0.6838    0.6667    0.6751       240
instruct_gpt     0.6652    0.6995    0.6819       213

    accuracy                         0.7747      2339
   macro avg     0.7769    0.7746    0.7749      2339
weighted avg     0.7764    0.7747    0.7748      2339

confusion matrix:  [0.94366197 0.99530516 0.61502347 0.63849765 0.97652582 0.96713615
 0.65989848 0.68686869 0.6713615  0.66666667 0.69953052]
Accuracy: 0.774690038477982
Recall:  0.77458

### XG Boost

In [29]:
import xgboost

xgboost_model = xgboost.XGBClassifier()

parameters = { 'max_depth' : [10, 20, 30],
               'n_estimators' : [90, 150, 180],
               'min_child_weight' : [1, 5, 10 ]
 }

cross_validation = 3
scoring_metric = "f1"
xgboost_model_cv = GridSearchCV(xgboost_model, 
                                parameters,
                                cv = cross_validation,
                                scoring = scoring_metric,
                                return_train_score=True)

xgboost_model_cv.fit(X_train, y_train)
print('Best Params ', xgboost_model_cv.best_params_)


KeyboardInterrupt



In [35]:
from sklearn.preprocessing import LabelEncoder
import xgboost
# encode class labels into integers
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

xgboost_model = xgboost.XGBClassifier()


parameters = { 'max_depth' : 20,
               'min_child_weight' : 5,
               'n_estimators' : 180
 }

xgboost_model = xgboost.XGBClassifier(max_depth = parameters['max_depth'],
                                      min_child_weight = parameters['min_child_weight'],
                                      n_estimators = parameters['n_estimators'])
xgboost_model.fit(X_train, y_train)
Ypredtest = xgboost_model.predict(X_test)

In [40]:
class_labels = encoder.classes_
class_to_int = dict(zip(class_labels, range(len(class_labels))))
print(class_to_int)

{'ctrl': 0, 'fair': 1, 'gpt': 2, 'gpt2': 3, 'gpt3': 4, 'grover': 5, 'human': 6, 'instruct_gpt': 7, 'pplm': 8, 'xlm': 9, 'xlnet': 10}


In [45]:
list(class_to_int.values())

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [36]:
pred = xgboost_model.predict(X_test)

In [47]:
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix
matrix = confusion_matrix(y_test, pred, labels = list(class_to_int.values()))
mat = matrix.diagonal()/matrix.sum(axis=1)
print(classification_report(y_test, pred, labels = list(class_to_int.values()),
                            digits=4))
print('confusion matrix: ', mat)

Accuracy = accuracy_score(y_test,pred)
F1 = f1_score(y_test, pred, average='macro')
print("Accuracy:", Accuracy)

rec = recall_score(y_test, pred, average='macro')
print('Recall: ', rec)
prec = precision_score(y_test, pred, average='macro')
print('Precision: ', prec)

print('F1:', F1)

              precision    recall  f1-score   support

           0     0.7808    0.8028    0.7917       213
           1     0.4231    0.4131    0.4181       213
           2     0.9502    0.9859    0.9677       213
           3     0.4912    0.5258    0.5079       213
           4     0.5992    0.6167    0.6078       240
           5     0.4508    0.4085    0.4286       213
           6     0.5978    0.5404    0.5676       198
           7     0.5855    0.6432    0.6130       213
           8     0.6022    0.5533    0.5767       197
           9     0.9263    0.9437    0.9349       213
          10     0.9104    0.9061    0.9082       213

    accuracy                         0.6682      2339
   macro avg     0.6652    0.6672    0.6657      2339
weighted avg     0.6653    0.6682    0.6662      2339

confusion matrix:  [0.8028169  0.41314554 0.98591549 0.5258216  0.61666667 0.4084507
 0.54040404 0.64319249 0.55329949 0.94366197 0.90610329]
Accuracy: 0.6682342881573322
Recall:  0.66722

### SVM

In [60]:
svm_model = SVC()

parameters = { 'kernel' : ['poly', 'rbf', 'sigmoid'],
               'max_iter' : [20, 50, 100]
 }

cross_validation = 3
scoring_metric = "f1"
svm_model_cv = GridSearchCV(svm_model, 
                            parameters,
                            cv = cross_validation,
                            scoring = scoring_metric,
                            return_train_score=True)

svm_model_cv.fit(X_train, y_train)
print('Best Params ', svm_model_cv.best_params_)

Best Params  {'kernel': 'poly', 'max_iter': 100}


In [33]:
parameters = {
         'kernel' : 'poly',
         'max_iter' : 100,
        }

svm_model = SVC(kernel = parameters['kernel'],
max_iter = parameters['max_iter'], probability = True)

svm_model.fit(X_train, y_train)
pred = svm_model.predict(X_test)

In [34]:
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix
matrix = confusion_matrix(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover', 'xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'])
mat = matrix.diagonal()/matrix.sum(axis=1)
print(classification_report(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover','xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'],
                            digits=4))
print('confusion matrix: ', mat)

Accuracy = accuracy_score(y_test,pred)
F1 = f1_score(y_test, pred, average='macro')
print("Accuracy:", Accuracy)

rec = recall_score(y_test, pred, average='macro')
print('Recall: ', rec)
prec = precision_score(y_test, pred, average='macro')
print('Precision: ', prec)

print('F1:', F1)

              precision    recall  f1-score   support

        ctrl     0.9275    0.8404    0.8818       213
         gpt     0.9680    0.9953    0.9815       213
        gpt2     0.5537    0.4601    0.5026       213
      grover     0.5268    0.5070    0.5167       213
         xlm     0.9953    0.9859    0.9906       213
       xlnet     0.9621    0.9531    0.9575       213
        pplm     0.4349    0.6447    0.5194       197
       human     0.5639    0.6465    0.6024       198
        fair     0.4417    0.3380    0.3830       213
        gpt3     0.6774    0.5250    0.5915       240
instruct_gpt     0.5922    0.7089    0.6453       213

    accuracy                         0.6900      2339
   macro avg     0.6949    0.6914    0.6884      2339
weighted avg     0.6973    0.6900    0.6890      2339

confusion matrix:  [0.84037559 0.99530516 0.4600939  0.50704225 0.98591549 0.95305164
 0.64467005 0.64646465 0.33802817 0.525      0.70892019]
Accuracy: 0.6900384779820437
Recall:  0.6913

### Naive Bayes

In [56]:
bnb_model = BernoulliNB()
bnb_model.fit(X_train, y_train)
pred = bnb_model.predict(X_test)

In [57]:
from sklearn.metrics import classification_report, roc_curve, auc, f1_score, accuracy_score, confusion_matrix
matrix = confusion_matrix(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover', 'xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'])
mat = matrix.diagonal()/matrix.sum(axis=1)
print(classification_report(y_test, pred, labels = ['ctrl', 'gpt', 'gpt2', 'grover','xlm', 'xlnet', 'pplm', 'human', 'fair', 'gpt3', 'instruct_gpt'],
                            digits=4))
print('confusion matrix: ', mat)

Accuracy = accuracy_score(y_test,pred)
F1 = f1_score(y_test, pred, average='macro')
print("Accuracy:", Accuracy)

rec = recall_score(y_test, pred, average='macro')
print('Recall: ', rec)
prec = precision_score(y_test, pred, average='macro')
print('Precision: ', prec)

print('F1:', F1)

              precision    recall  f1-score   support

        ctrl     0.6445    0.6385    0.6415       213
         gpt     0.8744    0.9155    0.8945       213
        gpt2     0.4475    0.3803    0.4112       213
      grover     0.3102    0.3146    0.3124       213
         xlm     0.9239    0.8545    0.8878       213
       xlnet     0.8243    0.8592    0.8414       213
        pplm     0.4677    0.4416    0.4543       197
       human     0.4141    0.4747    0.4424       198
        fair     0.3458    0.3474    0.3466       213
        gpt3     0.5430    0.5000    0.5206       240
instruct_gpt     0.4606    0.5211    0.4890       213

    accuracy                         0.5686      2339
   macro avg     0.5687    0.5679    0.5674      2339
weighted avg     0.5701    0.5686    0.5685      2339

confusion matrix:  [0.63849765 0.91549296 0.38028169 0.31455399 0.85446009 0.85915493
 0.44162437 0.47474747 0.34741784 0.5        0.52112676]
Accuracy: 0.5686190679777683
Recall:  0.5679

In [58]:
import tabulate
print('Task P3 : Open AI Embeddings - Model text-embedding-ada-002')
conclusion = [['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score', 'Train AUC', 'Test AUC', ],
             ['Logistic Regression', 77.4, 77.7, 77.5, 77.5],
             ['Random Forest Classifier', 55.1, 54.5, 54.3, 53.7],
             ['XGBoost Classifier',  66.8, 66.5, 66.7, 66.5],
             ['Naive Bayes Classifier',  56.8, 56.9, 56.8, 56.7],
             ['SVM Classifier',  69.0, 69.4, 69.1, 68.8],
             ]
print(tabulate.tabulate(conclusion, tablefmt='fancy_grid'))

Task P3 : Open AI Embeddings - Model text-embedding-ada-002
╒══════════════════════════╤══════════╤═══════════╤════════╤══════════╤═══════════╤══════════╕
│ Model                    │ Accuracy │ Precision │ Recall │ F1 Score │ Train AUC │ Test AUC │
├──────────────────────────┼──────────┼───────────┼────────┼──────────┼───────────┼──────────┤
│ Logistic Regression      │ 77.4     │ 77.7      │ 77.5   │ 77.5     │           │          │
├──────────────────────────┼──────────┼───────────┼────────┼──────────┼───────────┼──────────┤
│ Random Forest Classifier │ 55.1     │ 54.5      │ 54.3   │ 53.7     │           │          │
├──────────────────────────┼──────────┼───────────┼────────┼──────────┼───────────┼──────────┤
│ XGBoost Classifier       │ 66.8     │ 66.5      │ 66.7   │ 66.5     │           │          │
├──────────────────────────┼──────────┼───────────┼────────┼──────────┼───────────┼──────────┤
│ Naive Bayes Classifier   │ 56.8     │ 56.9      │ 56.8   │ 56.7     │           │  