In [None]:
import os.path as pt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import fbeta_score
from sklearn.metrics import make_scorer
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import plot_model
from tensorflow.python.keras.metrics import Metric

%matplotlib inline

## Useful functions

In [None]:
# will be used for biclassification
def print_metrics(metrics, preds, test_labels):
    """Print the evaluation summary for the binary classifier.

    Parameters
    ----------
    metrics : sequence
        Read positionally: 0 = loss, 1 = accuracy, 2 = area under ROC,
        3 = precision, 4 = recall.
        NOTE(review): presumably the list returned by ``model.evaluate``,
        so this order must mirror the metrics the model was compiled
        with -- confirm against the training code.
    preds : array-like
        Predicted labels, passed to ``fbeta_score`` as ``y_pred``.
    test_labels : array-like
        True labels, passed to ``fbeta_score`` as ``y_true``.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    loss, accuracy, area_under_ROC = metrics[0], metrics[1], metrics[2]
    precision, recall = metrics[3], metrics[4]

    # F2 is the score of interest here: recall matters more than precision.
    # F1 and F5 are reported next to it for comparison.  All three are
    # computed before anything is printed.
    f_scores = {
        beta: fbeta_score(test_labels, preds, beta=beta)
        for beta in (2, 1, 5)
    }

    report = (
        ('precision is ', precision),
        ('recall is', recall),
        ('accuracy is', accuracy),
        ('loss is', loss),
        ('area under ROC is', area_under_ROC),
        ('f1_score is ', f_scores[1]),
        ('f2_score is ', f_scores[2]),
        ('f5_score is ', f_scores[5]),
    )
    for label, value in report:
        print(label, value)


## will be used for multiclassification    
def print_metrics_multi (metrics):
    """Print the evaluation summary for the multi-class classifier.

    Parameters
    ----------
    metrics : sequence of numbers
        Read positionally: 0 = loss, 1 = accuracy, 2 = area under ROC,
        3 = precision, 4 = recall.
        NOTE(review): presumably the list returned by ``model.evaluate``,
        so this order must mirror the metrics the model was compiled
        with -- confirm against the training code.

    Returns
    -------
    None
        Everything is written to stdout.

    Notes
    -----
    F1, F2 and F5 are derived from the precision and recall entries.
    When the F-beta denominator is zero (e.g. precision and recall are
    both 0) the score is reported as 0.0 instead of raising
    ``ZeroDivisionError``.
    """
    precision = metrics[3]
    recall = metrics[4]
    accuracy = metrics[1]
    area_under_ROC = metrics[2]
    loss = metrics[0]

    def f_beta_score (precision , recall , beta):
        """Return F-beta for the given precision/recall, or 0.0 if undefined."""
        denominator = recall + (beta**2) * precision
        if denominator == 0:
            # Undefined F-beta: report 0.0 rather than crashing the report.
            return 0.0
        return (1 + beta**2) * precision * recall / denominator

    f_score = f_beta_score(precision, recall, beta=2)
    f1_score = f_beta_score(precision, recall, beta=1)
    f5_score = f_beta_score(precision, recall, beta=5)

    print ('precision is ',precision)
    print ('recall is',recall)
    print ('accuracy is',accuracy)
    print ('loss is',loss)
    print ('area under ROC is',area_under_ROC)
    print ('f1_score is ', f1_score)
    print ('f2_score is ', f_score)
    print ('f5_score is ', f5_score)

## Biclassification evaluation

In [None]:
# Restore the saved binary classifier from its HDF5 file.
biclassifier = keras.models.load_model(
    filepath="../models/biclassification.h5",
)

In [None]:
# Read the true labels and the feature matrix; header=None because the
# CSV files are read without a header row.
# NOTE(review): both paths point at "train" files although the variables
# are named *_test -- confirm this is the intended evaluation split.
test_labels = pd.read_csv(
    "../data/wikigold/bilass-eng.train.true_labels.csv",
    header=None,
)
data_test = pd.read_csv(
    "../data/wikigold/representation.train.csv",
    header=None,
)

# Standardise the features with a scaler fitted on this same data.
# NOTE(review): presumably the model was trained on features scaled by a
# different fit -- verify against the training notebook.
scaler1 = StandardScaler().fit(data_test)
data_test = scaler1.transform(data_test)

In [None]:
# Display the dimensions of the scaled feature matrix.
data_test.shape

(5000, 768)

In [None]:
# Display the dimensions of the label frame; its row count should match data_test.
test_labels.shape

(5000, 1)

In [None]:
# Decision cut-off: a sample is labelled 1 only when the model output is
# strictly greater than this value.
THRESHOLD = 0.5

# Run the model and turn its outputs into boolean labels in one step.
preds = biclassifier.predict(data_test) > THRESHOLD

In [None]:
# print_metrics reads entry 0 of `metrics` as the loss and entries 1-4 as
# accuracy, area under ROC, precision and recall.
metrics = biclassifier.evaluate(x=data_test, y=test_labels)
print_metrics(metrics=metrics, preds=preds, test_labels=test_labels)

precision is  0.7854251265525818
recall is 0.9371980428695679
accuracy is 0.9868000149726868
loss is 0.06491153687238693
area under ROC is 0.9796316027641296
f1_score is  0.854625550660793
f2_score is  0.9023255813953489
f5_score is  0.9302840280339358


In [None]:
# confusion_matrix is imported in the notebook's import cell.
# Rows are the true classes, columns the predicted ones (scikit-learn convention).
cm = confusion_matrix(test_labels, preds)
print ('the confusion matrix is \n ',cm)

the confusion matrix is 
  [[4740   53]
 [  13  194]]


## Multiclassification evaluation

In [None]:
# Restore the saved multi-class classifier from its HDF5 file.
multiclassifier = keras.models.load_model(
    filepath="../models/multiclass.h5",
)

In [None]:
# Read the multi-class labels and the feature matrix; header=None because
# the CSV files are read without a header row.
# NOTE(review): both paths point at "train" files although the variables
# are named *_test -- confirm this is the intended evaluation split.
test_labels = pd.read_csv(
    "../data/wikigold/multiclass-eng.train.true_labels.csv",
    header=None,
)
data_test = pd.read_csv(
    "../data/wikigold/representation.train.csv",
    header=None,
)

# Standardise the features with a scaler fitted on this same data.
# NOTE(review): presumably the model was trained on features scaled by a
# different fit -- verify against the training notebook.
scaler1 = StandardScaler().fit(data_test)
data_test = scaler1.transform(data_test)

In [None]:
# Display the dimensions of the multi-class label frame.
test_labels.shape

(5000, 5)

In [38]:
# Display the dimensions of the scaled feature matrix; its row count should match test_labels.
data_test.shape

(5000, 768)

In [39]:
# Decision cut-off: an output is labelled 1 only when it is strictly
# greater than this value.
THRESHOLD = 0.5

# Run the model and turn its outputs into boolean labels in one step.
# NOTE(review): the reporting call in the next cell takes only the
# evaluate() result, so these thresholded predictions look unused in the
# cells visible here -- confirm whether they are still needed.
preds = multiclassifier.predict(data_test) > THRESHOLD

In [40]:
# print_metrics_multi reads entry 0 of `metrics` as the loss and entries
# 1-4 as accuracy, area under ROC, precision and recall.
metrics = multiclassifier.evaluate(x=data_test, y=test_labels)
print_metrics_multi(metrics=metrics)

precision is  0.919169545173645
recall is 0.9120000004768372
accuracy is 0.9143999814987183
loss is 0.43730849027633667
area under ROC is 0.9773646593093872
f1_score is  0.9155707374315001
f2_score is  0.9134249478165171
f5_score is  0.9122736834310038
