# Model Analysis

In [1]:
import pandas as pd
from numpy import loadtxt
from sklearn.metrics import confusion_matrix, classification_report, recall_score, accuracy_score
import matplotlib.pyplot as plt 

In [2]:
def read_preds(pred_file):
    """Load a predictions file as a 1-D array holding one string per line.

    The file is read line by line instead of going through
    ``loadtxt(..., delimiter='\n')``: newer NumPy releases refuse a newline
    as the field delimiter, and ``loadtxt`` collapses a one-line file into a
    0-d array that cannot be iterated.

    Parameters
    ----------
    pred_file : str or path-like
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        1-D array of ``str``; each element is one non-blank line with
        surrounding whitespace removed. Unlike ``loadtxt``, ``#`` is not
        treated as a comment marker.
    """
    # Local import so the function does not depend on extra notebook-level names.
    from numpy import array

    with open(pred_file, 'r') as handle:
        stripped = [line.strip() for line in handle]
    # Blank lines carry no prediction, so they are dropped.
    rows = [row for row in stripped if row]
    return array(rows, dtype=str)

In [3]:
def read_pred_probs(pred_file):
    """Return the largest value of every row in a CSV file.

    Parameters
    ----------
    pred_file : str or path-like
        Path of a CSV file readable by ``pandas.read_csv`` (first line is
        taken as the header). Presumably one column per class score --
        confirm against the code that writes the file.

    Returns
    -------
    pandas.Series
        One entry per CSV row containing that row's maximum.
    """
    pred_prob = pd.read_csv(pred_file)
    # Vectorised row maximum; avoids calling a Python lambda once per row.
    return pred_prob.max(axis=1)

In [4]:
def read_y_test(y_test_file):
    """Parse the CSV at ``y_test_file`` with pandas and return the DataFrame."""
    return pd.read_csv(y_test_file)

In [5]:
def get_confusion_matrix(y_test,preds):
    """Print the confusion matrix of ``preds`` against ``y_test`` and plot it.

    Parameters
    ----------
    y_test : array-like
        Reference class labels.
    preds : array-like
        Predicted class labels, aligned with ``y_test``.

    Returns
    -------
    None
        The matrix is printed and shown as a figure with a colour bar.

    Notes
    -----
    The two tick labels are hard-coded, so the plot assumes a two-class
    problem whose classes sort as background first, chimpanzee second --
    TODO confirm against the label encoding used at training time.
    """
    labels=['0_background','1_chimpanze']
    cm = confusion_matrix(y_test,preds)

    print(cm)
    fig, ax = plt.subplots()
    cax = ax.matshow(cm)
    fig.colorbar(cax)
    # Pin tick positions before labelling them. Prepending '' to the label
    # list only worked while matplotlib happened to place an extra leading
    # tick; fixed positions keep each label on its own row/column.
    positions = list(range(len(labels)))
    ax.set_xticks(positions)
    ax.set_yticks(positions)
    ax.set_xticklabels(labels, rotation=90)
    ax.set_yticklabels(labels)
    # Label the matrix axes explicitly rather than whichever axes is current.
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    plt.show()

In [6]:
def get_classification_report(y_test, preds):
    """Build scikit-learn's classification report as a DataFrame.

    The report is requested in dictionary form and transposed, so each
    report entry becomes a row and each metric a column.
    """
    report = classification_report(y_test, preds, output_dict=True)
    frame = pd.DataFrame(report)
    return frame.T

In [7]:
def get_selected_val(values):
    """Return the position of the largest number in a whitespace-separated string.

    Parameters
    ----------
    values : str
        Numbers separated by whitespace, e.g. ``'0.1 0.9'``. Repeated
        separators and leading/trailing whitespace are tolerated.

    Returns
    -------
    int
        Zero-based index of the maximum; the first one wins on ties.

    Raises
    ------
    ValueError
        If ``values`` holds no tokens or a token is not a valid float.
    """
    # split() without an argument collapses runs of whitespace, so a trailing
    # newline or double space no longer produces an empty token for float().
    scores = [float(token) for token in values.split()]
    if not scores:
        raise ValueError('no numeric values found in %r' % (values,))
    return scores.index(max(scores))

## Standard CNN (Sanctuary)
### Train

In [18]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_cnn/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_cnn/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.975801,0.993128,0.984388,16444.0
1,0.966765,0.890303,0.92696,3692.0
accuracy,0.974275,0.974275,0.974275,0.974275
macro avg,0.971283,0.941716,0.955674,20136.0
weighted avg,0.974144,0.974275,0.973859,20136.0


### Test

In [19]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_cnn/test/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_cnn/test/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.916108,0.716219,0.803925,68870.0
1,0.682748,0.903021,0.777586,46577.0
accuracy,0.791584,0.791584,0.791584,0.791584
macro avg,0.799428,0.80962,0.790755,115447.0
weighted avg,0.821959,0.791584,0.793298,115447.0


## Standard CNN (Synthetic)

### Train

In [20]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/synthetic_cnn/_predictions.txt')
cnn_y = read_y_test('../models/synthetic_cnn/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.97241,0.996743,0.984426,19342.0
1,0.995535,0.962521,0.97875,14595.0
accuracy,0.982026,0.982026,0.982026,0.982026
macro avg,0.983973,0.979632,0.981588,33937.0
weighted avg,0.982355,0.982026,0.981985,33937.0


### Test

In [21]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/synthetic_cnn/test/_predictions.txt')
cnn_y = read_y_test('../models/synthetic_cnn/test/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.880887,0.809017,0.843424,68870.0
1,0.748008,0.838246,0.79056,46577.0
accuracy,0.82081,0.82081,0.82081,0.82081
macro avg,0.814447,0.823632,0.816992,115447.0
weighted avg,0.827277,0.82081,0.822096,115447.0


## Standard CNN (Sanctuary + Synthetic)

### Train

In [22]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_synthetic_cnn/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_synthetic_cnn/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.985299,0.934991,0.959486,39210.0
1,0.87522,0.970314,0.920317,18426.0
accuracy,0.946284,0.946284,0.946284,0.946284
macro avg,0.93026,0.952652,0.939902,57636.0
weighted avg,0.950107,0.946284,0.946964,57636.0


### Test

In [23]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_synthetic_cnn/test/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_synthetic_cnn/test/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.863135,0.849412,0.856219,68870.0
1,0.782451,0.800846,0.791542,46577.0
accuracy,0.829818,0.829818,0.829818,0.829818
macro avg,0.822793,0.825129,0.82388,115447.0
weighted avg,0.830583,0.829818,0.830125,115447.0


## CNN10 (Sanctuary)
### Train

In [24]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_cnn10/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_cnn10/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.992271,0.991486,0.991878,16444.0
1,0.962213,0.965601,0.963904,3692.0
accuracy,0.98674,0.98674,0.98674,0.98674
macro avg,0.977242,0.978544,0.977891,20136.0
weighted avg,0.98676,0.98674,0.986749,20136.0


### Test

In [25]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_cnn10/test/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_cnn10/test/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.908391,0.767998,0.832316,68870.0
1,0.720767,0.88548,0.794678,46577.0
accuracy,0.815396,0.815396,0.815396,0.815396
macro avg,0.814579,0.826739,0.813497,115447.0
weighted avg,0.832694,0.815396,0.817131,115447.0


## CNN10 (Synthetic)
### Train

In [26]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/synthetic_cnn10/_predictions.txt')
cnn_y = read_y_test('../models/synthetic_cnn10/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.994531,0.996639,0.995584,19342.0
1,0.995534,0.992737,0.994134,14595.0
accuracy,0.994961,0.994961,0.994961,0.994961
macro avg,0.995033,0.994688,0.994859,33937.0
weighted avg,0.994962,0.994961,0.99496,33937.0


### Test

In [27]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/synthetic_cnn10/test/_predictions.txt')
cnn_y = read_y_test('../models/synthetic_cnn10/test/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.92515,0.789487,0.851952,68870.0
1,0.744195,0.905554,0.816984,46577.0
accuracy,0.836314,0.836314,0.836314,0.836314
macro avg,0.834673,0.847521,0.834468,115447.0
weighted avg,0.852144,0.836314,0.837844,115447.0


## CNN10 (Sanctuary + Synthetic)
### Train

In [28]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_synthetic_cnn10/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_synthetic_cnn10/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.994109,0.994236,0.994173,39210.0
1,0.987731,0.987463,0.987597,18426.0
accuracy,0.992071,0.992071,0.992071,0.992071
macro avg,0.99092,0.99085,0.990885,57636.0
weighted avg,0.99207,0.992071,0.992071,57636.0


### Test

In [29]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_synthetic_cnn10/test/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_synthetic_cnn10/test/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.877069,0.893205,0.885063,68870.0
1,0.837674,0.814887,0.826123,46577.0
accuracy,0.861607,0.861607,0.861607,0.861607
macro avg,0.857371,0.854046,0.855593,115447.0
weighted avg,0.861175,0.861607,0.861284,115447.0


## CNN10 (Sanctuary + Synthetic) - Reg
### Train

In [44]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_synthetic_cnn10_reg/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_synthetic_cnn10_reg/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.941289,0.940857,0.941073,39210.0
1,0.874268,0.875122,0.874695,18426.0
accuracy,0.919842,0.919842,0.919842,0.919842
macro avg,0.907779,0.90799,0.907884,57636.0
weighted avg,0.919863,0.919842,0.919852,57636.0


### Test

In [42]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_synthetic_cnn10_reg/test/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_synthetic_cnn10_reg/test/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.886993,0.89955,0.893227,68870.0
1,0.848296,0.830539,0.839323,46577.0
accuracy,0.871707,0.871707,0.871707,0.871707
macro avg,0.867644,0.865044,0.866275,115447.0
weighted avg,0.871381,0.871707,0.87148,115447.0


## CNN8 (Sanctuary + Synthetic)
### Train

In [38]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_synthetic_cnn8/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_synthetic_cnn8/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.993037,0.99291,0.992973,39210.0
1,0.984917,0.985184,0.98505,18426.0
accuracy,0.99044,0.99044,0.99044,0.99044
macro avg,0.988977,0.989047,0.989012,57636.0
weighted avg,0.990441,0.99044,0.99044,57636.0


### Test

In [39]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_synthetic_cnn8/test/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_synthetic_cnn8/test/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.885086,0.86505,0.874953,68870.0
1,0.806922,0.833931,0.820204,46577.0
accuracy,0.852495,0.852495,0.852495,0.852495
macro avg,0.846004,0.849491,0.847579,115447.0
weighted avg,0.853551,0.852495,0.852865,115447.0


## CNN8 (Sanctuary + Synthetic) - Reg
### Train

In [16]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_synthetic_cnn8_reg/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_synthetic_cnn8_reg/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.923382,0.955904,0.939362,39210.0
1,0.898563,0.831217,0.863579,18426.0
accuracy,0.916042,0.916042,0.916042,0.916042
macro avg,0.910972,0.89356,0.90147,57636.0
weighted avg,0.915447,0.916042,0.915134,57636.0


### Test

In [17]:
# Load the stored prediction rows and the label table for this run.
cnn_pred = read_preds('../models/sanctuary_synthetic_cnn8_reg/test/_predictions.txt')
cnn_y = read_y_test('../models/sanctuary_synthetic_cnn8_reg/test/_y_test.csv')

# Predicted class per row: index of the largest score in the row.
preds = list(map(get_selected_val, cnn_pred))
# Reference class per row: label of the column holding the row maximum, as int
# (presumably the labels are stored one-hot -- confirm against the export code).
ys = cnn_y.idxmax(axis=1)
y = ys.apply(int)

get_classification_report(y, preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.827491,0.959779,0.888739,68870.0
1,0.922119,0.704146,0.798525,46577.0
accuracy,0.856644,0.856644,0.856644,0.856644
macro avg,0.874805,0.831963,0.843632,115447.0
weighted avg,0.865669,0.856644,0.852342,115447.0
