### Notebook to import the results of a model run and create a results report

In [897]:
import os
import random
import numpy as np
import pickle as pkl
import scipy.sparse as sp
import sys
from tqdm import tqdm

In [898]:
# Load the pickled label object from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file —
# only run this against data files from a trusted source.
with open("data/ind.question1_sub_bal.ty", 'rb') as f:    
    y_true = pkl.load(f)

In [899]:
# Collapse each row of the loaded label matrix to the index of its largest
# entry (presumably one-hot rows -> class ids; confirm against the data file).
# NOTE(review): this rebinds y_true, so re-running the cell on the already
# collapsed 1-D result fails on axis=1; reload the labels before re-running.
y_true = np.argmax(y_true, axis=1)

In [900]:
y_true

array([0, 0, 1, ..., 0, 0, 1], dtype=int64)

In [901]:
y_pred = np.loadtxt("gnn_pred4_win8.csv")

In [902]:
y_pred

array([0., 0., 1., ..., 0., 0., 1.])

In [903]:
# Indices read from short_sents.txt, used below to slice y_true / y_pred.
# `np.int` was removed in NumPy 1.24; the builtin `int` is the type it aliased,
# so the resulting dtype is unchanged.
ind_short = np.loadtxt("short_sents.txt").astype(int)

In [904]:
ind_short

array([   0,    1,    2, ..., 9985, 9992, 9993])

In [905]:
y_true_short = y_true[ind_short]

In [906]:
y_true_short.shape

(4756,)

In [907]:
y_pred_short = y_pred[ind_short]

In [908]:
# Indices read from medium_sents.txt, used below to slice y_true / y_pred.
# The builtin `int` replaces the removed `np.int` alias (gone since NumPy 1.24).
ind_medium = np.loadtxt("medium_sents.txt").astype(int)

In [909]:
y_true_medium = y_true[ind_medium]

In [910]:
y_pred_medium = y_pred[ind_medium]

In [911]:
# Indices read from long_sents.txt, used below to slice y_true / y_pred.
# The builtin `int` replaces the removed `np.int` alias (gone since NumPy 1.24).
ind_long = np.loadtxt("long_sents.txt").astype(int)

In [912]:
y_true_long = y_true[ind_long]

In [913]:
y_pred_long = y_pred[ind_long]

In [914]:
# Indices read from unmatched_len.txt, used below to slice y_true / y_pred.
# The builtin `int` replaces the removed `np.int` alias (gone since NumPy 1.24).
ind_mixed = np.loadtxt("unmatched_len.txt").astype(int)

In [915]:
y_true_mixed = y_true[ind_mixed]

In [916]:
y_pred_mixed = y_pred[ind_mixed]

In [917]:
# Indices read from inductive_ind_15.txt, used below to slice y_true / y_pred
# and to build the complementary "non-inductive" subset.
# The builtin `int` replaces the removed `np.int` alias (gone since NumPy 1.24).
ind_inductive = np.loadtxt("inductive_ind_15.txt").astype(int)

In [918]:
y_true_inductive = y_true[ind_inductive]

In [919]:
y_pred_inductive = y_pred[ind_inductive]

In [920]:
# True labels for every sample that is NOT in ind_inductive.
# setdiff1d returns the sorted complement, matching the order of the previous
# list comprehension, but sizes it from the data instead of a hard-coded 9999
# and avoids scanning ind_inductive once per sample.
y_true_n_inductive = y_true[np.setdiff1d(np.arange(len(y_true)), ind_inductive)]

In [921]:
# Predictions for every sample that is NOT in ind_inductive.
# setdiff1d returns the sorted complement, matching the order of the previous
# list comprehension, but sizes it from the data instead of a hard-coded 9999
# and avoids scanning ind_inductive once per sample.
y_pred_n_inductive = y_pred[np.setdiff1d(np.arange(len(y_pred)), ind_inductive)]

In [922]:
from sklearn import metrics

In [923]:
print(metrics.classification_report(y_true_short, y_pred_short, digits=4))

              precision    recall  f1-score   support

           0     0.8891    0.7244    0.7983      2910
           1     0.6637    0.8575    0.7483      1846

    accuracy                         0.7761      4756
   macro avg     0.7764    0.7910    0.7733      4756
weighted avg     0.8016    0.7761    0.7789      4756



In [924]:
print(metrics.classification_report(y_true_medium, y_pred_medium, digits=4))

              precision    recall  f1-score   support

           0     0.9066    0.7430    0.8167      1490
           1     0.6749    0.8746    0.7619       909

    accuracy                         0.7928      2399
   macro avg     0.7908    0.8088    0.7893      2399
weighted avg     0.8188    0.7928    0.7959      2399



In [925]:
print(metrics.classification_report(y_true_long, y_pred_long, digits=4))

              precision    recall  f1-score   support

           0     0.9167    0.7857    0.8462        14
           1     0.6667    0.8571    0.7500         7

    accuracy                         0.8095        21
   macro avg     0.7917    0.8214    0.7981        21
weighted avg     0.8333    0.8095    0.8141        21



In [926]:
print(metrics.classification_report(y_true_mixed, y_pred_mixed, digits=4))

              precision    recall  f1-score   support

           0     0.8962    0.7536    0.8187      1798
           1     0.6608    0.8461    0.7420      1020

    accuracy                         0.7871      2818
   macro avg     0.7785    0.7998    0.7804      2818
weighted avg     0.8110    0.7871    0.7910      2818



In [927]:
print(metrics.classification_report(y_true_inductive, y_pred_inductive, digits=4))

              precision    recall  f1-score   support

           0     0.9336    0.7276    0.8178       290
           1     0.6638    0.9123    0.7685       171

    accuracy                         0.7961       461
   macro avg     0.7987    0.8199    0.7932       461
weighted avg     0.8336    0.7961    0.7995       461



In [928]:
print(metrics.classification_report(y_true_n_inductive, y_pred_n_inductive, digits=4))

              precision    recall  f1-score   support

           0     0.8962    0.7286    0.8038      5925
           1     0.6594    0.8616    0.7471      3613

    accuracy                         0.7790      9538
   macro avg     0.7778    0.7951    0.7754      9538
weighted avg     0.8065    0.7790    0.7823      9538



In [929]:
print(metrics.classification_report(y_true, y_pred, digits=4))

              precision    recall  f1-score   support

           0     0.8955    0.7376    0.8089      6215
           1     0.6658    0.8586    0.7500      3784

    accuracy                         0.7834      9999
   macro avg     0.7806    0.7981    0.7794      9999
weighted avg     0.8086    0.7834    0.7866      9999



In [930]:
y_true_short.shape, y_true_medium.shape, y_true_long.shape, y_true_mixed.shape, y_true.shape

((4756,), (2399,), (21,), (2818,), (9999,))

In [931]:
ind_inductive

array([   8,   15,   23,   25,   62,   70,   74,   85,  106,  107,  113,
        138,  143,  144,  183,  187,  211,  233,  276,  458,  481,  484,
        498,  522,  531,  558,  573,  587,  589,  646,  695,  786,  794,
        820,  840,  888,  895,  916,  968,  988, 1011, 1019, 1028, 1042,
       1053, 1100, 1116, 1125, 1152, 1161, 1166, 1193, 1240, 1242, 1243,
       1277, 1328, 1363, 1366, 1427, 1439, 1442, 1451, 1453, 1462, 1598,
       1603, 1660, 1676, 1679, 1685, 1693, 1755, 1759, 1775, 1776, 1781,
       1788, 1830, 1898, 1900, 1944, 1969, 1978, 2002, 2006, 2007, 2048,
       2053, 2064, 2079, 2087, 2090, 2096, 2148, 2197, 2230, 2345, 2385,
       2396, 2397, 2435, 2445, 2456, 2458, 2478, 2652, 2692, 2738, 2744,
       2751, 2765, 2830, 2865, 2882, 2896, 2897, 2908, 2910, 2920, 2924,
       2937, 2979, 2980, 2988, 3012, 3021, 3026, 3049, 3050, 3112, 3151,
       3165, 3198, 3212, 3242, 3290, 3303, 3345, 3365, 3413, 3418, 3424,
       3451, 3526, 3539, 3542, 3569, 3602, 3608, 36

In [932]:
ind_short, ind_medium, ind_long, ind_mixed

(array([   0,    1,    2, ..., 9985, 9992, 9993]),
 array([   4,    5,   13, ..., 9988, 9989, 9991]),
 array([   7,  306,  790,  795, 1408, 1970, 2362, 3235, 3238, 3260, 3316,
        5128, 5175, 5585, 8165, 8171, 8274, 8497, 8604, 9424, 9779]),
 array([   6,    8,   16, ..., 9981, 9983, 9990]))

In [933]:
# Predictions used for the model-vs-model comparison below.
# NOTE(review): the per-subset reports earlier in this notebook read
# "gnn_pred4_win8.csv", while this cell reads "gnn_pred4_win4.csv" —
# confirm that comparing a different prediction file is intentional.
y_pred_gnn = np.loadtxt("gnn_pred4_win4.csv")

In [934]:
y_pred_dnn = np.loadtxt("dnn_pred.csv")

In [935]:
# Positions where y_pred_gnn and y_pred_dnn disagree; [0] takes the first-axis
# index array out of the tuple that np.where returns.
indices = np.where(y_pred_gnn != y_pred_dnn)[0]

In [936]:
y_pred_gnn_dif = y_pred_gnn[indices]

In [937]:
y_pred_dnn_dif = y_pred_dnn[indices]

In [938]:
y_true_diff = y_true[indices]

In [939]:
print(metrics.classification_report(y_true_diff, y_pred_gnn_dif, digits=4))

              precision    recall  f1-score   support

           0     0.6991    0.6900    0.6945      1145
           1     0.2843    0.2931    0.2886       481

    accuracy                         0.5726      1626
   macro avg     0.4917    0.4915    0.4916      1626
weighted avg     0.5764    0.5726    0.5744      1626



In [940]:
print(metrics.classification_report(y_true_diff, y_pred_dnn_dif, digits=4))

              precision    recall  f1-score   support

           0     0.7157    0.3100    0.4327      1145
           1     0.3009    0.7069    0.4221       481

    accuracy                         0.4274      1626
   macro avg     0.5083    0.5085    0.4274      1626
weighted avg     0.5930    0.4274    0.4295      1626



In [941]:
np.where(y_pred_gnn_dif == y_true_diff)[0].shape

(931,)

In [942]:
np.where(y_pred_dnn_dif == y_true_diff)[0]

array([   0,    2,    5,    8,    9,   10,   11,   12,   13,   18,   19,
         22,   23,   24,   25,   26,   27,   31,   34,   36,   38,   39,
         40,   41,   42,   43,   45,   46,   47,   50,   52,   55,   60,
         61,   62,   64,   65,   69,   70,   73,   75,   77,   78,   83,
         84,   85,   87,   88,   90,   92,   94,   98,   99,  100,  106,
        107,  108,  109,  110,  111,  114,  117,  123,  126,  128,  129,
        131,  132,  133,  135,  137,  139,  141,  142,  145,  146,  152,
        153,  158,  161,  162,  166,  169,  170,  175,  177,  178,  179,
        182,  183,  184,  186,  189,  191,  194,  196,  197,  200,  201,
        203,  204,  207,  212,  216,  221,  222,  223,  225,  226,  228,
        232,  233,  239,  241,  249,  254,  264,  267,  268,  269,  271,
        272,  273,  277,  282,  283,  284,  285,  286,  287,  288,  290,
        294,  295,  296,  298,  299,  300,  303,  304,  307,  309,  312,
        313,  314,  315,  317,  322,  326,  327,  3

In [943]:
indices

array([  16,   23,   29, ..., 9987, 9990, 9994], dtype=int64)

In [944]:
# Positions (over the full label array) where y_pred_gnn equals the true label.
gnn_right = np.where(y_pred_gnn == y_true)[0]

In [945]:
# Positions (over the full label array) where y_pred_dnn equals the true label.
dnn_right = np.where(y_pred_dnn == y_true)[0]

In [946]:
# Save the positions where the two models disagree and y_pred_gnn is correct.
# fmt='%d' writes the integer indices as plain integers; savetxt's default
# '%.18e' would store them in float scientific notation.
np.savetxt('indices_gnn_right.txt', np.intersect1d(indices, gnn_right), delimiter=',', fmt='%d')

In [947]:
# Save the positions where the two models disagree and y_pred_dnn is correct.
# fmt='%d' writes the integer indices as plain integers; savetxt's default
# '%.18e' would store them in float scientific notation.
np.savetxt('indices_dnn_right.txt', np.intersect1d(indices, dnn_right), delimiter=',', fmt='%d')