# Load data

In [1]:
import numpy as np
import time

from linear_classifiers import *
from prec_recall import prec_recall

In [2]:
# Which training set to load; must be one of the keys of `train_files` below.
dataset = "subsample"

# (features, labels) file pair for every supported training set.
train_files = {
    "subsample": (
        "../data/subsamp_data/processed_X_train.npy",
        "../data/subsamp_data/processed_y_train.npy",
    ),
    "origin": (
        "../data/origin_data/X_train.npy",
        "../data/origin_data/y_train.npy",
    ),
}

if dataset not in train_files:
    raise Exception("Unknown dataset name")

# Features are loaded first, then labels.
X_train, y_train = (np.load(path) for path in train_files[dataset])

# Validation and test splits are always read from the origin data (the largest
# dataset), whichever training set was selected above.
X_val, y_val = (
    np.load("../data/origin_data/X_val.npy"),
    np.load("../data/origin_data/y_val.npy"),
)
X_test, y_test = (
    np.load("../data/origin_data/X_test.npy"),
    np.load("../data/origin_data/y_test.npy"),
)

# Quick sanity check of the training features: shape, the array itself
# (NumPy abbreviates large arrays), and the first row in full.
print("shape: {}".format(X_train.shape))
print(X_train)
print(X_train[0])

shape: (622, 28)
[[ 0.29015516  0.04924313 -0.74052367 ...  2.74526067 -0.2204016
   0.16823274]
 [-1.61218737 -4.62014787 -0.28331816 ...  0.89152121 -0.31278271
   0.26827516]
 [-4.28058383  1.42110001 -3.90822884 ... -1.14992267 -1.80988612
   0.72305143]
 ...
 [ 1.23220629  0.26485221  0.42781704 ... -0.44599829  0.03289931
   0.01365267]
 [ 1.20493424  3.23806951 -6.0103244  ...  0.05603474  0.49182782
   0.34084669]
 [-0.32479146  1.08346101 -1.20520689 ...  0.07256116  0.48961806
   0.33803559]]
[ 0.29015516  0.04924313 -0.74052367  2.86546277  1.39529392 -0.53516328
  0.14254337 -0.22276971 -1.46369065  1.71353755 -1.12757349 -0.70865735
  0.27218608  0.27471029  0.23519195 -0.46355271  0.47299539 -0.44789916
  1.79092415  0.24757953  0.33734885  1.01819058  0.30355001  0.83388612
 -1.2223061   2.74526067 -0.2204016   0.16823274]


# Neural Networks

In [3]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
from prec_recall import prec_recall
# Pin NumPy's global random state so repeated runs draw the same numbers.
# NOTE(review): none of the visible cells draws random numbers directly --
# confirm whether the imported helpers rely on this seed.
np.random.seed(seed=7)

Using TensorFlow backend.
  return f(*args, **kwds)


In [4]:
# Per-layer parameters of the saved network, read from files numbered 1 to 3.
weights, biases = [], []

for i in (1, 2, 3):
    # Keep the weight/bias loads interleaved per layer.
    weights += [np.load("../keras/models_best/w_{}.npy".format(i))]
    biases += [np.load("../keras/models_best/b_{}.npy".format(i))]

# One weight-matrix shape per line, followed by the first weight matrix.
print("\n".join(str(w.shape) for w in weights))

print(weights[0])

(28, 12)
(12, 6)
(6, 1)
[[ 0.5135794   1.1593744  -0.08030141  0.2788803  -0.10667941 -0.91568094
  -0.9699817   2.1473632  -0.65720606 -0.97112805  2.1592956  -0.23425752]
 [ 0.513026   -0.86688733  0.14152195  0.9756269  -0.03812868  0.16613284
   0.6390418  -1.0894452   0.23098922 -0.43166974  0.195443    0.27169588]
 [-0.79612184 -0.31300616  0.01003939  0.5441564   0.6771815   0.86358047
   0.46836215 -0.0691051  -0.75780225 -0.21152873 -0.05263001  0.15522678]
 [-1.9530405  -0.98751664  0.07709956 -0.4244684   0.33330667 -0.28903294
  -1.3645107   0.32463753  1.6065955   0.4968592  -0.87009495  0.44481802]
 [ 2.5732617  -0.28998384 -0.17923625 -0.0110664  -0.8278394   0.35161057
   0.86983216 -0.9587814   0.19139259 -0.07849523 -0.19551651 -0.30567694]
 [-0.03341743 -0.45875236 -0.33527657  0.7639824   0.0118873   0.753918
  -0.2722627   0.48212224 -0.09232956 -0.7805643  -0.77936244  0.7769905 ]
 [ 1.1727765  -0.04304819  0.03146831 -0.43923208  0.28405398  0.17397723
   0.51668

In [5]:
# --- Validation split -------------------------------------------------------
# Only the prediction call sits between the two clock reads, so the reported
# time excludes the metric computation.
start = time.perf_counter()
my_pred = neural_networks(X_val, weights, biases)
end = time.perf_counter()

# Elapsed seconds plus the first ten predictions as a quick sanity check.
print("time:\t{}\nresult:\t{}".format(end - start, my_pred[:10]))
prec_recall(y_val, my_pred)

# --- Test split (not timed) -------------------------------------------------
my_pred = neural_networks(X_test, weights, biases)
prec_recall(y_test, my_pred)

time:	0.05856779099999976
result:	[[1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
TP count:    91
FP count:    8936
TN count:    47932
FN count:    3
Precision rate:  0.010080868505594328
Recall rate: 0.9680851063829787
Accuracy：0.8430708191425863
TP count:    82
FP count:    8785
TN count:    48090
FN count:    5
Precision rate:  0.009247772640126311
Recall rate: 0.9425287356321839
Accuracy：0.8456865980829325


# Logistic Regression

In [6]:
# Saved logistic-regression parameters (weights first, then bias).
w, b = (
    np.load("../sklearn/model_LR/LR_best/w.npy"),
    np.load("../sklearn/model_LR/LR_best/b.npy"),
)

# --- Validation split -------------------------------------------------------
# Only the prediction call is timed; the metric computation is excluded.
start = time.perf_counter()
my_pred = logistic_regression(X_val, w, b)
end = time.perf_counter()

# Elapsed seconds plus the first ten predictions as a quick sanity check.
print("time:\t{}\nresult:\t{}".format(end - start, my_pred[:10]))
prec_recall(y_val, my_pred)

# --- Test split (not timed) -------------------------------------------------
my_pred = logistic_regression(X_test, w, b)
prec_recall(y_test, my_pred)

time:	0.0014024279999986788
result:	[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
TP count:    76
FP count:    29
TN count:    56839
FN count:    18
Precision rate:  0.7238095238095238
Recall rate: 0.8085106382978723
Accuracy：0.9991748885221726
TP count:    65
FP count:    24
TN count:    56851
FN count:    22
Precision rate:  0.7303370786516854
Recall rate: 0.7471264367816092
Accuracy：0.9991924440855307


# SVM

In [7]:
# Saved SVM parameters (weights first, then bias).
w, b = (
    np.load("../sklearn/model_SVM/SVM_best/w.npy"),
    np.load("../sklearn/model_SVM/SVM_best/b.npy"),
)

# NOTE(review): the SVM parameters are scored with `logistic_regression`.
# Presumably that helper thresholds the linear score w.x + b, which would make
# it equivalent to a linear SVM's decision rule -- confirm in linear_classifiers.

# --- Validation split -------------------------------------------------------
my_pred = logistic_regression(X_val, w, b)
print(my_pred[:10])
prec_recall(y_val, my_pred)

# --- Test split -------------------------------------------------------------
my_pred = logistic_regression(X_test, w, b)
prec_recall(y_test, my_pred)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
TP count:    74
FP count:    22
TN count:    56846
FN count:    20
Precision rate:  0.7708333333333334
Recall rate: 0.7872340425531915
Accuracy：0.9992626663389628
TP count:    65
FP count:    17
TN count:    56858
FN count:    22
Precision rate:  0.7926829268292683
Recall rate: 0.7471264367816092
Accuracy：0.9993153330290369
