NN.py (forked from Shifat63/KnowledgeDistillation)
from numpy import vstack
import numpy as np
import torch
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Module
from torch.optim import SGD
from torch.nn import BCELoss
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# model definition
class MLP(Module):
    # define model elements
    def __init__(self, n_inputs):
        super(MLP, self).__init__()
        # input to first hidden layer
        self.hidden1 = Linear(n_inputs, 26)
        kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
        self.act1 = ReLU()
        # second hidden layer
        self.hidden2 = Linear(26, 13)
        kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
        self.act2 = ReLU()
        # output layer
        self.output = Linear(13, 1)
        xavier_uniform_(self.output.weight)
        self.act3 = Sigmoid()

    # forward propagate input
    def forward(self, X):
        # input to first hidden layer
        X = self.hidden1(X)
        X = self.act1(X)
        # second hidden layer
        X = self.hidden2(X)
        X = self.act2(X)
        # output layer
        X = self.output(X)
        X = self.act3(X)
        return X

    # forward pass that stops at the last hidden layer (used to extract features for distillation)
    def forwardLastHiddenLayer(self, X):
        # input to first hidden layer
        X = self.hidden1(X)
        X = self.act1(X)
        # second hidden layer
        X = self.hidden2(X)
        X = self.act2(X)
        return X
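
# --- Illustrative sketch (not in the original file): a quick smoke test of the
# two forward paths above on random data. The feature count 26 is only an
# assumption mirroring hidden1's width; any n_inputs works.
def _demo_forward():
    model = MLP(n_inputs=26)
    dummy = torch.randn(4, 26)                    # batch of 4 random samples
    probs = model(dummy)                          # (4, 1) sigmoid outputs in (0, 1)
    feats = model.forwardLastHiddenLayer(dummy)   # (4, 13) last-hidden activations
    print(probs.shape, feats.shape)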
# train the model
def train_model(train_dl, test_dl, model):
    # define the optimization
    criterion = BCELoss()
    optimizer = SGD(model.parameters(), lr=0.005, momentum=0.9)
    iters = []      # save the epoch counts here for plotting
    losses = []     # save the avg training loss here for plotting
    vallosses = []  # save the avg validation loss here for plotting
    # enumerate epochs
    for epoch in range(50):
        # enumerate mini batches, accumulating the loss over the whole epoch
        curr_loss = 0
        for i, (inputs, targets) in enumerate(train_dl):
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs)
            # calculate loss
            loss = criterion(yhat, targets)
            curr_loss += loss.item()
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()
        iters.append(epoch)
        losses.append(curr_loss / len(train_dl.dataset))
        # validation loss for the epoch (no gradients needed)
        curr_loss = 0
        with torch.no_grad():
            for i, (inputs, targets) in enumerate(test_dl):
                yhat = model(inputs)
                loss = criterion(yhat, targets.float())
                curr_loss += loss.item()
        vallosses.append(curr_loss / len(test_dl.dataset))
    # after calculating error per epoch, plot training (red) vs validation (blue) loss
    plt.plot(iters, losses, "r")
    plt.plot(iters, vallosses, "b")
    plt.title("Training Curve (batch_size=1, lr=0.005)")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.show()
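
# --- Illustrative sketch (assumption, not part of the original repo): building
# DataLoaders with the shapes train_model expects. BCELoss needs float targets
# of shape (batch, 1), and the per-epoch averages divide by len(dataset), which
# matches the batch_size=1 noted in the plot title.
def _make_demo_loaders(n_samples=100, n_features=26):
    from torch.utils.data import TensorDataset, DataLoader
    X = torch.randn(n_samples, n_features)
    y = torch.randint(0, 2, (n_samples, 1)).float()   # binary targets as floats
    split = int(0.8 * n_samples)
    train_dl = DataLoader(TensorDataset(X[:split], y[:split]), batch_size=1)
    test_dl = DataLoader(TensorDataset(X[split:], y[split:]), batch_size=1)
    return train_dl, test_dl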
# Generate soft labels: run the trained teacher over the data and record its
# predicted probabilities alongside the true labels
def get_soft_labels(data, model):
    xinputs, predictions, true = [], [], []
    for i, (inputs, targets) in enumerate(data):
        yhat = model(inputs)
        yhat = yhat.detach().numpy()
        predictions.append(yhat.item())   # np.asscalar is removed in modern NumPy
        true.append(targets.item())
        xinputs.append(inputs.numpy().flatten())
    return np.array(xinputs), predictions, true
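
# --- Illustrative sketch (assumption): in knowledge distillation the teacher's
# soft labels returned above stand in for the hard 0/1 targets when training a
# smaller student; this just prints one sample's soft vs. hard label.
def _demo_soft_labels(train_dl, teacher):
    xinputs, soft, hard = get_soft_labels(train_dl, teacher)
    print("soft:", soft[0], "hard:", hard[0], "n_samples:", len(soft))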
# xinputs: activations from the last hidden layer, which become the inputs for
#          training the regression models
# oinputs: the original inputs
# true: the true labels
def get_last_layer(data, model):
    xinputs, true, oinputs = [], [], []
    for i, (inputs, targets) in enumerate(data):
        yhat = model.forwardLastHiddenLayer(inputs)
        yhat = yhat.detach().numpy()
        xinputs.append(yhat.flatten())
        oinputs.append(inputs.numpy().flatten())
        true.append(targets.item())
    return np.array(xinputs), np.array(true), np.array(oinputs)
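
# --- Illustrative sketch (assumption): the comment above says the last-layer
# activations feed "the regression models". One plausible reading, with
# sklearn's LogisticRegression as a stand-in student trained on those features:
def _demo_student(train_dl, teacher):
    from sklearn.linear_model import LogisticRegression
    acts, true, _ = get_last_layer(train_dl, teacher)
    student = LogisticRegression().fit(acts, true)
    print("student train accuracy:", student.score(acts, true))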
# evaluate the model
def evaluate_model(test_dl, model):
    predictions, actuals = list(), list()
    for i, (inputs, targets) in enumerate(test_dl):
        # evaluate the model on the test set
        yhat = model(inputs)
        # retrieve numpy array
        yhat = yhat.detach().numpy()
        # round to class values
        yhat = yhat.round()
        predictions.append(yhat)
        actual = targets.numpy()
        actual = actual.reshape((len(actual), 1))
        actuals.append(actual)
    predictions, actuals = vstack(predictions), vstack(actuals)
    acc = accuracy_score(actuals, predictions)
    return acc
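
# --- Illustrative sketch (assumption): end-to-end smoke run tying the pieces
# together. The original repo drives these functions from a separate script;
# the loaders here come from the placeholder helper above.
if __name__ == "__main__":
    train_dl, test_dl = _make_demo_loaders()
    teacher = MLP(n_inputs=26)
    train_model(train_dl, test_dl, teacher)
    print("teacher test accuracy:", evaluate_model(test_dl, teacher))
    _demo_soft_labels(train_dl, teacher)
    _demo_student(train_dl, teacher)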