<a href="https://colab.research.google.com/github/ayushjain1144/NER/blob/master/NER_NN_balanced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
import numpy as np
import sys
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import imblearn
from imblearn.over_sampling import SMOTE




# Loading the features and vocabulary

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

!ln -s content/gdrive/My\ Drive/NER /ner_dir

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
ln: failed to create symbolic link '/ner_dir/NER': Operation not supported


In [3]:
%cd ..
%cd /ner_dir


/
/content/gdrive/My Drive/NER


In [4]:
!ls

activations.py			      params-450-range_initialization.npy
cal_statistics.py		      params-500-range_initialization.npy
Conll.ipynb			      params-50.npy
dataset				      params-50-random_initialization.npy
fig1-loss-Exp1.jpg		      params-50-range_initialization.npy
initial_experiment.ipynb	      params-550-range_initialization.npy
initialization.py		      params-600-range_initialization.npy
loss.py				      params-650-range_initialization.npy
NER_NN.ipynb			      params-700-range_initialization.npy
NER_NN_network.ipynb		      params-750-range_initialization.npy
nn.py				      params-800-range_initialization.npy
params-0.npy			      params-850-range_initialization.npy
params-0-random_initialization.npy    params-900-range_initialization.npy
params-0-range_initialization.npy     params-950-range_initialization.npy
params-100.npy			      __pycache__
params-100-random_initialization.npy  README.md
params-100-range_initialization.npy   results_bal.txt
params-150.npy			      results.txt
pa

In [5]:
import activations
import loss
import initialization as init_layer

In [6]:
vocab = np.load('vocab.npy')
train_features = np.load('train_features.npy').T
test_features = np.load('test_features.npy').T
val_features = np.load('val_features.npy').T
y_train = np.load('ytrain.npy').T
y_val = np.load('yval.npy').T
y_test = np.load('ytest.npy').T

In [7]:
print(train_features.shape)
print(vocab.shape)
print(test_features.shape)
print(val_features.shape)
print(y_train.shape)
print(y_test.shape)
print(y_val.shape)

(900, 204566)
(26872, 300)
(900, 46665)
(900, 51577)
(10, 204566)
(10, 46665)
(10, 51577)


In [8]:
train_features[:10]

array([[ 1.        , -0.92605459,  0.00982666, ...,  1.38121068,
         0.05078125,  1.46252757],
       [ 0.        , -1.13792351,  0.2265625 , ...,  0.8632994 ,
        -0.09326172, -0.07399186],
       [ 0.        , -0.7880129 ,  0.28125   , ...,  0.76144395,
         0.06494141,  0.03143081],
       ...,
       [ 0.        , -0.43820235, -0.03540039, ...,  0.7176954 ,
        -0.08154297,  0.11263644],
       [ 0.        , -0.95179252,  0.14746094, ...,  0.69231712,
         0.13085938,  0.80112245],
       [ 0.        , -1.45894089,  0.12890625, ..., -1.61798501,
         0.12597656, -0.11506672]])

In [9]:
y_train[:10]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [10]:
C = 1
word_vector_dim = 300
num_tags = 10

# Neural Netwwork Architecture

In [11]:
nn_architecture = [
  {"layer_size": 900, "activation": "none"},
  #{"layer_size": 300, "activation": "relu"},
  #{"layer_size": 100, "activation": "relu"},
  {"layer_size": 10, "activation": "sigmoid"}
]

In [12]:
nn_architecture[1]['activation']

'sigmoid'

# Initialize Parameters

In [13]:
def initialize_parameters(nn_architecture, initialization = "range_initialization", seed=5):

  parameters = {}
  num_layers = len(nn_architecture)

  for i in range(1, num_layers):

    if initialization == "range_initialization":
      parameters['W' + str(i)] = init_layer.range_initializtion(nn_architecture[i]["layer_size"],
                                            nn_architecture[i - 1]["layer_size"], seed)
      # print(parameters['W1'])
    else:
      parameters['W' + str(i)] = init_layer.random_initialization(nn_architecture[i]["layer_size"],
                                            nn_architecture[i - 1]["layer_size"], seed)

    parameters['b' + str(i)] = np.zeros((nn_architecture[i]["layer_size"], 1))

  return parameters

In [14]:
param = initialize_parameters(nn_architecture, initialization="range_initialization")
for k in param.keys():
  print(f"{k}: {param[k].shape}")

W1: (10, 900)
b1: (10, 1)


# Forward Propogation

In [15]:
# Z = W * X + b
# Here A is output of previous layer

def linear_forward(A_prev, W, b):
  return np.dot(W, A_prev) + b

# apply activation h:  A = h(X) 
def apply_activation(A, activation, alpha=0.01):

  if activation == "sigmoid":
    return activations.sigmoid(A)
  elif activation == "tanh":
    return activations.tanh(A)
  elif activation == "relu":
    return activations.relu(A)
  elif activation == "leaky_relu":
    return activations.leaky_relu(A, alpha)
  elif activation == "softmax":
    return loss.softmax(A)
  elif activation == 'none':
    #print("None activation used")
    return A
  else:
    print(f"ERROR: {activation} activation not supported")
    sys.exit(1)
  
# driver forward propogation
def model_forward(X, parameters, nn_architecture, alpha=0.01):

  forward_cache = []
  A = X
  num_layers = len(nn_architecture)

  for i in range(1, num_layers):
    A_prev = A
    # print(f"{A_prev.shape}: for")
    W = parameters["W" + str(i)]
    b = parameters["b" + str(i)]

    Z = linear_forward(A_prev, W, b)
    activation = nn_architecture[i]['activation']
    # print(activation)
    A = apply_activation(Z, activation, alpha)
    # print(A)
    forward_cache.append(((A_prev, W, b), Z))

  # print("after loop")
  # print(A)
  return A, forward_cache  

# Backpropogation 

In [16]:
def linear_backward(dz, cache):
  A_prev, W, b = cache
  m = A_prev.shape[1]
  # print(m)
  dw = (1 / m) * np.dot(dz, A_prev.T)
  db = (1 / m) * np.sum(dz, axis=1, keepdims=True)
  dA_prev = np.dot(W.T, dz)

  assert dA_prev.shape == A_prev.shape
  assert dw.shape == W.shape
  assert db.shape == b.shape

  return dA_prev, dw, db      

In [17]:
def apply_activation_backward(dA, cache, activation_fn):
  linear_cache, activation_cache = cache

  if activation_fn == "sigmoid":
    dZ = activations.sigmoid_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)

  elif activation_fn == "tanh":
    dZ = activations.tanh_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)

  elif activation_fn == "relu":
    dZ = activations.relu_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)

  elif activation_fn == "leaky_relu":
    dZ = activations.leaky_relu_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)
  
  elif activation_fn == "none":
    dZ = dA
    dA_prev, dw, db = linear_backward(dZ, linear_cache)
  
  else:
    print("Activation not available")
    sys.exit(1)

  return dA_prev, dw, db

  


In [18]:
def model_backward(AL, y, caches, nn_architecture):
  
  y = y.reshape(AL.shape)
  L = len(caches)
  # print(f"Len of cache: {len(caches)}")
  grads = {}

  dAL = np.divide(AL - y, np.multiply(AL, 1 - AL))
  # print(caches.keys())
  # print(grads.keys())
  # print(len(caches))
  grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = apply_activation_backward(dAL, caches[L-1], nn_architecture[L]['activation'])

  for l in range(L - 1, 0, -1):
    # print(l)
    current_cache = caches[l - 1]
    grads["dA" + str(l - 1)], grads["dW" + str(l)],  \
        grads["db" + str(l)] = apply_activation_backward(
            grads["dA" + str(l)], current_cache, 
            nn_architecture[l]['activation']
        )
    
  return grads

In [19]:
def update_parameters(parameters, grads, lr):

  L = len(parameters) // 2

  for l in range(1, L + 1):
    parameters["W" + str(l)] = parameters["W" + str(l)] - \
            lr * grads["dW" + str(l)]
    parameters["b" + str(l)] = parameters["b" + str(l)] - \
            lr * grads["db" + str(l)]
  return parameters

In [None]:
# import warnings
# warnings.filterwarnings("ignore")
# def balance_data(X, y):
#   X_ret = []
#   y_ret = []
#   X = X[:100000]
#   y = y[:100000]
#   for j in range(0, X.shape[1], 10000):
#     oversample = SMOTE()
#     X_bal, y_bal = oversample.fit_resample(X.T[j:j+10000], y.T[j:j+10000])
#     print(j)
#     X_ret.append(X_bal)
#     y_ret.append(y_bal)
#   return np.array(X_ret), np.array(y_ret)

# X_bal, y_bal = balance_data(train_features, y_train)
# print(X_bal.shape)
# print(y_bal.shape)

In [20]:
def model(X, y, nn_architecture, exp_name, initialization='range_initialisation', lr=0.01, num_iterations=2000, print_cost=True, checkpoint_initialisation=None):

  np.random.seed(1)

  if checkpoint_initialisation == None:
    parameters = initialize_parameters(nn_architecture, initialization)
  else:
    print(f"Loading checkpoints from file {checkpoint_initialisation}")
    parameters = np.load(checkpoint_initialisation, allow_pickle=True).item()

  cost_list = []
  val_loss = []
  val_list = []
  train_list = []
  #iterate over iterations

  for i in tqdm_notebook(range(0, num_iterations)):

    for j in tqdm_notebook(range(0, X.shape[1], 10000)):
      #forward step

      #balance the data

      oversample = SMOTE()
      X_bal, y_bal = oversample.fit_resample(X.T[j:j+10000], y.T[j:j+10000])
      AL, caches = model_forward(X_bal.T, parameters, nn_architecture)
      
      # print(cost)
      grads = model_backward(AL, y_bal.T, caches, nn_architecture)

      parameters = update_parameters(parameters, grads, lr)

    AL_val, _ = model_forward(val_features, parameters, nn_architecture)
    #cost = loss.cross_entropy_loss(AL, y)
    val_cost = loss.cross_entropy_loss(AL_val, y_val)  
    print(f"The cost after {i + 1} iterations is: Val: {val_cost: .4f}")

    train_acc = accuracy(X, parameters, y, nn_architecture)
    acc = accuracy(val_features, parameters, y_val, nn_architecture)
    print(f"After {i + 1} iterations: Val Acc: {acc}; Train Acc: {train_acc}")
    val_list.append(acc)
    train_list.append(train_acc)

    
    #cost_list.append(cost)
    val_loss.append(val_cost)
    if i % 50 == 0:
      print(f"Creating checkpoint for {i}th iteration")
      np.save(f'params-{i}-{initialization}.npy', parameters)

    #plotting cost curve

  plt.figure(figsize=(10, 6))
  #plt.plot(cost_list, 'b', label='train_loss')
  plt.plot(val_loss, 'r', label='val_loss')
  plt.legend()
  plt.xlabel("Iterations")
  plt.ylabel("Loss")
  plt.title(f"Loss curve for the learning rate = {lr}")
  plt.savefig(f'fig1-loss-{exp_name}.jpg', bbox_inches='tight')
  plt.figure(figsize=(10, 6))
  plt.plot(val_list, 'g', label="val acc")
  plt.ylim(0, 100)
  plt.legend()
  plt.savefig(f'val-acc-{exp_name}.jpg', bbox_inches='tight')
  plt.figure(figsize=(10,6))
  plt.plot(train_list, 'r', label ="train acc")
  plt.ylim(0, 100)
  plt.legend()
  plt.savefig(f'train-loss-{exp_name}.jpg', bbox_inches='tight')
  plt.xlabel("Iterations")
  plt.ylabel("Accuracy")
  plt.title(f"Accuracy curve for the learning rate = {lr}")

  return parameters

In [21]:
def accuracy(X, parameters, y, nn_architecture):
  probs, caches = model_forward(X, parameters, nn_architecture)
  labels = np.argmax(probs, axis=0)
  gt = np.argmax(y, axis=0)
  accuracy = np.mean(labels==gt) * 100
  
  return accuracy

In [22]:
# Todo

#implement margin loss
#implement regularisation

# Gradient Checking

In [23]:
from numpy.linalg import norm

def dictionary_to_vector(params_dict):
  count = 0

  for key in params_dict.keys():
    new_vector = np.reshape(params_dict[key], (-1, 1))

    if count == 0:
      theta_vector = new_vector
    else:
      theta_vector = np.concatenate((theta_vector, new_vector))
    count += 1

  return theta_vector



In [24]:
def vector_to_dictionary(vector, nn_architecture):

  L = len(nn_architecture)
  parameters = {}
  k = 0

  for l in range(1, L):

    w_dim = nn_architecture[l]['layer_size'] * nn_architecture[l - 1]['layer_size']
    b_dim = nn_architecture[l]['layer_size']

    temp_dim = k + w_dim

    parameters["W" + str(l)] = vector[k:temp_dim].reshape(nn_architecture[l]['layer_size'], nn_architecture[l - 1]['layer_size'])
    parameters["b" + str(l)] = vector[temp_dim:temp_dim + b_dim].reshape(b_dim, 1)

    k += w_dim + b_dim

  return parameters

In [25]:
def gradients_to_vector(gradients):

  valid_grads = [key for key in gradients.keys() if not key.startswith("dA")]
  L = len(valid_grads) // 2

  count = 0

  for l in range(1, L + 1):
    if count == 0:
      new_grads = gradients["dW" + str(l)].reshape(-1, 1)
      new_grads = np.concatenate((new_grads, gradients["db" + str(l)].reshape(-1, 1)))

    else:
      new_grads = np.concatenate((new_grads, gradients["dW" + str(l)].reshape(-1, 1)))
      new_grads = np.concatenate((new_grads, gradients["db" + str(l)].reshape(-1, 1)))

    count += 1
  return new_grads

In [26]:
def forward_prop_cost(X, parameters, Y, nn_architecture):

  AL, _ = model_forward(X, parameters, nn_architecture)
  cost = loss.cross_entropy_loss(AL, Y)
  # print(cost)
  return cost

def gradient_check(parameters, gradients, X, Y, nn_architecture, epsilon=1e-4):

  parameters_vector = dictionary_to_vector(parameters)
  gradients_vector = gradients_to_vector(gradients)

  grads_approx = np.zeros_like(parameters_vector)

  for i in range(len(parameters_vector)):

    theta_plus = np.copy(parameters_vector)
    theta_plus[i] = theta_plus[i] + epsilon
    j_plus = forward_prop_cost(X, vector_to_dictionary(theta_plus, nn_architecture), Y, nn_architecture)


    theta_minus = np.copy(parameters_vector)
    theta_minus[i] = theta_minus[i] - epsilon
    j_minus = forward_prop_cost(X, vector_to_dictionary(theta_minus, nn_architecture), Y, nn_architecture)

    grads_approx[i] = (j_plus - j_minus) / (2 * epsilon)

    #print(f"grads_approx{i}: {grads_approx[i]} and {gradients_vector[i]}")

  

  numerator = norm(gradients_vector - grads_approx)
  denominator = norm(grads_approx) + norm(gradients_vector)

  difference = numerator / denominator

  if difference > 10e-4:
    print(f"Backprop Wrong: difference = {difference}")
  else:
    print(f"Backprop Correct: difference = {difference}")

  return difference





### Execute the following cell for gradient checking. 

In [27]:
# performing graident checking

def perform_gradient_check():
  parameters = initialize_parameters(nn_architecture)

  perms = np.random.permutation(train_features.shape[1])
  index = perms[:1]
  print(train_features[:, index].shape)
  print(y_train[:, index].shape)

  AL, caches = model_forward(train_features[:, index], parameters, nn_architecture)
  grads = model_backward(AL, y_train[:, index], caches, nn_architecture)
  #print(grads)
  difference = gradient_check(parameters, grads, train_features[:, index], y_train[:, index], nn_architecture)

# perform_gradient_check()

### Execute the following cell for training the model

In [None]:
import warnings
warnings.filterwarnings("ignore")
fp = open('results_bal.txt', 'a')

nn_architecture = [
  {"layer_size": 900, "activation": "none"},
  {"layer_size": 300, "activation": "relu"},
  {"layer_size": 100, "activation": "relu"},
  {"layer_size": 10, "activation": "sigmoid"}
]

params = model(train_features, y_train, nn_architecture, exp_name='Exp1', initialization='range_initialization', lr=0.1, num_iterations=100)


accuracy(test_features, params, y_test, nn_architecture)

AL, _ = model_forward(test_features, params, nn_architecture)

from statistics import print_statistics

stats = print_statistics(AL, y_test)

fp.write("Experiment: Multi Layer Perceptron. range initialisatioin, 300 iterations\n\n")
fp.write(stats)
fp.close()

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 1 iterations is: Val:  3.1684
After 1 iterations: Val Acc: 26.66304748240495; Train Acc: 26.46187538496133
Creating checkpoint for 0th iteration


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 2 iterations is: Val:  2.8505
After 2 iterations: Val Acc: 31.655582914865153; Train Acc: 31.90461758063412


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 3 iterations is: Val:  2.6851
After 3 iterations: Val Acc: 32.704500067859705; Train Acc: 32.928736935756675


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 4 iterations is: Val:  2.5738
After 4 iterations: Val Acc: 33.8445431103011; Train Acc: 34.14985872530137


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 5 iterations is: Val:  2.4788
After 5 iterations: Val Acc: 35.76981988095469; Train Acc: 36.152146495507566


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 6 iterations is: Val:  2.3956
After 6 iterations: Val Acc: 38.99800298582702; Train Acc: 39.32569439691835


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 7 iterations is: Val:  2.3333
After 7 iterations: Val Acc: 43.13938383387944; Train Acc: 43.39186375057439


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 8 iterations is: Val:  2.2594
After 8 iterations: Val Acc: 48.03885452818117; Train Acc: 48.32963444560679


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 9 iterations is: Val:  2.2065
After 9 iterations: Val Acc: 51.53072105783586; Train Acc: 51.9890890959397


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 10 iterations is: Val:  2.1535
After 10 iterations: Val Acc: 54.524303468600344; Train Acc: 55.09615478623036


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 11 iterations is: Val:  2.1013
After 11 iterations: Val Acc: 57.006029819493186; Train Acc: 57.655231074567624


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 12 iterations is: Val:  2.0602
After 12 iterations: Val Acc: 58.745177113829804; Train Acc: 59.353460496856755


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 13 iterations is: Val:  2.0026
After 13 iterations: Val Acc: 61.06016247552204; Train Acc: 61.65345169774058


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 14 iterations is: Val:  1.9618
After 14 iterations: Val Acc: 62.11877387207476; Train Acc: 62.902926194968856


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 15 iterations is: Val:  1.9258
After 15 iterations: Val Acc: 63.369331291079355; Train Acc: 64.15777792986127


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 16 iterations is: Val:  1.8870
After 16 iterations: Val Acc: 64.54815130775346; Train Acc: 65.40089750985013


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 17 iterations is: Val:  1.8579
After 17 iterations: Val Acc: 65.45941020222192; Train Acc: 66.26809929313767


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 18 iterations is: Val:  1.8286
After 18 iterations: Val Acc: 66.15157919227562; Train Acc: 67.09570505362572


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 19 iterations is: Val:  1.8047
After 19 iterations: Val Acc: 66.72547841091959; Train Acc: 67.67253600305037


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 20 iterations is: Val:  1.7719
After 20 iterations: Val Acc: 67.46030207262928; Train Acc: 68.52018419483198


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 21 iterations is: Val:  1.7518
After 21 iterations: Val Acc: 67.95470849409621; Train Acc: 69.113146857249


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 22 iterations is: Val:  1.7335
After 22 iterations: Val Acc: 68.3308451441534; Train Acc: 69.61958487725232


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 23 iterations is: Val:  1.7081
After 23 iterations: Val Acc: 68.90474436279736; Train Acc: 70.2242796945729


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 24 iterations is: Val:  1.6927
After 24 iterations: Val Acc: 69.23047094635206; Train Acc: 70.65934710557961


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 25 iterations is: Val:  1.6793
After 25 iterations: Val Acc: 69.46313279174827; Train Acc: 71.07241672614218


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 26 iterations is: Val:  1.6647
After 26 iterations: Val Acc: 69.8082478624193; Train Acc: 71.48939706500592


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 27 iterations is: Val:  1.6438
After 27 iterations: Val Acc: 70.28132694805824; Train Acc: 72.11120127489417


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 28 iterations is: Val:  1.6333
After 28 iterations: Val Acc: 70.60317583418966; Train Acc: 72.50129542543726


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 29 iterations is: Val:  1.6198
After 29 iterations: Val Acc: 70.95216860228396; Train Acc: 72.86792526617326


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 30 iterations is: Val:  1.6128
After 30 iterations: Val Acc: 71.27789518583864; Train Acc: 73.24628726181281


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 31 iterations is: Val:  1.5940
After 31 iterations: Val Acc: 71.89638792485023; Train Acc: 74.05727246952084


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 32 iterations is: Val:  1.5859
After 32 iterations: Val Acc: 72.26476918006088; Train Acc: 74.47278628902163


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 33 iterations is: Val:  1.5691
After 33 iterations: Val Acc: 72.68937704790895; Train Acc: 75.05548331589806


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 34 iterations is: Val:  1.5621
After 34 iterations: Val Acc: 73.02285902631017; Train Acc: 75.4177135985452


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 35 iterations is: Val:  1.5545
After 35 iterations: Val Acc: 73.29235899722745; Train Acc: 75.74132553796818


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 36 iterations is: Val:  1.5413
After 36 iterations: Val Acc: 73.5773697578378; Train Acc: 76.11870985403245


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 37 iterations is: Val:  1.5259
After 37 iterations: Val Acc: 73.92636252593209; Train Acc: 76.52835759608146


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 38 iterations is: Val:  1.5252
After 38 iterations: Val Acc: 74.01748841537895; Train Acc: 76.60803848146809


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 39 iterations is: Val:  1.5159
After 39 iterations: Val Acc: 74.17647400973301; Train Acc: 76.88618832063979


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 40 iterations is: Val:  1.5046
After 40 iterations: Val Acc: 74.44403513193866; Train Acc: 77.16922655768799


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 41 iterations is: Val:  1.4953
After 41 iterations: Val Acc: 74.61853151598581; Train Acc: 77.42537860641553


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 42 iterations is: Val:  1.4819
After 42 iterations: Val Acc: 74.88027609205655; Train Acc: 77.7856535299121


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 43 iterations is: Val:  1.4721
After 43 iterations: Val Acc: 75.10324369389456; Train Acc: 78.05695961205674


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 44 iterations is: Val:  1.4598
After 44 iterations: Val Acc: 75.4153983364678; Train Acc: 78.40696890001271


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 45 iterations is: Val:  1.4554
After 45 iterations: Val Acc: 75.50264652849138; Train Acc: 78.59175033974365


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 46 iterations is: Val:  1.4477
After 46 iterations: Val Acc: 75.70622564321306; Train Acc: 78.84154747123178


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 47 iterations is: Val:  1.4385
After 47 iterations: Val Acc: 75.99705294995832; Train Acc: 79.1289852663688


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 48 iterations is: Val:  1.4365
After 48 iterations: Val Acc: 76.09787308296335; Train Acc: 79.24190725731549


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 49 iterations is: Val:  1.4221
After 49 iterations: Val Acc: 76.40227233069004; Train Acc: 79.60022682166147


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 50 iterations is: Val:  1.4177
After 50 iterations: Val Acc: 76.52441979952305; Train Acc: 79.76789886882473


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 51 iterations is: Val:  1.4047
After 51 iterations: Val Acc: 76.7881032243054; Train Acc: 80.08906660930947
Creating checkpoint for 50th iteration


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 52 iterations is: Val:  1.4053
After 52 iterations: Val Acc: 76.86177947534753; Train Acc: 80.1892787657773


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 53 iterations is: Val:  1.3874
After 53 iterations: Val Acc: 77.26118230994436; Train Acc: 80.65416540383055


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 54 iterations is: Val:  1.3858
After 54 iterations: Val Acc: 77.31547007387015; Train Acc: 80.7626878366884


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 55 iterations is: Val:  1.3779
After 55 iterations: Val Acc: 77.46670027337767; Train Acc: 80.99293137667061


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 56 iterations is: Val:  1.3700
After 56 iterations: Val Acc: 77.61986932159684; Train Acc: 81.22415259622812


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 57 iterations is: Val:  1.3693
After 57 iterations: Val Acc: 77.71293405975531; Train Acc: 81.33560806781186


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 58 iterations is: Val:  1.3591
After 58 iterations: Val Acc: 77.916513174477; Train Acc: 81.63673337700303


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 59 iterations is: Val:  1.3551
After 59 iterations: Val Acc: 77.99212827423077; Train Acc: 81.82004829737102


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 60 iterations is: Val:  1.3425
After 60 iterations: Val Acc: 78.29652752195746; Train Acc: 82.12215128613748


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 61 iterations is: Val:  1.3438
After 61 iterations: Val Acc: 78.32948795005525; Train Acc: 82.23018487920768


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 62 iterations is: Val:  1.3372
After 62 iterations: Val Acc: 78.50204548539077; Train Acc: 82.396390407008


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 63 iterations is: Val:  1.3293
After 63 iterations: Val Acc: 78.69399150784264; Train Acc: 82.58459372525249


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 64 iterations is: Val:  1.3207
After 64 iterations: Val Acc: 78.91695910968068; Train Acc: 82.91602710127782


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 65 iterations is: Val:  1.3135
After 65 iterations: Val Acc: 79.10502743470927; Train Acc: 83.10862997761113


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 66 iterations is: Val:  1.3136
After 66 iterations: Val Acc: 79.1670705934816; Train Acc: 83.23572832239961


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 67 iterations is: Val:  1.3084
After 67 iterations: Val Acc: 79.2834015161797; Train Acc: 83.37455882209164


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 68 iterations is: Val:  1.2953
After 68 iterations: Val Acc: 79.55484033580859; Train Acc: 83.71479131429466


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 69 iterations is: Val:  1.2966
After 69 iterations: Val Acc: 79.54708494096205; Train Acc: 83.78713960286656


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 70 iterations is: Val:  1.2891
After 70 iterations: Val Acc: 79.6653547123718; Train Acc: 83.97632060068632


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))


The cost after 71 iterations is: Val:  1.2824
After 71 iterations: Val Acc: 79.80107412218625; Train Acc: 84.18896590831321


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))

In [37]:
AL, _ = model_forward(test_features, params, nn_architecture)

from statistics import print_statistics

print_statistics(AL, y_test)

              precision    recall  f1-score   support

           0  0.1561148250 0.4910327767 0.2369088468      1617
           1  0.9446104344 0.3233504668 0.4817817720     38132
           2  0.0456329735 0.3618677043 0.0810457516       257
           3  0.1018404908 0.1499096930 0.1212859230      1661
           4  0.2540018121 0.5041966427 0.3378188391      1668
           5  0.0425531915 0.1916167665 0.0696409140       835
           6  0.0133144476 0.6527777778 0.0260966130       216
           7  0.2488299532 0.5519031142 0.3430107527      1156
           8  0.1021489550 0.4943019943 0.1693095877       702
           9  1.0000000000 1.0000000000 1.0000000000       421

    accuracy                      0.3431693989     46665
   macro avg  0.2909047083 0.4720956936 0.2866899000     46665
weighted avg  0.8077926998 0.3431693989 0.4401652936     46665



'              precision    recall  f1-score   support\n\n           0  0.1561148250 0.4910327767 0.2369088468      1617\n           1  0.9446104344 0.3233504668 0.4817817720     38132\n           2  0.0456329735 0.3618677043 0.0810457516       257\n           3  0.1018404908 0.1499096930 0.1212859230      1661\n           4  0.2540018121 0.5041966427 0.3378188391      1668\n           5  0.0425531915 0.1916167665 0.0696409140       835\n           6  0.0133144476 0.6527777778 0.0260966130       216\n           7  0.2488299532 0.5519031142 0.3430107527      1156\n           8  0.1021489550 0.4943019943 0.1693095877       702\n           9  1.0000000000 1.0000000000 1.0000000000       421\n\n    accuracy                      0.3431693989     46665\n   macro avg  0.2909047083 0.4720956936 0.2866899000     46665\nweighted avg  0.8077926998 0.3431693989 0.4401652936     46665\n'

In [38]:
print(AL)

[[6.20492747e-02 1.48528730e-01 2.52194305e-01 ... 6.94615012e-02
  6.46561126e-01 2.66863641e-02]
 [2.17286003e-01 4.06739694e-01 4.78508456e-02 ... 7.14632140e-01
  9.43705062e-02 8.24554374e-01]
 [1.11085666e-01 2.56548678e-02 2.40034765e-03 ... 2.00169192e-02
  5.20006983e-04 2.77311402e-02]
 ...
 [1.80813427e-02 4.75094598e-01 1.06946601e-02 ... 2.39390757e-02
  3.85736154e-02 3.65678144e-02]
 [1.13949319e-02 1.70761593e-02 1.90810535e-01 ... 1.53483625e-03
  8.73313971e-03 7.60739638e-03]
 [1.51473254e-04 4.36428630e-05 3.32858414e-04 ... 1.40821464e-05
  2.45331574e-04 1.30316918e-05]]


In [None]:
from collections import Counter



oversample = SMOTE()
X_bal, y_bal = oversample.fit_resample(train_features.T[:10000], y_train.T[:10000])





In [None]:
X_bal.shape
y_bal.shape


y_bal_t = np.argmax(y_bal, axis=1)
from collections import Counter
counter = Counter(y_bal_t)
print(counter)

y_train_t = np.argmax(y_train.T[:1000], axis=1)
counter = Counter(y_train_t)
print(counter)

Counter({3: 7585, 1: 7585, 8: 7585, 0: 7585, 7: 7585, 4: 7585, 5: 7585, 9: 7585, 6: 7585, 2: 7585})
Counter({1: 843, 4: 41, 0: 23, 8: 22, 3: 19, 5: 18, 7: 16, 9: 12, 6: 5, 2: 1})


In [40]:
# Experiment 1: Single Layer Perceptron. range initialisatioin, 2000 iterations, relu
fp = open('results_bal.txt', 'a')

params = model(train_features, y_train, nn_architecture, exp_name='Exp1', initialization='range_initialization', lr=0.1, num_iterations=1000)


accuracy(test_features, params, y_test, nn_architecture)

AL, _ = model_forward(test_features, params, nn_architecture)

from statistics import print_statistics

stats = print_statistics(AL, y_test)

fp.write("Experiment1: Single Layer Perceptron. range initialisatioin, 1000 iterations, relu\n\n")
fp.write(stats)
fp.close()


# Experiment 2: Single Layer Perceptron. random initialisatioin, 2000 iterations, relu
fp = open('results.txt', 'a')

params = model(train_features, y_train, nn_architecture, exp_name='Exp2', initialization='random_initialization', lr=0.1, num_iterations=1000)


accuracy(test_features, params, y_test, nn_architecture)

AL, _ = model_forward(test_features, params, nn_architecture)

from statistics import print_statistics

stats = print_statistics(AL, y_test)

fp.write("Experiment2: Single Layer Perceptron. random initialisatioin, 1000 iterations, relu\n\n")
fp.write(stats)
fp.close()




# Experiment 3: Multi Layer Perceptron. range initialisatioin, 1000 iterations, relu
fp = open('results.txt', 'a')

nn_architecture = [
  {"layer_size": 900, "activation": "none"},
  {"layer_size": 300, "activation": "relu"},
  {"layer_size": 100, "activation": "relu"},
  {"layer_size": 10, "activation": "sigmoid"}
]

params = model(train_features, y_train, nn_architecture, exp_name='Exp3', initialization='range_initialization', lr=0.1, num_iterations=2000)


accuracy(test_features, params, y_test, nn_architecture)

AL, _ = model_forward(test_features, params, nn_architecture)

from statistics import print_statistics

stats = print_statistics(AL, y_test)

fp.write("Experiment3: Multi Layer Perceptron. range initialisatioin, 1000 iterations, relu\n\n")
fp.write(stats)
fp.close()


# Experiment 4: Multi Layer Perceptron. range initialisatioin, 1000 iterations, tanh
fp = open('results.txt', 'a')

nn_architecture = [
  {"layer_size": 900, "activation": "none"},
  {"layer_size": 300, "activation": "tanh"},
  {"layer_size": 100, "activation": "tanh"},
  {"layer_size": 10, "activation": "sigmoid"}
]

params = model(train_features, y_train, nn_architecture, exp_name='Exp4', initialization='range_initialization', lr=0.1, num_iterations=2000)


accuracy(test_features, params, y_test, nn_architecture)

AL, _ = model_forward(test_features, params, nn_architecture)

from statistics import print_statistics

stats = print_statistics(AL, y_test)

fp.write("Experiment4: Multiple Layer Perceptron. range initialisatioin, 1000 iterations, tanh\n\n")
fp.write(stats)
fp.close()

# Experiment 5: Multi Layer Perceptron. range initialisatioin, 1000 iterations, sigmoid
fp = open('results.txt', 'a')

nn_architecture = [
  {"layer_size": 900, "activation": "none"},
  {"layer_size": 300, "activation": "sigmoid"},
  {"layer_size": 100, "activation": "sigmoid"},
  {"layer_size": 10, "activation": "sigmoid"}
]



params = model(train_features, y_train, nn_architecture, exp_name='Exp5', initialization='range_initialization', lr=0.1, num_iterations=2000)


accuracy(test_features, params, y_test, nn_architecture)

AL, _ = model_forward(test_features, params, nn_architecture)

from statistics import print_statistics

stats = print_statistics(AL, y_test)

fp.write("Experiment5: Multiple Layer Perceptron. range initialisatioin, 1000 iterations, sigmoid\n\n")
fp.write(stats)
fp.close()


# Experiment 6: Multi Layer Perceptron. random initialisatioin, 1000 iterations, relu
fp = open('results.txt', 'a')

nn_architecture = [
  {"layer_size": 900, "activation": "none"},
  {"layer_size": 300, "activation": "relu"},
  {"layer_size": 100, "activation": "relu"},
  {"layer_size": 10, "activation": "sigmoid"}
]

params = model(train_features, y_train, nn_architecture, exp_name='Exp6', initialization='random_initialization', lr=0.1, num_iterations=2000)


accuracy(test_features, params, y_test, nn_architecture)

AL, _ = model_forward(test_features, params, nn_architecture)

from statistics import print_statistics

stats = print_statistics(AL, y_test)

fp.write("Experiment6: Multiple Layer Perceptron. random initialisatioin, 1000 iterations, relu\n\n")
fp.write(stats)
fp.close()


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))

KeyboardInterrupt: ignored

In [None]:
AL, _ = model_forward(test_features, params, nn_architecture)

from statistics import print_statistics

print_statistics(AL, y_test)

              precision    recall  f1-score   support

           0  0.5359281437 0.2213976500 0.3133479212      1617
           1  0.8674863705 0.9806199517 0.9205903714     38132
           2  0.3076923077 0.0311284047 0.0565371025       257
           3  0.3625592417 0.0921131848 0.1469035046      1661
           4  0.5826086957 0.3615107914 0.4461709212      1668
           5  0.3200000000 0.0287425150 0.0527472527       835
           6  0.1111111111 0.0092592593 0.0170940171       216
           7  0.5789473684 0.3235294118 0.4150943396      1156
           8  0.4853556485 0.1652421652 0.2465462274       702
           9  0.9767981439 1.0000000000 0.9882629108       421

    accuracy                      0.8454301939     46665
   macro avg  0.5128487031 0.3213543334 0.3603294569     46665
weighted avg  0.7995516995 0.8454301939 0.8085311248     46665



'              precision    recall  f1-score   support\n\n           0  0.5359281437 0.2213976500 0.3133479212      1617\n           1  0.8674863705 0.9806199517 0.9205903714     38132\n           2  0.3076923077 0.0311284047 0.0565371025       257\n           3  0.3625592417 0.0921131848 0.1469035046      1661\n           4  0.5826086957 0.3615107914 0.4461709212      1668\n           5  0.3200000000 0.0287425150 0.0527472527       835\n           6  0.1111111111 0.0092592593 0.0170940171       216\n           7  0.5789473684 0.3235294118 0.4150943396      1156\n           8  0.4853556485 0.1652421652 0.2465462274       702\n           9  0.9767981439 1.0000000000 0.9882629108       421\n\n    accuracy                      0.8454301939     46665\n   macro avg  0.5128487031 0.3213543334 0.3603294569     46665\nweighted avg  0.7995516995 0.8454301939 0.8085311248     46665\n'