<a href="https://colab.research.google.com/github/ayushjain1144/NER/blob/master/NER_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
import numpy as np
import sys
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook


# Loading the features and vocabulary

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

!ln -s content/gdrive/My\ Drive/NER /ner_dir

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
ln: failed to create symbolic link '/ner_dir/NER': Operation not supported


In [3]:
%cd ..
%cd /ner_dir


/
/content/gdrive/My Drive/NER


In [4]:
!ls

activations.py			      params-200.npy
Conll.ipynb			      params-250.npy
dataset				      params-2.npy
initial_experiment.ipynb	      params-300.npy
initialization.py		      params-50.npy
loss.py				      params-50-random_initialization.npy
NER_NN.ipynb			      __pycache__
NER_NN_network.ipynb		      README.md
nn.py				      test_features.npy
params-0.npy			      train_features.npy
params-0-random_initialization.npy    val_features.npy
params-0-range_initialization.npy     vocab.npy
params-100.npy			      ytest.npy
params-100-random_initialization.npy  ytrain.npy
params-150.npy			      yval.npy
params-1.npy


In [5]:
import activations
import loss
import initialization as init_layer

In [6]:
vocab = np.load('vocab.npy')
train_features = np.load('train_features.npy').T
test_features = np.load('test_features.npy').T
val_features = np.load('val_features.npy').T
y_train = np.load('ytrain.npy').T
y_val = np.load('yval.npy').T
y_test = np.load('ytest.npy').T

In [7]:
print(train_features.shape)
print(vocab.shape)
print(test_features.shape)
print(val_features.shape)
print(y_train.shape)
print(y_test.shape)
print(y_val.shape)

(900, 204566)
(26872, 300)
(900, 46665)
(900, 51577)
(10, 204566)
(10, 46665)
(10, 51577)


In [8]:
train_features[:10]

array([[ 1.        , -0.92605459,  0.00982666, ...,  1.38121068,
         0.05078125,  1.46252757],
       [ 0.        , -1.13792351,  0.2265625 , ...,  0.8632994 ,
        -0.09326172, -0.07399186],
       [ 0.        , -0.7880129 ,  0.28125   , ...,  0.76144395,
         0.06494141,  0.03143081],
       ...,
       [ 0.        , -0.43820235, -0.03540039, ...,  0.7176954 ,
        -0.08154297,  0.11263644],
       [ 0.        , -0.95179252,  0.14746094, ...,  0.69231712,
         0.13085938,  0.80112245],
       [ 0.        , -1.45894089,  0.12890625, ..., -1.61798501,
         0.12597656, -0.11506672]])

In [9]:
y_train[:10]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [10]:
C = 1
word_vector_dim = 300
num_tags = 10

# Neural Netwwork Architecture

In [11]:
nn_architecture = [
  {"layer_size": 900, "activation": "none"},
  {"layer_size": 300, "activation": "tanh"},
  {"layer_size": 100, "activation": "tanh"},
  {"layer_size":10, "activation": "none"}
]

In [12]:
nn_architecture[1]['activation']

'tanh'

# Initialize Parameters

In [13]:
def initialize_parameters(nn_architecture, initialization = "range_initialization", seed=5):

  parameters = {}
  num_layers = len(nn_architecture)

  for i in range(1, num_layers):

    if initialization == "range_initialization":
      parameters['W' + str(i)] = init_layer.range_initializtion(nn_architecture[i]["layer_size"],
                                            nn_architecture[i - 1]["layer_size"], seed)
    else:
      parameters['W' + str(i)] = init_layer.random_initialization(nn_architecture[i]["layer_size"],
                                            nn_architecture[i - 1]["layer_size"], seed)

    parameters['b' + str(i)] = np.zeros((nn_architecture[i]["layer_size"], 1))

  return parameters

In [14]:
initialize_parameters(nn_architecture, initialization="range_initialization")

{'W1': array([[-0.0393161 ,  0.05242947, -0.04147617, ...,  0.01886445,
         -0.0673961 ,  0.03122635],
        [-0.06872783,  0.05610709,  0.01245434, ..., -0.02137381,
         -0.02523462,  0.06828883],
        [ 0.01937873, -0.02733413, -0.0442238 , ...,  0.0283923 ,
         -0.01530554, -0.06579863],
        ...,
        [ 0.00216721,  0.0385514 , -0.00354598, ...,  0.05043729,
         -0.00701171, -0.00209508],
        [-0.06960359, -0.06415798,  0.03159176, ...,  0.0246166 ,
         -0.02451427, -0.03227124],
        [ 0.01399195,  0.05700679,  0.00883558, ...,  0.01464168,
         -0.01428649,  0.02139781]]),
 'W2': array([[-0.06809749,  0.0908105 , -0.07183884, ..., -0.03299794,
          0.10121721, -0.01631657],
        [ 0.10426298,  0.11301164, -0.12085584, ..., -0.07972386,
         -0.11218951,  0.0330916 ],
        [-0.06441902,  0.04898989, -0.02037828, ...,  0.03267418,
         -0.11673346,  0.05408562],
        ...,
        [-0.08484582,  0.09470461,  0.0761

# Forward Propogation

In [15]:
# Z = W * X + b
# Here A is output of previous layer

def linear_forward(A_prev, W, b):
  return np.dot(W, A_prev) + b

# apply activation h:  A = h(X) 
def apply_activation(A, activation, alpha=0.01):

  if activation == "sigmoid":
    return activations.sigmoid(A)
  elif activation == "tanh":
    return activations.tanh(A)
  elif activation == "relu":
    return activations.relu(A)
  elif activation == "leaky_relu":
    return activations.leaky_relu(A, alpha)
  elif activation == 'none':
    return A;
  else:
    print(f"ERROR: {activation} activation not supported")
    sys.exit(1)
  
# driver forward propogation
def model_forward(X, parameters, nn_architecture, alpha=0.01):

  forward_cache = []
  A = X
  num_layers = len(nn_architecture)

  for i in range(1, num_layers):
    A_prev = A
    W = parameters["W" + str(i)]
    b = parameters["b" + str(i)]

    Z = linear_forward(A_prev, W, b)
    activation = nn_architecture[i]['activation']
    A = apply_activation(Z, activation, alpha)

    forward_cache.append(((A_prev, W, b), Z))

  return A, forward_cache  

# Backpropogation 

In [16]:
def linear_backward(dz, cache):
  A_prev, W, b = cache
  m = A_prev.shape[1]

  dw = (1 / m) * np.dot(dz, A_prev.T)
  db = (1 / m) * np.sum(dz, axis=1, keepdims=True)
  dA_prev = np.dot(W.T, dz)

  assert dA_prev.shape == A_prev.shape
  assert dw.shape == W.shape
  assert db.shape == b.shape

  return dA_prev, dw, db      

In [17]:
def apply_activation_backward(dA, cache, activation_fn):
  linear_cache, activation_cache = cache

  if activation_fn == "sigmoid":
    dZ = activations.sigmoid_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)

  elif activation_fn == "tanh":
    dZ = activations.sigmoid_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)

  elif activation_fn == "relu":
    dZ = activations.sigmoid_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)

  elif activation_fn == "leaky_relu":
    dZ = activations.sigmoid_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)
  
  elif activation_fn == "none":
    dZ = dA
    dA_prev, dw, db = linear_backward(dZ, linear_cache)
  
  else:
    print("Activation not available")
    sys.exit(1)

  return dA_prev, dw, db

  


In [18]:
def model_backward(AL, y, caches, nn_architecture):
   
  y = y.reshape(AL.shape)
  L = len(caches)
  grads = {}

  dAL = np.divide(AL - y, np.multiply(AL, 1 - AL))
  # print(caches.keys())
  # print(grads.keys())
  # print(len(caches))
  grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = apply_activation_backward(dAL, caches[L-1], nn_architecture[L-1]['activation'])

  for l in range(L - 1, 0, -1):
    current_cache = caches[l - 1]
    grads["dA" + str(l - 1)], grads["dW" + str(l)],  \
        grads["db" + str(l)] = apply_activation_backward(
            grads["dA" + str(l)], current_cache, 
            nn_architecture[l]['activation']
        )
  return grads

In [19]:
def update_parameters(parameters, grads, lr):

  L = len(parameters) // 2

  for l in range(1, L + 1):
    parameters["W" + str(l)] = parameters["W" + str(l)] - \
            lr * grads["dW" + str(l)]
    parameters["b" + str(l)] = parameters["b" + str(l)] - \
            lr * grads["db" + str(l)]
  return parameters

In [20]:
def model(X, y, nn_architecture, initialization='range_initialisation', lr=0.01, num_iterations=2000, print_cost=True, checkpoint_initialisation=None):

  np.random.seed(1)

  if checkpoint_initialisation == None:
    parameters = initialize_parameters(nn_architecture, initialization)
  else:
    print(f"Loading checkpoints from file {checkpoint_initialisation}")
    parameters = np.load(checkpoint_initialisation, allow_pickle=True).item()

  cost_list = []
  val_list = []
  #iterate over iterations

  for i in tqdm_notebook(range(0, num_iterations)):

    #forward step
    AL, caches = model_forward(X, parameters, nn_architecture)

    cost = loss.cross_entropy_loss(AL, y)

    grads = model_backward(AL, y, caches, nn_architecture)

    parameters = update_parameters(parameters, grads, lr)

    if (i + 1) % 10 == 0 and print_cost:
      print(f"The cost after {i + 1} iterations is: {cost: .4f}")
      acc = accuracy(val_features, parameters, y_val, nn_architecture)
      print(f"The val accuracy after {i + 1} iterations is: {acc}")
      val_list.append(acc)

    if i % 10 == 0:
      cost_list.append(cost)

    if i % 50 == 0:
      print(f"Creating checkpoint for {i}th iteration")
      np.save(f'params-{i}-{initialization}.npy', parameters)

    #plotting cost curve

  plt.figure(figsize=(10, 6))
  plt.plot(cost_list)
  plt.xlabel("Iterations (per 50)")
  plt.ylabel("Loss")
  plt.title(f"Loss curve for the learning rate = {lr}")

  plt.figure(figsize=(10, 6))
  plt.plot(val_list)
  plt.xlabel("Iterations (per 50)")
  plt.ylabel("Val Accuracy")
  plt.title(f"Val Accuracy curve for the learning rate = {lr}")

  return parameters

In [21]:
def accuracy(X, parameters, y, nn_architecture):
  probs, caches = model_forward(X, parameters, nn_architecture)
  labels = (probs >= 0.5) * 1
  accuracy = np.mean(labels == y) * 100

  return f"The accuracy rate is {accuracy: .2f}%."

In [None]:
params = model(train_features, y_train, nn_architecture, initialization='range_initialization', lr=0.01, num_iterations=300)

accuracy(test_features, params, y_test, nn_architecture)


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  from ipykernel import kernelapp as app


HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

Creating checkpoint for 0th iteration
The cost after 10 iterations is: -3044697.1238
The val accuracy after 10 iterations is: The accuracy rate is  77.39%.
The cost after 20 iterations is: -3070226.3432
The val accuracy after 20 iterations is: The accuracy rate is  78.25%.
The cost after 30 iterations is: -3073481.9293
The val accuracy after 30 iterations is: The accuracy rate is  79.43%.
