<a href="https://colab.research.google.com/github/ayushjain1144/NER/blob/master/NER_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
import numpy as np
import sys

# Loading the features and vocabulary

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

!ln -s content/gdrive/My\ Drive/NER /ner_dir

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [3]:
%cd ..
%cd /ner_dir


/
/content/gdrive/My Drive/NER


In [4]:
!ls

activations.py		  loss.py		README.md	    ytest.npy
Conll.ipynb		  NER_NN.ipynb		test_features.npy   ytrain.npy
dataset			  NER_NN_network.ipynb	train_features.npy  yval.npy
initial_experiment.ipynb  nn.py			val_features.npy
initialization.py	  __pycache__		vocab.npy


In [5]:
import activations
import loss
import initialization as init_layer

In [12]:
vocab = np.load('vocab.npy')
train_features = np.load('train_features.npy').T
test_features = np.load('test_features.npy').T
val_features = np.load('val_features.npy').T
y_train = np.load('ytrain.npy').T
y_val = np.load('yval.npy').T
y_test = np.load('ytest.npy').T

In [13]:
print(train_features.shape)
print(vocab.shape)
print(test_features.shape)
print(val_features.shape)
print(y_train.shape)
print(y_test.shape)
print(y_val.shape)

(900, 204566)
(26872, 300)
(900, 46665)
(900, 51577)
(10, 204566)
(10, 46665)
(10, 51577)


In [14]:
train_features[:10]

array([[ 1.        , -0.92605459,  0.00982666, ...,  1.38121068,
         0.05078125,  1.46252757],
       [ 0.        , -1.13792351,  0.2265625 , ...,  0.8632994 ,
        -0.09326172, -0.07399186],
       [ 0.        , -0.7880129 ,  0.28125   , ...,  0.76144395,
         0.06494141,  0.03143081],
       ...,
       [ 0.        , -0.43820235, -0.03540039, ...,  0.7176954 ,
        -0.08154297,  0.11263644],
       [ 0.        , -0.95179252,  0.14746094, ...,  0.69231712,
         0.13085938,  0.80112245],
       [ 0.        , -1.45894089,  0.12890625, ..., -1.61798501,
         0.12597656, -0.11506672]])

In [15]:
y_train[:10]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [16]:
C = 1
word_vector_dim = 300
num_tags = 10

# Neural Netwwork Architecture

In [39]:
nn_architecture = [
  {"layer_size": 900, "activation": "none"},
  {"layer_size": 300, "activation": "relu"},
  {"layer_size": 100, "activation": "relu"},
  {"layer_size":10, "activation": "none"}
]

In [18]:
nn_architecture[1]['activation']

'relu'

# Initialize Parameters

In [19]:
def initialize_parameters(nn_architecture, initialization = "range_initialization", seed=5):

  parameters = {}
  num_layers = len(nn_architecture)

  for i in range(1, num_layers):

    if initialization == "range_initialization":
      parameters['W' + str(i)] = init_layer.range_initializtion(nn_architecture[i]["layer_size"],
                                            nn_architecture[i - 1]["layer_size"], seed)
    else:
      parameters['W' + str(i)] = init_layer.random_initializtion(nn_architecture[i]["layer_size"],
                                            nn_architecture[i - 1]["layer_size"], seed)

    parameters['b' + str(i)] = np.zeros((nn_architecture[i]["layer_size"], 1))

  return parameters

In [None]:
initialize_parameters(nn_architecture, initialization="range_initialization")

# Forward Propogation

In [35]:
# Z = W * X + b
# Here A is output of previous layer

def linear_forward(A_prev, W, b):
  return np.dot(W, A_prev) + b

# apply activation h:  A = h(X) 
def apply_activation(A, activation, alpha=0.01):

  if activation == "sigmoid":
    return activations.sigmoid(A)
  elif activation == "tanh":
    return activations.tanh(A)
  elif activation == "relu":
    return activations.relu(A)
  elif activation == "leaky_relu":
    return activations.leaky_relu(A, alpha)
  elif activation == 'none':
    return A;
  else:
    print(f"ERROR: {activation} activation not supported")
    sys.exit(1)
  
# driver forward propogation
def model_forward(X, parameters, nn_architecture, alpha=0.01):

  forward_cache = {}
  A = X
  num_layers = len(nn_architecture)

  for i in range(1, num_layers):
    A_prev = A
    W = parameters["W" + str(i)]
    b = parameters["b" + str(i)]

    Z = linear_forward(A_prev, W, b)
    activation = nn_architecture[i]['activation']
    A = apply_activation(Z, activation, alpha)

    forward_cache['Z' + str(i)] = Z
    forward_cache['A' + str(i - 1)] = A

  return A, forward_cache  

# Backpropogation 

In [22]:
def linear_backward(dz, cache):
  A_prev, W, b = cache
  m = A_prev.shape[1]

  dw = (1 / m) * np.dot(dz, A_prev.T)
  db = (1 / m) * np.sum(dZ, axis=1, keepims=True)
  dA_prev = np.dot(W.T, dZ)

  assert dA_prev.shape == A_prev.shape
  assert dW.shape == W.shape
  assert db.shape == b.shape

  return dA_prev, dw, db      

In [23]:
def apply_activation_backward(dA, cache, activation_fn):
  linear_cache, activation_cache = cache

  if activation_fn == "sigmoid":
    dZ = activations.sigmoid_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)

  elif activation_fn == "tanh":
    dZ = activations.sigmoid_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)

  elif activation_fn == "relu":
    dZ = activations.sigmoid_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)

  elif activation_fn == "leaky_relu":
    dZ = activations.sigmoid_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)
  
  elif activation_fn == "none":
    dZ = dA
    dA_prev, dw, db = linear_backward(dZ, linear_cache)
  
  else:
    print("Activation not available")
    sys.exit(1)

  return dA_prev, dw, db

  


In [48]:
def model_backward(AL, y, caches, nn_architecture):
   
  y = y.reshape(AL.shape)
  L = len(caches)
  grads = {}

  dAL = np.divide(AL - y, np.multiply(AL, 1 - AL))
  print(L)
  print(caches)
  grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = apply_activation_backward(dAL, caches[L-1], nn_architecture[L-1]['activation'])

  for l in range(L - 1, 0, -1):
    current_cache = caches[l - 1]
    grads["dA" + str(l - 1)], grads["dW" + str(l)],  \
        grads["db" + str(l)] = apply_activation_backward(
            grads["dA" + str(l)], current_cache, 
            nn_architecture[l]['activation']
        )
  return grads

In [49]:
def update_parameters(parameters, grads, lr):

  L = len(parameters)

  for l in range(1, L + 1):
    parameters["W" + str(l)] = parameters["W" + str(l)] - \
            lr * grads["dW" + str(l)]
    parametets["b" + str(l)] = parameters["b" + str(l)] - \
            lr * grads["db" + str(l)]
  return parameters

In [50]:
def model(X, y, nn_architecture, initialization='range_initialisation', lr=0.01, num_iterations=2000, print_cost=True):

  np.random.seed(1)

  parameters = initialize_parameters(nn_architecture, initialization)

  cost_list = []

  #iterate over iterations

  for i in range(num_iterations):

    #forward step
    AL, caches = model_forward(X, parameters, nn_architecture)

    cost = loss.cross_entropy_loss(AL, y)

    grads = model_backward(AL, y, caches, nn_architecture)

    parameters = update_parameters(parameters, grads, lr)

    if (i + 1) % 100 == 0 and print_cost:
      print(f"The cost after {i + 1} iterations is: {cost: .4f}")

    if i % 100 == 0:
      cost_list.append(cost)

    #plotting cost curve

    plt.figure(figsize=(10, 6))
    plt.plot(cost_list)
    plt.xlabel("Iterations (per hundreds)")
    plt.ylabel("Loss")
    plt.title(f"Loss curve for the learning rate = {lr}")

    return parameters

In [51]:
def accuracy(X, parameters, y, nn_architecture):
  probs, caches = model_forward(X, parameters, nn_architecture)
  labels = (probs >= 0.5) * 1
  accuracy = np.mean(labels == y) * 100

  return f"The accuracy rate is {accuracy: .2f}%."

In [52]:
params = model(train_features, y_train, nn_architecture, initialization='range_initialization', lr=0.01, num_iterations=3000, print_cost=True)

accuracy(X_test, params, y_test, nn_architecture)


6
{'Z1': array([[ 0.5297159 , -0.66358821, -0.81688732, ..., -0.71350158,
        -0.36861749, -0.37151481],
       [ 0.14650086,  1.25460723, -0.84837228, ..., -0.7358055 ,
         0.49316129, -0.50739974],
       [-0.86395663,  2.02697808,  0.25557028, ...,  0.44482093,
        -1.20023437, -0.46970608],
       ...,
       [-0.79716557,  0.84036755, -1.02397446, ...,  0.39510788,
        -0.57041576,  0.82304681],
       [-0.02517139, -0.04335126, -0.35848163, ...,  1.7170243 ,
        -1.31911873, -0.39130926],
       [-0.00897232, -1.60298961, -0.0200274 , ...,  0.25629046,
         1.71444287,  0.97357491]]), 'A0': array([[0.5297159 , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.14650086, 1.25460723, 0.        , ..., 0.        , 0.49316129,
        0.        ],
       [0.        , 2.02697808, 0.25557028, ..., 0.44482093, 0.        ,
        0.        ],
       ...,
       [0.        , 0.84036755, 0.        , ..., 0.39510788, 0.        ,
    

KeyError: ignored