In [0]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
# This triggers an interactive authorisation prompt when run in Colab.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


$
Importing\;necessary\;libraries
$

In [0]:
import os
import random as rd
import numpy as np
import pickle
import matplotlib.pyplot as plt

$
Question\;1(a):
$

$
Data\;preprocessing
$

In [0]:
# Load the training corpus and build the character vocabulary.
os.chdir('/content/drive/My Drive/RNN')
# Read through a context manager so the file handle is always closed.
with open('shakespeare_train.txt', 'r') as corpus_file:
  data = corpus_file.read()
# Sort the unique characters: iteration order of a set of strings changes
# between interpreter runs (string hash randomisation), which would silently
# change the char <-> index mapping from one run to the next.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
# Lookup tables between characters and their integer indices.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

data has 268330 characters, 62 unique.


$
Hyperparameters
$

In [0]:
# Training hyperparameters shared by the cells below.
hidden_size = 50     # number of units in the recurrent hidden layer
seq_length = 25      # characters per training chunk (steps the RNN is unrolled for)
learning_rate = 0.1  # step size used by the Adagrad update

$
Model\;parameters
$

In [0]:
# Model parameters: weights are drawn from a standard normal and scaled by 0.01,
# biases start at zero. Shapes follow from hidden_size and vocab_size.
Wxh = 0.01 * np.random.randn(hidden_size, vocab_size)   # input -> hidden
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden (recurrent)
Why = 0.01 * np.random.randn(vocab_size, hidden_size)   # hidden -> output
bh = np.zeros((hidden_size, 1))                         # hidden bias
by = np.zeros((vocab_size, 1))                          # output bias

$
Function\;for\;reinitializing\;the\;parameters\;of\;RNN
$

In [0]:
def initialize(Wxh,Whh,Why,bh,by):
  """Build and return a fresh parameter tuple ``(Wxh, Whh, Why, bh, by)``.

  The values passed in are not read; they are simply replaced. Shapes are
  taken from the module-level ``hidden_size`` and ``vocab_size`` at call time.
  Weights are standard-normal samples scaled by 0.01 and biases are zeros.
  """
  fresh_Wxh = 0.01 * np.random.randn(hidden_size, vocab_size)   # input -> hidden
  fresh_Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
  fresh_Why = 0.01 * np.random.randn(vocab_size, hidden_size)   # hidden -> output
  fresh_bh = np.zeros((hidden_size, 1))                         # hidden bias
  fresh_by = np.zeros((vocab_size, 1))                          # output bias
  return (fresh_Wxh, fresh_Whh, fresh_Why, fresh_bh, fresh_by)

$
Forward\;Propagation
$

In [0]:
def ForwardPropagate(x,h,z,y,a,inputs,targets,temperature):
  """Run the RNN forward over one chunk and accumulate the cross-entropy loss.

  Args:
    x, h, z, y, a: dicts keyed by time step that are filled in place with the
      one-hot inputs, hidden states, temperature-scaled logits, softmax
      probabilities and pre-activations. ``h[t-1]`` is read at every step, so
      the caller must already have stored the initial state under ``h[-1]``.
    inputs: sequence of input character indices.
    targets: sequence of target character indices, aligned with ``inputs``.
    temperature: the logits are multiplied by ``1/temperature`` before softmax.

  Returns:
    The tuple ``(x, h, z, y, a, loss)`` where ``loss`` is the summed negative
    log-probability of the targets.
  """
  loss = 0
  for t in range(len(inputs)):
    # One-hot encode the input character as a column vector.
    x[t] = np.zeros((vocab_size,1))
    x[t][inputs[t]] = 1
    a[t] = np.dot(Wxh, x[t]) + np.dot(Whh, h[t-1]) + bh
    h[t] = np.tanh(a[t])
    z[t] = np.dot(Why, h[t]) + by
    z[t] = (1/temperature)*z[t]
    # Softmax is invariant to subtracting a constant from the logits; shifting
    # by the maximum keeps every exponent <= 0 so np.exp cannot overflow.
    exp_z = np.exp(z[t] - np.max(z[t]))
    y[t] = exp_z / np.sum(exp_z)
    loss += -np.log(y[t][targets[t],0])
  return x,h,z,y,a,loss

$
Back\;propagation
$

In [0]:
def Backpropagate(y,targets,h,x,inputs,temperature):
  """Backpropagate through time over one chunk and return clipped gradients.

  Args:
    y, h, x: per-time-step dicts of softmax outputs, hidden states and one-hot
      inputs, as filled in by the forward pass (``h[-1]`` is read at step 0).
    targets: target character indices.
    inputs: input character indices (only its length is used here).
    temperature: the same temperature that scaled the logits in the forward pass.

  Returns:
    ``(dWxh, dWhh, dWhy, dbh, dby)``, each clipped element-wise to [-5, 5].
  """
  grad_Wxh = np.zeros_like(Wxh)
  grad_Whh = np.zeros_like(Whh)
  grad_Why = np.zeros_like(Why)
  grad_bh = np.zeros_like(bh)
  grad_by = np.zeros_like(by)
  # Gradient flowing into h[t] from time step t+1; nothing follows the last step.
  carry = np.zeros_like(h[0])

  for t in range(len(inputs)-1, -1, -1):
    # Softmax + cross-entropy gradient w.r.t. the scaled logits is (y - one_hot);
    # the 1/temperature factor comes from the logit scaling.
    delta_out = np.copy(y[t])
    delta_out[targets[t]] -= 1
    delta_out = (1/temperature)*delta_out
    grad_Why += np.dot(delta_out, h[t].T)
    grad_by += delta_out
    delta_h = np.dot(Why.T, delta_out) + carry
    # Derivative of tanh expressed through its output: 1 - h^2.
    delta_a = delta_h*(1-np.square(h[t]))
    grad_bh += delta_a
    grad_Wxh += np.dot(delta_a, x[t].T)
    grad_Whh += np.dot(delta_a, h[t-1].T)
    carry = np.dot(Whh.T, delta_a)

  # Clip in place to mitigate exploding gradients.
  for grad in (grad_Wxh, grad_Whh, grad_Why, grad_bh, grad_by):
    np.clip(grad, -5, 5, out=grad)
  return grad_Wxh,grad_Whh,grad_Why,grad_bh,grad_by

$
Train\;Function
$

In [0]:
def Network(inputs, targets, hprev,temperature):
  """Run one forward and backward pass over a chunk.

  Args:
    inputs: input character indices.
    targets: target character indices.
    hprev: hidden state carried in from the previous chunk (copied, not mutated).
    temperature: softmax temperature handed to both passes.

  Returns:
    ``(loss, dWxh, dWhh, dWhy, dbh, dby, h_last)`` where ``h_last`` is the
    hidden state after the final input of the chunk.
  """
  x, z, y, a = {}, {}, {}, {}
  # The forward pass reads h[t-1], so seed index -1 with the incoming state.
  h = {-1: np.copy(hprev)}
  x, h, z, y, a, loss = ForwardPropagate(x, h, z, y, a, inputs, targets, temperature)
  grads = Backpropagate(y, targets, h, x, inputs, temperature)
  dWxh, dWhh, dWhy, dbh, dby = grads
  last_step = len(inputs) - 1
  return loss, dWxh, dWhh, dWhy, dbh, dby, h[last_step]

$
Sequence\;generation\;Function
$

In [0]:
def sample(h, seed_ix, n,temperature):
  """Sample a sequence of character indices from the model.

  Args:
    h: hidden (memory) state to start from, a column vector.
    seed_ix: index of the seed character fed at the first time step.
    n: number of characters to generate.
    temperature: the logits are multiplied by ``1/temperature`` before softmax.

  Returns:
    A list with the ``n`` sampled character indices.
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    y = (1/temperature)*y
    # Shift by the maximum logit before exponentiating: the softmax value is
    # unchanged, but np.exp can no longer overflow for large logits.
    exp_y = np.exp(y - np.max(y))
    p = exp_y / np.sum(exp_y)
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    # Feed the sampled character back in as the next one-hot input.
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes

In [0]:
def train(temperature,iterations):
  """Train the module-level RNN parameters in place with Adagrad.

  Sweeps over ``data`` from left to right in chunks of ``seq_length``
  characters, wrapping to the start (and zeroing the hidden state) when a full
  chunk plus its target no longer fits. The smoothed loss is printed every 100
  iterations.

  Args:
    temperature: softmax temperature passed through to ``Network``.
    iterations: number of parameter updates to perform.
  """
  step, cursor = 0, 0
  # Adagrad memory: running sum of squared gradients, one array per parameter.
  cache_Wxh = np.zeros_like(Wxh)
  cache_Whh = np.zeros_like(Whh)
  cache_Why = np.zeros_like(Why)
  cache_bh = np.zeros_like(bh)
  cache_by = np.zeros_like(by)
  # Loss of a uniform prediction over the vocabulary for one full chunk.
  smooth_loss = -np.log(1.0/vocab_size)*seq_length

  while step<iterations:
    # Start (or restart) at the beginning of the corpus with a blank memory.
    if step == 0 or cursor+seq_length+1 >= len(data):
      hprev = np.zeros((hidden_size,1))
      cursor = 0
    chunk_in = data[cursor:cursor+seq_length]
    chunk_out = data[cursor+1:cursor+seq_length+1]  # the same chunk shifted by one
    inputs = [char_to_ix[ch] for ch in chunk_in]
    targets = [char_to_ix[ch] for ch in chunk_out]

    # Forward and backward pass over the chunk; carry the hidden state forward.
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = Network(inputs, targets,
                                                      hprev, temperature)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    if step % 100 == 0:
      print ('iter %d, loss: %f' % (step, smooth_loss))

    # Adagrad: per-element step size shrinks with the accumulated squared gradient.
    # The updates use in-place operators so the module-level arrays are modified.
    updates = zip((Wxh, Whh, Why, bh, by),
                  (dWxh, dWhh, dWhy, dbh, dby),
                  (cache_Wxh, cache_Whh, cache_Why, cache_bh, cache_by))
    for param, dparam, mem in updates:
      mem += dparam * dparam
      param += -learning_rate * dparam / np.sqrt(mem + 1e-8)

    cursor += seq_length
    step += 1

In [0]:
def test_RNN(test,hprev,temp):
  """Generate 200 characters seeded with ``test[0]`` and print them.

  Args:
    test: sequence of character indices; only the first element is used.
    hprev: hidden state to start sampling from.
    temp: softmax temperature forwarded to ``sample``.
  """
  generated = sample(hprev, test[0], 200, temperature=temp)
  txt = ''.join([ix_to_char[ix] for ix in generated])
  print('----\n %s \n----' % (txt, ))

$
Question\;1:
$

$
Training\;the\;model\;with\;temperature=1
$

In [0]:
# Experiment: re-initialise a 50-unit RNN and train it for 5000 iterations at temperature 1.
hidden_size = 50
Wxh, Whh, Why, bh, by = initialize(Wxh, Whh, Why, bh, by)
train(temperature=1, iterations=5000)

iter 0, loss: 103.178369
iter 100, loss: 101.683823
iter 200, loss: 98.998454
iter 300, loss: 95.824597
iter 400, loss: 92.806532
iter 500, loss: 89.996824
iter 600, loss: 87.365605
iter 700, loss: 85.120546
iter 800, loss: 82.495669
iter 900, loss: 80.219254
iter 1000, loss: 78.246847
iter 1100, loss: 76.133040
iter 1200, loss: 74.340712
iter 1300, loss: 72.743711
iter 1400, loss: 71.385373
iter 1500, loss: 69.914916
iter 1600, loss: 68.683496
iter 1700, loss: 67.431501
iter 1800, loss: 66.465713
iter 1900, loss: 65.326198
iter 2000, loss: 64.418137
iter 2100, loss: 63.666384
iter 2200, loss: 62.627470
iter 2300, loss: 61.989461
iter 2400, loss: 61.020770
iter 2500, loss: 60.202500
iter 2600, loss: 59.582207
iter 2700, loss: 58.975208
iter 2800, loss: 58.000346
iter 2900, loss: 57.592875
iter 3000, loss: 57.202112
iter 3100, loss: 56.700028
iter 3200, loss: 56.156092
iter 3300, loss: 55.788497
iter 3400, loss: 55.251842
iter 3500, loss: 54.891486
iter 3600, loss: 54.790770
iter 3700, 

$
Text\;generated\;at\;Temperature\;(1/\alpha)=1
$

In [0]:
# Generate text at temperature 1, starting from a zero hidden state and
# seeded with the first character of the corpus.
hprev = np.zeros((hidden_size, 1))
test = [char_to_ix[ch] for ch in data[:seq_length]]
test_RNN(test, hprev, temp=1)

----
 ill and yut he seds; berede! he hoit totheld anst wearellC dfood.

CORIOLANUS:
The do prale the not- how surowellt; Ro hesso.

LUENIUS:
Wen haps on bracconftry arcoust you wher dve
Mh ely?
he shomll;  
----


$
Training\;the\;model\;with\;temperature=20
$

In [0]:
# Experiment: re-initialise a 50-unit RNN and train it for 5000 iterations at temperature 20.
hidden_size = 50
Wxh, Whh, Why, bh, by = initialize(Wxh, Whh, Why, bh, by)
train(temperature=20, iterations=5000)

iter 0, loss: 103.178360
iter 100, loss: 101.177080
iter 200, loss: 99.226576
iter 300, loss: 97.570612
iter 400, loss: 95.942638
iter 500, loss: 94.801438
iter 600, loss: 93.342855
iter 700, loss: 92.064238
iter 800, loss: 90.803310
iter 900, loss: 89.534179
iter 1000, loss: 88.362052
iter 1100, loss: 87.119615
iter 1200, loss: 85.926893
iter 1300, loss: 84.805078
iter 1400, loss: 83.828153
iter 1500, loss: 82.870611
iter 1600, loss: 81.912088
iter 1700, loss: 81.003943
iter 1800, loss: 80.474359
iter 1900, loss: 79.535894
iter 2000, loss: 78.632422
iter 2100, loss: 77.924552
iter 2200, loss: 77.142219
iter 2300, loss: 76.535100
iter 2400, loss: 75.918893
iter 2500, loss: 75.342182
iter 2600, loss: 74.722640
iter 2700, loss: 74.113616
iter 2800, loss: 73.492291
iter 2900, loss: 72.998451
iter 3000, loss: 72.421025
iter 3100, loss: 72.039584
iter 3200, loss: 71.535000
iter 3300, loss: 71.052536
iter 3400, loss: 70.551121
iter 3500, loss: 70.092119
iter 3600, loss: 69.780031
iter 3700, 

$
Text\;Generation\;at\;temperature=20
$

In [0]:
# Generate text at temperature 20, starting from a zero hidden state and
# seeded with the first character of the corpus.
hprev = np.zeros((hidden_size, 1))
test = [char_to_ix[ch] for ch in data[:seq_length]]
test_RNN(test, hprev, temp=20)

----
  be
Iet in nor,
th tine denum mal, nanake soewm
eotoer jesret veerdry saSnisseav.

IMUMLAOUURS
:UI:

NaIA
ONCNUUO&US:?:
A, wiee moanf ItiAom idgnamd taullteem, Whau sleen terees' py bhos noaa,r-gatyl, 
----


$
Training\;the\;model\;at\;temperature=0.5
$

In [0]:
# Experiment: re-initialise a 50-unit RNN and train it for 5000 iterations at temperature 0.5.
hidden_size = 50
Wxh, Whh, Why, bh, by = initialize(Wxh, Whh, Why, bh, by)
train(temperature=0.5, iterations=5000)

iter 0, loss: 103.178368
iter 100, loss: 103.170514
iter 200, loss: 100.348789
iter 300, loss: 96.894320
iter 400, loss: 93.692754
iter 500, loss: 90.740584
iter 600, loss: 87.960935
iter 700, loss: 85.613626
iter 800, loss: 82.778802
iter 900, loss: 80.338325
iter 1000, loss: 78.308970
iter 1100, loss: 76.090927
iter 1200, loss: 74.233372
iter 1300, loss: 72.564992
iter 1400, loss: 71.131357
iter 1500, loss: 69.496593
iter 1600, loss: 68.222658
iter 1700, loss: 66.859533
iter 1800, loss: 65.825738
iter 1900, loss: 64.543633
iter 2000, loss: 63.566783
iter 2100, loss: 62.770639
iter 2200, loss: 61.597421
iter 2300, loss: 60.915302
iter 2400, loss: 59.808740
iter 2500, loss: 58.840202
iter 2600, loss: 58.194281
iter 2700, loss: 57.597277
iter 2800, loss: 56.577175
iter 2900, loss: 56.185706
iter 3000, loss: 55.822228
iter 3100, loss: 55.212233
iter 3200, loss: 54.616635
iter 3300, loss: 54.308595
iter 3400, loss: 53.789723
iter 3500, loss: 53.432018
iter 3600, loss: 53.330749
iter 3700,

$
Text\;generation\;at\;temperature=0.5
$

In [0]:
# Generate text at temperature 0.5, starting from a zero hidden state and
# seeded with the first character of the corpus.
hprev = np.zeros((hidden_size, 1))
test = [char_to_ix[ch] for ch in data[:seq_length]]
test_RNN(test, hprev, temp=0.5)

----
 in tish to kef an ion your This she verall. I to alt es com, liny; not thes bray my nore wat shas arts nod the ande thom lle t aicr, oples,
Ast stond mwets lavers. I pores
youre
To V py?
Aglasthe cose 
----


$
Observations:\;At\;low\;temperature\;the\;model\;converges\;faster\;compared\;to\;the\;model\;trained\;at\;high\;temperature.
$

$
Question\;2:String\;Completion
$

In [0]:
# Prompt to be completed: characters 15..59 of the corpus, shown together
# with the true continuation (characters 15..260) for comparison.
String = data[15:60]
print(String,
      'Actual Sentence:',
      '------',
      data[15:261],
      '------',
      sep='\n')

Before we proceed any further, hear me speak.
Actual Sentence:
------
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We kn
------


$
Loading\;the\;weights\;of\;trained\;RNN
$

In [0]:
# Load the pre-trained network snapshot and replace the module-level model
# state (weights, Adagrad memory and vocabulary tables) with its contents.
#
# NOTE(security): unpickling can execute arbitrary code, so only load
# snapshots that come from a trusted source.
with open("char-rnn-snapshot.pkl", 'rb') as f:
  # Use the public pickle API; encoding='latin1' is what the pickle docs
  # prescribe for reading NumPy arrays pickled by Python 2.
  a = pickle.load(f, encoding='latin1')

# The previous re-initialisation before loading was dropped: every parameter
# is overwritten by the snapshot values right here.
Wxh = a["Wxh"]
Whh = a["Whh"]
Why = a["Why"]
bh = a["bh"]
by = a["by"]
# Whh is the square hidden-to-hidden matrix, so its first dimension is the
# hidden size. Deriving it keeps later np.zeros((hidden_size, 1)) states
# consistent with the loaded weights (this cell used to hard-code 250).
hidden_size = Whh.shape[0]
mWxh, mWhh, mWhy = a["mWxh"], a["mWhh"], a["mWhy"]
mbh, mby = a["mbh"], a["mby"]
# The remaining entries are unwrapped with .tolist() to get plain Python objects.
chars, data_size = a["chars"].tolist(), a["data_size"].tolist()
vocab_size, char_to_ix = a["vocab_size"].tolist(), a["char_to_ix"].tolist()
ix_to_char = a["ix_to_char"].tolist()

$
Function\;for\;Calculating\;hidden\;state\;at\;the\;end\;of\;the\;sentence.
$

In [0]:
def hidden_state(h,sentence):
  """Advance the hidden state over a prompt, ready to be handed to ``sample``.

  Every character of ``sentence`` except the last one is fed through the
  recurrent update. The last character is left out on purpose: the caller
  passes it to ``sample`` as the seed, and ``sample`` feeds the seed on its
  first step.

  The previous loop bound, ``range(1, len(sentence)-1)``, stopped one step
  early, so the second-to-last character of the prompt never reached the
  hidden state.

  Args:
    h: initial hidden state, a column vector.
    sentence: list of character indices making up the prompt.

  Returns:
    The hidden state after consuming ``sentence[:-1]``. For a prompt with
    fewer than two characters ``h`` is returned unchanged.
  """
  for ix in sentence[:-1]:
    # One-hot encode the current prompt character.
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
  return h

$
Completing\;the\;string\;using\;RNN
$

In [0]:
# Encode the prompt, then draw five independent 200-character continuations
# at temperature 1. Each draw starts from a zero hidden state that is first
# advanced over the prompt.
test = [char_to_ix[ch] for ch in String]
print('String for Completion:')
print(String)
for i in range(5):
  print(' %dth Generated String:' % (i+1))
  hprev = np.zeros((hidden_size, 1))
  hidden_st = hidden_state(h=hprev, sentence=test)
  # The last prompt character is the seed for sampling.
  sample_ix = sample(hidden_st, test[-1], 200, temperature=1)
  txt = ''.join([ix_to_char[ix] for ix in sample_ix])
  print('----\n %s \n----' % (txt, ))

String for Completion:
Before we proceed any further, hear me speak.
 1th Generated String:
----
 

CORIOLANUS:
To wast lesch bove them ix not fel: suven, to in the with
Ane pound What parch way, we of be that have
A'
Onl lel aceing; solf'd deap, passen speatssent now thet puon's anf I well lorgel 
----
 2th Generated String:
----
 
To tell pooble mo I flems!

MENENIUS:
I'lf for put for 'Tis my sovere, we do then! move-ined cits:
And what?

BRUTUS:
Which he the you:
Wereizense yim;
Farse.
As com love coan.

VOLUMNIA:
O'll thy th 
----
 3th Generated String:
----
 

SICINIUS:
Jovon do im;
Who ammion, lature mofurus he. Haged arpe, I'll him be ean,
I befoot hadge some.

COMINIUS:
For wis!

VALERIS:
When moves, my porsm lord for as hus whatter Buthon I Catay the  
----
 4th Generated String:
----
  From mo logst
Sonfellowe, go,
Oacon's:
And wood.

VOLUMNIA:
Fnafore,
A's pore you one whre's
'pees
Be and brack sadrelt whomt onrers sivetute,
Butind Rombmeds :
Wy coms in fon.
Wildes in call t

$
Question\;3:Reason\;for\;newlines\;or\;spaces\;after\;colon(:)
$

In [0]:
# Vocabulary indices of the characters examined below.
# ':' is a colon (the label used to say "semi colon").
print('Index of colon is %d'%char_to_ix[':'])
print('Index of new line is %d'%char_to_ix['\n'])
print('Index of space is %d'%char_to_ix[' '])

Index of semi colon is 9
Index of new line is 0
Index of space is 2


$
Visualize\;weight\;matrices\;when\;input\;is\;colon(:)
$

In [0]:
def check(h, seed_ix, n,temperature,character1,character2):
  """Sample from the model and report which hidden units favour ``character2``.

  Works like ``sample``, but whenever the current input character is
  ``character1`` it prints the ``[row, column]`` coordinates of the ``Why``
  entries in the output row of ``character2`` whose sign matches the sign of
  the corresponding hidden activation, i.e. where ``Why[row][i] * h[i] > 0``.

  Args:
    h: hidden state to start from, a column vector.
    seed_ix: index of the seed character fed at the first time step.
    n: number of characters to generate.
    temperature: the logits are multiplied by ``1/temperature`` before softmax.
    character1: input character that triggers the report.
    character2: output character whose row of ``Why`` is inspected.

  Returns:
    A list with the ``n`` sampled character indices.

  Raises:
    KeyError: if ``character1`` or ``character2`` is not in ``char_to_ix``.
  """
  # Look both characters up once instead of on every step / every neuron.
  watch_ix = char_to_ix[character1]
  target_ix = char_to_ix[character2]
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    if x[watch_ix, 0] == 1:
      # A unit contributes positively to the target logit when its activation
      # and its outgoing weight are both positive or both negative.
      same_sign = np.sign(h[:, 0]) * np.sign(Why[target_ix]) > 0
      indic = [[target_ix, i] for i in np.flatnonzero(same_sign).tolist()]
      print('The cordinates of Why which are responsible:')
      print(indic)
    y = np.dot(Why, h) + by
    y = (1/temperature)*y
    # Max-shifted softmax: same probabilities, no overflow in np.exp.
    exp_y = np.exp(y - np.max(y))
    p = exp_y / np.sum(exp_y)
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    # Feed the sampled character back in as the next one-hot input.
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes

$
Finding\;the\;indices\;of\;specific\;hidden\;neurons\;which\;fire\;when\;':'\;is\;given\;as\;input.
$

In [0]:
# Sample 200 characters starting from ':' with a zero hidden state and report,
# each time ':' is the input, which hidden units push towards a newline.
hidden_st1 = np.zeros((hidden_size, 1))
sample_ix = check(hidden_st1, char_to_ix[':'], n=200, temperature=1,
                  character1=':', character2='\n')
txt = ''.join([ix_to_char[ix] for ix in sample_ix])
print('----\n %s \n----' % (txt, ))

The cordinates of Why which are responsible:
[[0, 0], [0, 2], [0, 5], [0, 7], [0, 8], [0, 9], [0, 10], [0, 11], [0, 14], [0, 15], [0, 16], [0, 17], [0, 18], [0, 19], [0, 20], [0, 23], [0, 24], [0, 25], [0, 26], [0, 27], [0, 28], [0, 29], [0, 31], [0, 32], [0, 33], [0, 34], [0, 35], [0, 36], [0, 37], [0, 39], [0, 40], [0, 42], [0, 44], [0, 46], [0, 51], [0, 52], [0, 53], [0, 54], [0, 55], [0, 59], [0, 60], [0, 62], [0, 63], [0, 64], [0, 65], [0, 67], [0, 68], [0, 70], [0, 72], [0, 73], [0, 75], [0, 76], [0, 77], [0, 78], [0, 79], [0, 80], [0, 81], [0, 82], [0, 84], [0, 85], [0, 86], [0, 87], [0, 88], [0, 89], [0, 90], [0, 91], [0, 92], [0, 94], [0, 95], [0, 96], [0, 97], [0, 99], [0, 100], [0, 102], [0, 103], [0, 106], [0, 108], [0, 109], [0, 110], [0, 111], [0, 113], [0, 114], [0, 116], [0, 118], [0, 120], [0, 123], [0, 124], [0, 125], [0, 126], [0, 128], [0, 129], [0, 131], [0, 132], [0, 133], [0, 137], [0, 138], [0, 139], [0, 141], [0, 142], [0, 143], [0, 144], [0, 145], [0, 146], [0

$
Explanation:\;When\;':'\;is\;given\;as\;input\;to\;the\;RNN,\;specific\;hidden\;neurons\;fire\;and\;these\;produce\;the\;newline.\;The\;specific\;weights\;of\;Why\;that\;connect\;those\;neurons\;to\;the\;newline\;output\;index\;are\;responsible\;for\;this\;behaviour.
\\\implies The\;responsible
\;weights\;are\;those\;with\;Why[index][index1]*h[index1]>0$

$
Question\;4:Another\;behaviour:
\\newline\;after\;letter\;'.'
$

In [0]:
# Sample 200 characters starting from '.' with a zero hidden state and report,
# each time '.' is the input, which hidden units push towards a newline.
hidden_st1 = np.zeros((hidden_size, 1))
sample_ix = check(hidden_st1, char_to_ix['.'], n=200, temperature=1,
                  character1='.', character2='\n')
txt = ''.join([ix_to_char[ix] for ix in sample_ix])
print('----\n %s \n----' % (txt, ))

The cordinates of Why which are responsible:
[[0, 0], [0, 1], [0, 3], [0, 5], [0, 7], [0, 8], [0, 9], [0, 11], [0, 12], [0, 14], [0, 16], [0, 17], [0, 18], [0, 19], [0, 20], [0, 24], [0, 25], [0, 26], [0, 27], [0, 28], [0, 29], [0, 31], [0, 32], [0, 33], [0, 34], [0, 35], [0, 37], [0, 39], [0, 40], [0, 41], [0, 42], [0, 43], [0, 45], [0, 46], [0, 47], [0, 49], [0, 52], [0, 53], [0, 54], [0, 55], [0, 56], [0, 59], [0, 60], [0, 63], [0, 64], [0, 65], [0, 66], [0, 67], [0, 68], [0, 72], [0, 73], [0, 74], [0, 76], [0, 77], [0, 78], [0, 79], [0, 80], [0, 81], [0, 82], [0, 86], [0, 87], [0, 89], [0, 90], [0, 91], [0, 92], [0, 95], [0, 96], [0, 97], [0, 99], [0, 100], [0, 101], [0, 102], [0, 103], [0, 106], [0, 107], [0, 109], [0, 110], [0, 111], [0, 113], [0, 115], [0, 116], [0, 118], [0, 122], [0, 124], [0, 125], [0, 128], [0, 129], [0, 131], [0, 132], [0, 134], [0, 137], [0, 138], [0, 139], [0, 140], [0, 141], [0, 143], [0, 144], [0, 145], [0, 146], [0, 147], [0, 149], [0, 152], [0, 155], 

$
Explanation:\;When\;'.'\;is\;given\;as\;input\;to\;the\;RNN,\;specific\;hidden\;neurons\;fire\;and\;these\;produce\;the\;newline.\;The\;specific\;weights\;of\;Why\;that\;connect\;those\;neurons\;to\;the\;newline\;output\;index\;are\;responsible\;for\;this\;behaviour.
\\\implies The\;responsible
\;weights\;are\;those\;with\;Why[index][index1]*h[index1]>0$