<a href="https://colab.research.google.com/github/jdasam/mas1004/blob/2024/live_coding/5_rnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# download data
!wget https://archive.ics.uci.edu/static/public/591/gender+by+name.zip
!unzip gender+by+name.zip

--2024-11-21 06:35:44--  https://archive.ics.uci.edu/static/public/591/gender+by+name.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘gender+by+name.zip’

gender+by+name.zip      [   <=>              ]   3.60M  6.41MB/s    in 0.6s    

2024-11-21 06:35:45 (6.41 MB/s) - ‘gender+by+name.zip’ saved [3774735]

Archive:  gender+by+name.zip
 extracting: name_gender_dataset.csv  


In [5]:
import pandas as pd
# Load the raw name/gender table; `df` keeps every row of the CSV
df = pd.read_csv('name_gender_dataset.csv')
# Keep a single row per distinct name (the first one encountered)
unique_gender_df = df.drop_duplicates(subset='Name')
# Extract the two columns of interest from the de-duplicated frame
names, genders = (unique_gender_df[column].values for column in ('Name', 'Gender'))

# names.tolist()


- `nn.Linear()`: $\mathbf{Wx} + \mathbf{b}$
  - $\mathbf{x} \in \mathbb{R}^d$

- `RNN`
  - $h_t = \tanh(W_{xh}x_t + W_{hh}h_{t-1} + b)$
  - Equivalent form with a separate bias for each linear map ($b = b_x + b_h$): $h_t = \tanh(W_{xh}x_t + b_x + W_{hh}h_{t-1} + b_h)$

In [11]:
# Building Recurrent Neural Network
import torch

# Stand-in for h_{t-1}: 7 random values squashed into [-1, 1] by tanh
previous_hidden_state = torch.tanh(torch.randn(7))
# Stand-in for x_t: a random 5-dimensional input vector
current_input = torch.randn(5)

print(previous_hidden_state, current_input)

tensor([-0.2374,  0.9852,  0.2457,  0.1904,  0.8968,  0.5553,  0.9421]) tensor([ 0.1504,  2.1990, -0.5906, -0.1655, -0.1131])


In [15]:
# Make longer input
number_of_tokens, token_embedding_size = 9, 5

# One random embedding vector (row) per token
input_sequence = torch.randn(number_of_tokens, token_embedding_size)
input_sequence.shape

torch.Size([9, 5])

In [18]:
# Iterating over the 2-D tensor walks along its first dimension,
# so each step yields one token's embedding vector.
for cur_input in input_sequence:
  print(cur_input)
# Equivalent index-based loop:
# for i in range(len(input_sequence)):
#   print(input_sequence[i])

tensor([ 0.1883, -0.8698,  0.8420,  0.4382,  0.1950])
tensor([-0.6335, -1.2110, -0.5960,  0.1198,  0.5531])
tensor([ 0.3202,  0.0699,  1.1909, -0.9390, -1.0263])
tensor([-1.3726,  2.4775, -0.4886,  1.1196, -0.0905])
tensor([-0.8446,  1.8695, -0.3196,  0.0235,  0.5834])
tensor([ 1.2356, -0.3399,  0.1605,  0.2606,  1.1375])
tensor([-0.2882, -0.6564,  0.5704, -0.6938, -0.2129])
tensor([-1.7821, -1.2197,  0.2701,  0.1403,  0.6418])
tensor([ 0.1810, -0.7498, -1.6920, -1.3025,  1.3241])


In [20]:
import torch.nn as nn

class MyRNN(nn.Module):
  """Minimal hand-written RNN: h_t = tanh(W_xh x_t + b + W_hh h_{t-1}).

  Subclasses nn.Module so the two linear layers are registered and therefore
  visible to .parameters(), .to(), .state_dict() and optimizers.

  Args:
    input_dim: size of each input vector x_t.
    output_dim: size of the hidden state h_t (also the per-step output).
  """

  def __init__(self, input_dim, output_dim):
    super().__init__()
    # Input-to-hidden map; carries the single bias term b.
    self.xh = nn.Linear(input_dim, output_dim)
    # Hidden-to-hidden map; bias-free because xh already provides the bias.
    self.hh = nn.Linear(output_dim, output_dim, bias=False)
    self.hidden_size = output_dim

  def run_one_step(self, current_input, previous_output):
    """Compute the next hidden state from one input and the previous hidden state."""
    return torch.tanh(self.xh(current_input) + self.hh(previous_output))

  def run_sequence(self, input_sequence, last_hidden_state=None):
    """Run the recurrence over every element of input_sequence.

    Args:
      input_sequence: iterable of input vectors (e.g. a tensor whose first
        dimension is time).
      last_hidden_state: initial hidden state; defaults to all zeros.

    Returns:
      The hidden state after each step, stacked along a new first dimension.
      For an empty sequence, an empty tensor of shape
      (0, *initial_hidden_state.shape) is returned instead of raising.
    """
    if last_hidden_state is None:
      # Build the zeros from a layer weight so dtype/device follow the model.
      last_hidden_state = self.xh.weight.new_zeros(self.hidden_size)

    outputs = []
    for cur_input in input_sequence:
      last_hidden_state = self.run_one_step(cur_input, last_hidden_state)
      outputs.append(last_hidden_state)

    if not outputs:
      # torch.stack raises on an empty list; return a well-formed empty result.
      return last_hidden_state.new_zeros((0, *last_hidden_state.shape))

    return torch.stack(outputs)

  def forward(self, input_sequence, last_hidden_state=None):
    """nn.Module entry point; same as run_sequence, so `rnn(x)` works."""
    return self.run_sequence(input_sequence, last_hidden_state)


# Instantiate the hand-written RNN: 5-dimensional inputs, 7-dimensional hidden state
rnn = MyRNN(input_dim=5, output_dim=7)
# Single-step usage, kept for reference:
# rnn.run_one_step(current_input, previous_hidden_state)
# Run every token through the RNN; the result stacks one hidden state per token
rnn.run_sequence(input_sequence)

tensor([[-0.4771, -0.3320,  0.0756, -0.4062,  0.1258,  0.0620, -0.1833],
        [-0.5957,  0.4569,  0.0726,  0.1967, -0.6884, -0.5386,  0.5966],
        [-0.4063, -0.6572, -0.0639, -0.8374,  0.6214,  0.6223, -0.6699],
        [ 0.2159,  0.7672,  0.9366, -0.3814,  0.6713,  0.7312,  0.8167],
        [ 0.1331,  0.7417,  0.8792, -0.7471,  0.4889, -0.0286, -0.0763],
        [-0.0055,  0.7557,  0.1285, -0.2088,  0.6357, -0.7966, -0.6023],
        [-0.4253, -0.1544, -0.4811, -0.3965,  0.4023, -0.0944, -0.1270],
        [-0.4319, -0.3542,  0.2626, -0.3648, -0.7713,  0.0845,  0.5615],
        [-0.1893,  0.9389,  0.1143,  0.0564, -0.9426, -0.9483,  0.2609]],
       grad_fn=<StackBackward0>)

In [22]:
# Work on a copy so input_sequence itself is left untouched
modified_sequence = input_sequence.clone()
# Blank out the token at index 4 (the fifth time step) in place
modified_sequence[4].zero_()
modified_sequence

tensor([[ 0.1883, -0.8698,  0.8420,  0.4382,  0.1950],
        [-0.6335, -1.2110, -0.5960,  0.1198,  0.5531],
        [ 0.3202,  0.0699,  1.1909, -0.9390, -1.0263],
        [-1.3726,  2.4775, -0.4886,  1.1196, -0.0905],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 1.2356, -0.3399,  0.1605,  0.2606,  1.1375],
        [-0.2882, -0.6564,  0.5704, -0.6938, -0.2129],
        [-1.7821, -1.2197,  0.2701,  0.1403,  0.6418],
        [ 0.1810, -0.7498, -1.6920, -1.3025,  1.3241]])

In [23]:
# Re-run the same RNN on the sequence whose token at index 4 was zeroed.
# Output rows 0-3 match the run on the original sequence; rows 4-8 all differ,
# because the change at step 4 is carried forward through the hidden state.
rnn.run_sequence(modified_sequence)

tensor([[-0.4771, -0.3320,  0.0756, -0.4062,  0.1258,  0.0620, -0.1833],
        [-0.5957,  0.4569,  0.0726,  0.1967, -0.6884, -0.5386,  0.5966],
        [-0.4063, -0.6572, -0.0639, -0.8374,  0.6214,  0.6223, -0.6699],
        [ 0.2159,  0.7672,  0.9366, -0.3814,  0.6713,  0.7312,  0.8167],
        [-0.3479,  0.4803,  0.3535, -0.4024,  0.5282, -0.2599, -0.2210],
        [ 0.1090,  0.6747, -0.0148, -0.1594,  0.5768, -0.7119, -0.5218],
        [-0.3857, -0.1714, -0.4636, -0.4201,  0.2944, -0.0791, -0.1076],
        [-0.4556, -0.3612,  0.2993, -0.3882, -0.7900,  0.0836,  0.5567],
        [-0.1985,  0.9402,  0.1258,  0.0591, -0.9414, -0.9486,  0.2587]],
       grad_fn=<StackBackward0>)