# Assignment 0, Yifan Han, Oct 11 2024

## Question 1

In [1]:
import numpy as np
import tensorflow as tf
from datetime import date
import pandas as pd

In [2]:
# Generate Data
# Full English month names, indexed by (month number - 1).
months = ["January", "February", "March", "April", "May", "June",
          "July", "August", "September", "October", "November", "December"]

# Input vocabulary: every distinct character that can appear in a date such as
# "April 22, 2019". The characters are sorted because iterating a set of
# strings yields an arbitrary order that can change between Python processes
# (string hashing is randomized), which would silently change the
# character -> id mapping on every kernel restart.
inputs = "".join(sorted(set("".join(months) + "1234567890" + " ,")))
# Output vocabulary: the characters of an ISO date such as "2019-04-22".
outputs = "-1234567890"

def random_dates(n_dates):
    """Sample random dates and return them in two textual formats.

    Args:
        n_dates: Number of dates to draw.

    Returns:
        A tuple ``(x, y)`` of two lists of length ``n_dates``:
        ``x`` holds strings like ``"April 22, 2019"`` (month name from
        ``months``, zero-padded day, year) and ``y`` holds the matching
        ISO strings like ``"2019-04-22"``.
    """
    min_date = date(1900, 1, 1).toordinal()
    max_date = date(2500, 12, 31).toordinal()

    # np.random.randint excludes its upper bound, so add 1 to make the last
    # day (December 31, 2500) reachable.
    ordinals = np.random.randint(min_date, max_date + 1, size=n_dates)
    dates = [date.fromordinal(ordinal) for ordinal in ordinals]

    # The month name comes from `months` rather than strftime("%B") so the
    # text does not depend on the process locale.
    x = [f"{months[dt.month - 1]} {dt.strftime('%d, %Y')}" for dt in dates]
    y = [dt.isoformat() for dt in dates]

    return x, y

def convert_date_string_to_ids(date_string, char_set=inputs):
    """Map each character of ``date_string`` to its position in ``char_set``.

    Characters that do not occur in ``char_set`` map to -1 (the result of
    ``str.find``).
    """
    positions = []
    for symbol in date_string:
        positions.append(char_set.find(symbol))
    return positions

def format_date_strings(date_list, char_set=inputs):
    """Encode a list of date strings as one dense tensor of character ids.

    Each string is converted with ``convert_date_string_to_ids``, every id is
    shifted up by one, and the ragged rows are converted to a dense tensor
    via ``to_tensor()``.
    """
    id_rows = []
    for text in date_list:
        id_rows.append(convert_date_string_to_ids(text, char_set))
    ragged_ids = tf.ragged.constant(id_rows, ragged_rank=1)
    shifted_ids = ragged_ids + 1
    return shifted_ids.to_tensor()


def generate_data(num_dates):
    """Create ``num_dates`` random (input, target) pairs as id tensors.

    Returns a tuple: the textual dates encoded with the ``inputs`` vocabulary
    and the ISO dates encoded with the ``outputs`` vocabulary.
    """
    text_dates, iso_dates = random_dates(num_dates)
    encoded_inputs = format_date_strings(text_dates, inputs)
    encoded_targets = format_date_strings(iso_dates, outputs)
    return encoded_inputs, encoded_targets

# Seed NumPy's global RNG, which random_dates() draws from, before sampling.
np.random.seed(2024)

# 20,000 training pairs, plus 1,000 pairs each for validation and testing.
# The three calls consume the RNG in this order, so keep the order fixed.
X_train, Y_train = generate_data(20000)
X_valid, Y_valid = generate_data(1000)
X_test, Y_test = generate_data(1000)

In [3]:
# Encoder-decoder RNN model: translate "Month DD, YYYY" strings into ISO dates

# Size of the vector learned for each input character.
embedding_dim = 32
# Number of output time steps, taken from the width of the target tensor.
max_output_length = Y_train.shape[1]

# Seed both NumPy and TensorFlow before any layer is created.
np.random.seed(2024)
tf.random.set_seed(2024)

# Define the encoder model
# The vocabulary size is len(inputs) + 1 because format_date_strings() shifts
# every character id up by one. The LSTM is built without return_sequences,
# so it hands a single vector to the next stage.
encoder_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=len(inputs) + 1,
                               output_dim=embedding_dim,
                               input_shape=[None]),
    tf.keras.layers.LSTM(128)
])

# Define the decoder model
# return_sequences=True yields one output per time step; the Dense softmax
# scores len(outputs) + 1 classes at every step.
decoder_model = tf.keras.Sequential([
    tf.keras.layers.LSTM(128, return_sequences=True),
    tf.keras.layers.Dense(len(outputs) + 1, activation="softmax")
])

# Combine encoder and decoder into a full model
# RepeatVector feeds the encoder's vector to the decoder once per output step.
full_model = tf.keras.Sequential([
    encoder_model,
    tf.keras.layers.RepeatVector(max_output_length),
    decoder_model
])

# Compile the model
# The "sparse" loss variant is used because the targets are integer ids.
optimizer = tf.keras.optimizers.Nadam()
full_model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
                   metrics=["accuracy"])

# Train the model
# 20 epochs, evaluated on the validation set passed via validation_data.
training_history = full_model.fit(X_train, Y_train, epochs=20,
                                   validation_data=(X_valid, Y_valid))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [4]:
# Test

def ids_to_date_strings(ids, char_set=outputs):
    """Decode sequences of character ids back into strings.

    Id 0 is rendered as "?"; id k (k >= 1) is rendered as ``char_set[k - 1]``.
    Returns one string per sequence in ``ids``.
    """
    lookup = "?" + char_set
    decoded = []
    for sequence in ids:
        decoded.append("".join(lookup[index] for index in sequence))
    return decoded

# Width (number of time steps) of the training input tensor; batches that are
# narrower than this are zero-padded up to it in prepare_padded_date_strings().
max_input_length = X_train.shape[1]

def prepare_padded_date_strings(date_strings):
    """Encode date strings and right-pad them with zeros to the training width.

    The strings are encoded with ``format_date_strings``; if the resulting
    tensor has fewer than ``max_input_length`` columns, zero columns are
    appended on the right. Wider tensors are returned unchanged.
    """
    encoded = format_date_strings(date_strings)
    missing_steps = max_input_length - encoded.shape[1]
    if missing_steps > 0:
        encoded = tf.pad(encoded, [[0, 0], [0, missing_steps]])
    return encoded

def convert_to_date_strings(date_strings):
    """Translate textual dates into strings decoded from ``full_model``'s output.

    The inputs are encoded and padded, the model predicts class scores for
    every output step, and the highest-scoring id at each step is decoded
    with ``ids_to_date_strings``.
    """
    model_input = prepare_padded_date_strings(date_strings)
    step_scores = full_model.predict(model_input)
    best_ids = step_scores.argmax(axis=-1)
    return ids_to_date_strings(best_ids)

# Sanity check: run two hand-written dates through the trained model and
# print the decoded predictions.
example_dates = ["January 10, 2002", "May 26, 2000"]
converted_dates = convert_to_date_strings(example_dates)
print(converted_dates)

['2002-01-10', '2000-05-26']


## Question 2

In [5]:
import keras
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, LSTM, GRU

In [19]:
# Case 1: three stacked SimpleRNN layers; each time step x(t) is a scalar
model_rnn_scalar = keras.models.Sequential()
model_rnn_scalar.add(
    keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1])
)
model_rnn_scalar.add(keras.layers.SimpleRNN(20, return_sequences=True))
model_rnn_scalar.add(keras.layers.SimpleRNN(1))
model_rnn_scalar.summary()

Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_17 (SimpleRNN)   (None, None, 20)          440       
                                                                 
 simple_rnn_18 (SimpleRNN)   (None, None, 20)          820       
                                                                 
 simple_rnn_19 (SimpleRNN)   (None, 1)                 22        
                                                                 
Total params: 1,282
Trainable params: 1,282
Non-trainable params: 0
_________________________________________________________________


### Case 1:  SimpleRNN layers as on slides, at each time step x(t) is a scalar.

#### Number of parameters = units x (units +input dimension +1)
#### Layer 1: 20 x (20+1+1) =440 (units=20, input dimension=1 since x(t) is a scalar)
#### Layer 2: 20 x (20+20+1) =820 (units=20, input dimension=20 since it has 20 units and return_sequences=True)
#### Layer 3: 1 x (1+20+1) = 22 (units=1, input dimension=20 since the previous layer has 20 units)

#### Total trainable parameters = 440+820+22 = 1282

In [21]:
# Case 2: SimpleRNN with vector input (each time step x(t) has 3 values).
# Only `model_rnn_vector` is assigned here; the earlier chained assignment
# also rebound `model_rnn_scalar`, silently replacing the Case 1 model.
model_rnn_vector = keras.models.Sequential([
  keras.layers.SimpleRNN(20, return_sequences=True,
                         input_shape=[None, 3]),
  keras.layers.SimpleRNN(20, return_sequences=True),
  keras.layers.SimpleRNN(1)
])
model_rnn_vector.summary()

Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_23 (SimpleRNN)   (None, None, 20)          480       
                                                                 
 simple_rnn_24 (SimpleRNN)   (None, None, 20)          820       
                                                                 
 simple_rnn_25 (SimpleRNN)   (None, 1)                 22        
                                                                 
Total params: 1,322
Trainable params: 1,322
Non-trainable params: 0
_________________________________________________________________


### Case 2:  SimpleRNN layers as on slides, x(t) is a vector of 3 values.

#### Layer 1: 20 x (20+3+1) = 480 (units=20, input dimension=3 since x(t) is a vector of 3 values)
#### Layer 2: 20 x (20+20+1) =820 (units=20, input dimension=20 since it has 20 units and return_sequences=True)
#### Layer 3: 1 x (1+20+1) = 22 (units=1, input dimension=20 since the previous layer has 20 units)

#### Total trainable parameters = 480+820+22 = 1322

In [24]:
# Case 3: two SimpleRNN layers followed by a Dense(1) output layer
model_rnn_dense = keras.models.Sequential()
model_rnn_dense.add(
    keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1])
)
model_rnn_dense.add(keras.layers.SimpleRNN(20, return_sequences=True))
model_rnn_dense.add(keras.layers.Dense(1))
model_rnn_dense.summary()

Model: "sequential_21"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_31 (SimpleRNN)   (None, None, 20)          440       
                                                                 
 simple_rnn_32 (SimpleRNN)   (None, None, 20)          820       
                                                                 
 dense_10 (Dense)            (None, None, 1)           21        
                                                                 
Total params: 1,281
Trainable params: 1,281
Non-trainable params: 0
_________________________________________________________________


### Case 3:

#### Number of parameters = units x (units +input dimension +1)
#### Layer 1: 20 x (20+1+1) =440 
#### Layer 2: 20 x (20+20+1) =820 
#### Dense layer: Input Dimension×Output Dimension+Output Dimension = 20 x 1 +1 =21

#### Total trainable parameters= 440+820+21=1281

In [25]:
# Case 4: two LSTM layers followed by a Dense(1) output layer
model_lstm_dense = keras.models.Sequential()
model_lstm_dense.add(
    keras.layers.LSTM(20, return_sequences=True, input_shape=[None, 1])
)
model_lstm_dense.add(keras.layers.LSTM(20, return_sequences=True))
model_lstm_dense.add(keras.layers.Dense(1))

model_lstm_dense.summary()

Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, None, 20)          1760      
                                                                 
 lstm_4 (LSTM)               (None, None, 20)          3280      
                                                                 
 dense_11 (Dense)            (None, None, 1)           21        
                                                                 
Total params: 5,061
Trainable params: 5,061
Non-trainable params: 0
_________________________________________________________________


### Case 4:  LSTM + Dense(1)

#### 4 weight matrices for the LSTM

#### Number of parameters = 4 x units x (units + input dimension + 1), where 4 is the number of weight sets in an LSTM layer
#### Layer 1: 4 x 20 x (20+1+1) =1760 
#### Layer 2: 4 x 20 x (20+20+1) =3280
#### Dense layer: Input Dimension×Output Dimension+Output Dimension = 20 x 1 +1 =21

#### Total parameters= 1760+3280+21 = 5061

In [27]:
# Case 5: two GRU layers followed by a Dense(1) output layer
model_gru_dense = keras.models.Sequential()
model_gru_dense.add(
    keras.layers.GRU(20, return_sequences=True, input_shape=[None, 1])
)
model_gru_dense.add(keras.layers.GRU(20, return_sequences=True))
model_gru_dense.add(keras.layers.Dense(1))
model_gru_dense.summary()

Model: "sequential_24"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_3 (GRU)                 (None, None, 20)          1380      
                                                                 
 gru_4 (GRU)                 (None, None, 20)          2520      
                                                                 
 dense_13 (Dense)            (None, None, 1)           21        
                                                                 
Total params: 3,921
Trainable params: 3,921
Non-trainable params: 0
_________________________________________________________________


### Case 5:  GRU + Dense(1)

#### 3 weight matrices for the GRU

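#### Number of parameters = 3 x units x (units + input dimension + 2), where 3 is the number of weight sets in a GRU layer and the +2 counts the two bias terms per unit (Keras' GRU uses reset_after=True by default)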
#### Layer 1: 3 x 20 x (20+1+1+1) =1380 (GRU has an extra bias due to reset-after mechanism) 
#### Layer 2: 3 x 20 x (20+20+1+1) = 2520
#### Dense layer: Input Dimension×Output Dimension+Output Dimension = 20 x 1 +1 =21

#### Total parameters= 1380+2520+21 = 3921

