<a href="https://colab.research.google.com/github/avikumart/LLM-GenAI-Transformers-Notebooks/blob/main/DeepLearningFiles/Time_series_and_encoder_decoder_implementations_DL_with_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Time-series prediction using RNNs

In [2]:
# Build and create ConvNet and RNN models for image and text data classifications from the chollat's book
import statsmodels
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense,GRU
from statsmodels.datasets import get_rdataset

In [3]:
# import dataset
data = get_rdataset("AirPassengers", "datasets").data
data

Unnamed: 0,time,value
0,1949.000000,112
1,1949.083333,118
2,1949.166667,132
3,1949.250000,129
4,1949.333333,121
...,...,...
139,1960.583333,606
140,1960.666667,508
141,1960.750000,461
142,1960.833333,390


In [4]:
data.index = pd.to_datetime(data["time"])
data.drop(columns="time", inplace=True)

In [5]:
data.head()

Unnamed: 0_level_0,value
time,Unnamed: 1_level_1
1970-01-01 00:00:00.000001949,112
1970-01-01 00:00:00.000001949,118
1970-01-01 00:00:00.000001949,132
1970-01-01 00:00:00.000001949,129
1970-01-01 00:00:00.000001949,121


In [6]:
# normalize the data
data = (data - data.mean())/data.std()

In [7]:
# convert the 1D data into sequential data
def create_seq(data, seq_len):
    x = []
    y = []
    for i in range(len(data) - seq_len):
      x.append(data[i:(i+seq_len)])
      y.append(data[i+seq_len])
    return np.array(x), np.array(y)

In [8]:
seq_len = 14
X, y = create_seq(data.values, seq_len)

In [9]:
split = int(0.8*len(data))

In [10]:
# train and test split
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [11]:
# design the model
model = Sequential()
model.add(LSTM(64, return_sequences=True,activation="tanh",input_shape=(seq_len, 1)))
model.add(GRU(64, activation="tanh"))
model.add(Dense(1, activation="linear"))

  super().__init__(**kwargs)


In [12]:
# compile the model
model.compile(optimizer="adam", loss="mse")

history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 208ms/step - loss: 0.5947 - val_loss: 1.1580
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 0.2664 - val_loss: 0.4526
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.1921 - val_loss: 0.4108
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.1686 - val_loss: 0.4078
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.1763 - val_loss: 0.3814
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.1482 - val_loss: 0.3701
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.1581 - val_loss: 0.3638
Epoch 8/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - loss: 0.1417 - val_loss: 0.3521
Epoch 9/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [13]:
test_loss = model.evaluate(X_test,y_test)
print("Test loss of the function:",test_loss)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - loss: 0.2540
Test loss of the function: 0.25404518842697144


## English to French translation encoder-decoder architecture

In [14]:
import numpy as np

# Define the sample dataset
input_texts = ['I love pizza', 'She likes ice cream', 'They drink tea']
target_texts = ['J\'aime la pizza', 'Elle aime la glace', 'Ils boivent du thé']

In [15]:
input_vocab = set()
output_vocab = set()

In [16]:
for it, tt in zip(input_texts,target_texts):
  input_vocab.update(it.split())
  output_vocab.update(tt.split())

In [19]:
# sort the vocab and put it into a list
input_vocab = sorted(list(input_vocab))
output_vocab = sorted(list(output_vocab))

In [20]:
# create word index
input_word_index = {word: i for i, word in enumerate(input_vocab)}
output_word_index = {word: i for i, word in enumerate(output_vocab)}

In [24]:
# define the one-hot encoding param
max_input_len = max([len(it.split()) for it in input_texts])
max_output_len = max([len(tt.split()) for tt in target_texts])
encoder_tokens = len(input_vocab)
decoder_tokens = len(output_vocab)

In [31]:
# generate one-hote encoding matrices
encoder_input_data = np.zeros((len(input_texts), max_input_len, encoder_tokens), dtype="float32")
decoder_input_data = np.zeros((len(input_texts), max_output_len, decoder_tokens), dtype="float32")
decoder_target_data = np.zeros((len(input_texts), max_output_len, decoder_tokens), dtype="float32")

In [32]:
for i, (input_texts, target_texts) in enumerate(zip(input_texts, target_texts)):
  for t, word in enumerate(input_texts.split()):
    encoder_input_data[i, t, input_word_index[word]] = 1.0
  for t, word in enumerate(target_texts.split()):
    decoder_input_data[i, t, output_word_index[word]] = 1.0
    if t > 0:
      decoder_target_data[i, t-1, output_word_index[word]] = 1.0

In [42]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense

# define the encoder
encoder_input = Input(shape=(max_input_len, encoder_tokens))
encoder_lstm = LSTM(256, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_input)
encoder_states = [state_h, state_c]

# define the decoder
decoder_input = Input(shape=(max_input_len, decoder_tokens))
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_outputs,_,_ = decoder_lstm(decoder_input, initial_state=encoder_states)
decoder_dense = Dense(decoder_tokens, activation="softmax")
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_input,decoder_input], decoder_outputs)

In [43]:
model.summary()

In [44]:
# compile the model
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

# fit the model
model.fit([encoder_input_data,decoder_input_data], decoder_target_data, batch_size=32, epochs=100, validation_split=0.2)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 1.4377 - val_loss: 1.7438
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step - loss: 1.4038 - val_loss: 1.7503
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step - loss: 1.3767 - val_loss: 1.7569
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - loss: 1.3507 - val_loss: 1.7643
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step - loss: 1.3238 - val_loss: 1.7729
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - loss: 1.2949 - val_loss: 1.7833
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step - loss: 1.2629 - val_loss: 1.7961
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step - loss: 1.2267 - val_loss: 1.8120
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x7de165666e90>

- Drawback of one-hot encoding:
1. Sparse matrix
2. Difficult to converge the model
3. Not scalable for large dataset