<a href="https://colab.research.google.com/github/zahraDehghanian97/LSTM_sequence_prediction/blob/master/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Mount Google Drive**

In [7]:
# Mount Google Drive at /content/drive. The CSV inputs, the weights checkpoint and
# the result file used by the cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Prerequisites**

In [8]:
%tensorflow_version 2.x
import os
from dataclasses import make_dataclass

import numpy as np
import pandas as pd
from tensorflow.keras.backend import clear_session
from tensorflow.keras.callbacks import ModelCheckpoint , EarlyStopping
from tensorflow.keras.layers import Dense , LSTM , Dropout , Masking
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.models import Sequential

# **load data**

In [9]:

# Training file: column 1 is a space-separated sequence of integer tokens,
# column 2 is the target value collected into `y`.
data_train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/predict_next_game/train.csv')
train_rows = data_train.values
X = [[int(token) for token in row[1].split(" ")] for row in train_rows]
y = [row[2] for row in train_rows]
seq_lens = [len(sequence) for sequence in X]

# Test file: column 0 is the row identifier, column 1 the token sequence.
data_test = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/predict_next_game/test.csv')
test_rows = data_test.values
index_test = [row[0] for row in test_rows]
X_test = [[int(token) for token in row[1].split(" ")] for row in test_rows]
    


In [10]:
# constant
# Width of the final Dense (output) layer; the hidden Dense layer uses 2 * num_class.
# Presumably the number of distinct game ids in the data set -- TODO confirm.
num_class = 7736
# Number of features per timestep (last axis of the padded input arrays).
dimension = 1
# Number of units passed to every LSTM layer of the model.
lstm_units = 20

# **Reshape data**: padding

In [11]:
# pad data with maximum len
special_value = -10.0              # sentinel written into padded timesteps (used as the mask value)
max_seq_len = max(seq_lens)        # longest *training* sequence defines the padded length
validation_size = 200              # number of leading training rows held out for validation


def _pad_to_length(sequences, max_len, n_features, pad_value):
    """Right-pad variable-length sequences into one dense array.

    Parameters
    ----------
    sequences : list of list
        One list of tokens per sample.
    max_len : int
        Number of timesteps in the output.
    n_features : int
        Size of the last axis of the output.
    pad_value : float
        Value stored in every timestep that is not covered by real data.

    Returns
    -------
    numpy.ndarray
        Array of shape (len(sequences), max_len, n_features).

    Sequences longer than ``max_len`` keep only their last ``max_len`` items
    instead of raising a broadcast error (this can happen for test rows,
    because ``max_len`` is derived from the training data only).
    """
    padded = np.full((len(sequences), max_len, n_features), fill_value=pad_value)
    for row, seq in enumerate(sequences):
        if len(seq) > max_len:
            seq = seq[len(seq) - max_len:]
        n_steps = len(seq)
        padded[row, 0:n_steps, :] = np.array(seq).reshape(n_steps, n_features)
    return padded


# Labels are rebuilt from the raw frame (same values the load cell put in `y`)
# rather than from `y` itself: `y` is overwritten below, so reading it here
# would drop another `validation_size` labels every time this cell is re-run.
labels_all = np.array([row[2] for row in data_train.values])
Xpad_all = _pad_to_length(X, max_seq_len, dimension, special_value)
assert len(Xpad_all) == len(labels_all), "features and labels are misaligned"

# split validation data from train data
Xpad_validation = Xpad_all[:validation_size]
Xpad = Xpad_all[validation_size:]
y_validation = labels_all[:validation_size]
y = labels_all[validation_size:]

# test data
Xpad_test = _pad_to_length(X_test, max_seq_len, dimension, special_value)

# **LSTM Model**

In [None]:
# Reset Keras global state so re-running this cell builds a fresh graph
# (layer names start again at "lstm", "dropout", ...).
clear_session()

# Stacked-LSTM classifier over the padded sequences.
model = Sequential()
# Timesteps equal to special_value are masked out for the following layers.
model.add(Masking(mask_value=special_value, input_shape=(max_seq_len, dimension)))
# The input shape is declared on the first layer only; Sequential ignores it elsewhere.
model.add(LSTM(lstm_units, return_sequences=True))
model.add(Dropout(rate=0.3))
model.add(LSTM(lstm_units, return_sequences=True))
model.add(Dropout(rate=0.3))
model.add(LSTM(lstm_units, return_sequences=True))
model.add(Dropout(rate=0.3))
# Last recurrent layer returns only its final state for classification.
model.add(LSTM(lstm_units))
model.add(Dropout(rate=0.2))
model.add(Dense(units=2 * num_class, activation='relu'))
# softmax (not sigmoid): SparseCategoricalCrossentropy with from_logits=False
# expects the model output to be a probability distribution over the classes.
model.add(Dense(units=num_class, activation='softmax'))
# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()

filepath="/content/drive/MyDrive/Colab Notebooks/predict_next_game/weights.hdf5"
# Resume from a previously saved checkpoint when one exists; on a first run
# there is no file yet and training simply starts from random weights.
if os.path.exists(filepath):
    model.load_weights(filepath)

# callbacks
# Keep on disk only the weights with the best validation accuracy seen so far.
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
# Stop after 5 epochs without improvement and roll the in-memory model back to
# the best epoch, so later cells predict with the best weights, not the last ones.
earlyStop = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

# Fit the model
model.compile(optimizer='rmsprop', loss=SparseCategoricalCrossentropy(), metrics=['accuracy'])
# validation_split carves the monitored validation data out of (Xpad, y);
# Xpad_validation / y_validation stay untouched for the hit-rate check.
model.fit(Xpad, y, epochs=150,validation_split = 0.1,callbacks=[checkpoint,earlyStop])



Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking (Masking)            (None, 29, 1)             0         
_________________________________________________________________
lstm (LSTM)                  (None, 29, 20)            1760      
_________________________________________________________________
dropout (Dropout)            (None, 29, 20)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 29, 20)            3280      
_________________________________________________________________
dropout_1 (Dropout)          (None, 29, 20)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 29, 20)            3280      
_________________________________________________________________
dropout_2 (Dropout)          (None, 29, 20)            0

# **Test**

In [None]:
# Number of top-ranked classes that count as a hit / are exported per test row.
top_k = 5

# compute hit rate for validation data
predicted_app = model.predict(Xpad_validation)
# Negating the scores makes argsort rank classes from highest to lowest score.
idx = (-predicted_app).argsort()
# A sample is a hit when its true label is among the first top_k ranked classes.
# (The previous check compared ranks 0, 1, 2, 3 and 5, skipping rank 4.)
hit = int((idx[:, :top_k] == np.asarray(y_validation)[:, None]).any(axis=1).sum())
print("hit rate = "+str(hit/len(Xpad_validation)))

# final result
predicted_app_test = model.predict(Xpad_test)
idx = (-predicted_app_test).argsort()
# write to file
Point = make_dataclass("point", [("id", int), ("next_games", str)])
answer = []
for i in range(len(idx)):
  # proper format: the top_k class indices separated by single spaces
  temp = " ".join(str(int(game)) for game in idx[i][:top_k])
  answer.append(Point(index_test[i],temp))
answer = pd.DataFrame(answer)
# NOTE(review): to_csv also writes the DataFrame index as an unnamed first column;
# pass index=False if the expected file has only the "id" and "next_games" columns.
answer.to_csv('/content/drive/MyDrive/Colab Notebooks/predict_next_game/result.csv')
