# Time Series Prediction with RNN/LSTM/GRU Using PyTorch

In this notebook you will implement RNN, LSTM and GRU models in order to predict the future sales of Migros. Sales for Fruits and Vegetables are given for 2014-2017. You will train your model on the 2014-2016 data and test it on 2017 to see whether the forecast is accurate.

There are tutorials on time series prediction that you can consult for help:
https://towardsdatascience.com/predicting-sales-611cb5a252de

If you have not used the pandas library before, you can check this tutorial:
https://data36.com/pandas-tutorial-1-basics-reading-data-files-dataframes-data-selection/


## Library

In [107]:
from datetime import datetime, timedelta,date
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from collections import Counter
import re
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Data Processing

In [120]:
class Dataset(torch.utils.data.Dataset):
    """Sliding-window dataset of token indices built from a UTF-8 text file.

    The text is split into tokens (single characters when ``mode == 'char'``,
    lower-cased words and punctuation marks otherwise). Every distinct token
    is mapped to an integer index, with the most frequent token at index 0.
    Each item is an ``(input, target)`` pair of ``seq_length`` indices in
    which the target is the input shifted one token to the right.

    Args:
        seq_length: Number of tokens in each input/target window.
        path: Path of the text file to read.
        mode: ``'char'`` for character tokens; any other value selects
            word-level tokens.
    """

    def __init__(
        self,
        seq_length,
        path,
        mode='char',
    ):
        self.seq_length = seq_length
        self.mode = mode
        self.path = path
        self.data = self._read_data()

        self.unique_data = self._find_unique()

        # Two-way lookup tables between tokens and their integer indices.
        self.index_to_data = dict(enumerate(self.unique_data))
        self.data_to_index = {
            token: index for index, token in self.index_to_data.items()
        }

        # The whole corpus encoded as a flat list of indices.
        self.data_indexes = [self.data_to_index[token] for token in self.data]

    def _read_data(self):
        """Read the file at ``self.path`` and return its tokens as a Series."""
        # The context manager closes the handle even if decoding fails.
        with open(self.path, 'rb') as handle:
            text = handle.read().decode(encoding='utf-8')

        if self.mode == 'char':
            # Drop line breaks and the byte-order mark, keep every other char.
            return pd.Series(list(re.sub("[" + '\n\r\ufeff' + "]", '', text)))
        # Words (apostrophes allowed) or single punctuation marks.
        return pd.Series(re.findall(r"[\w']+|[.,!?;]", text.lower()))

    def _find_unique(self):
        """Return the distinct tokens ordered from most to least frequent."""
        data_count = Counter(self.data)
        return sorted(data_count, key=data_count.get, reverse=True)

    def __len__(self):
        """Return the number of full windows that still have a target token."""
        # Clamp at zero: a negative value would make ``len()`` raise when the
        # text is shorter than ``seq_length``.
        return max(0, len(self.data_indexes) - self.seq_length)

    def __getitem__(self, idx):
        """Return the window starting at ``idx`` and its one-step-ahead target."""
        end = idx + self.seq_length
        return (
            torch.tensor(self.data_indexes[idx:end]),
            torch.tensor(self.data_indexes[idx + 1:end + 1]),
        )

In [121]:
# Word-level dataset over data/pride_prejudice.txt with 100-token windows,
# served three windows per batch.
dataset = Dataset(
    seq_length=100,
    path='data/pride_prejudice.txt',
    mode='word',
)
dataloader = DataLoader(dataset=dataset, batch_size=3)

In [123]:
# Tokenise the corpus by hand: lower-cased words (apostrophes allowed) and
# single punctuation marks. The ``with`` block closes the file handle, which
# the bare ``open(...).read()`` form left open.
with open('data/pride_prejudice.txt', 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
data = pd.Series(re.findall(r"[\w']+|[.,!?;]", text.lower()))

In [124]:
# Bare expression: the notebook shows the tokenised Series as the cell output.
data

0               the
1           project
2         gutenberg
3             ebook
4                of
            ...    
144392         hear
144393        about
144394          new
144395       ebooks
144396            .
Length: 144397, dtype: object

# RNN, LSTM and GRU

For each model, train and test on both the Fruit and the Vegetables data and plot the results. Also report the MAPE for each.

## Recurrent Neural Network (RNN) (10 points)

In [None]:
class RNN_Model(nn.Module):
    """Token-level sequence model: embedding -> stacked LSTM -> linear decoder.

    Args:
        input_size: Vocabulary size; used as the number of embedding rows and
            as the number of output logits per position.
        hidden_size: Embedding dimension and LSTM hidden dimension.
        num_layers: Number of stacked LSTM layers.
        seq_length: Size of the second dimension of the state tensors built
            by ``initialize``.
    """

    def __init__(self, input_size, hidden_size=128, num_layers=3, seq_length=20):
        super(RNN_Model, self).__init__()

        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.embedding = nn.Embedding(
            num_embeddings=input_size,
            embedding_dim=self.hidden_size,
        )
        self.lstm = nn.LSTM(
            input_size=self.hidden_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            # Dropout acts between stacked layers only; with a single layer
            # PyTorch warns that a non-zero value has no effect.
            dropout=0.2 if self.num_layers > 1 else 0.0,
        )
        # Was ``self.lstm_size``, an attribute that is never defined.
        self.fc = nn.Linear(self.hidden_size, input_size)

    def forward(self, x, prev_state=None):
        """Return ``(logits, state)`` for a tensor of token indices.

        Args:
            x: Integer tensor of token indices.
            prev_state: ``(h_0, c_0)`` tuple for the LSTM, or ``None`` to
                start from a zero state.

        NOTE(review): the LSTM is created without ``batch_first``, so dim 1
        of ``x`` must equal ``seq_length`` for a state from ``initialize``
        to fit. Confirm this axis convention is the intended one.
        """
        embed = self.embedding(x)
        # nn.LSTM takes the (h_0, c_0) pair as ONE argument; unpacking it
        # with ``*`` passed too many positional arguments.
        output, state = self.lstm(embed, prev_state)
        logits = self.fc(output)
        return logits, state

    def initialize(self):
        """Return a zero ``(h_0, c_0)`` pair sized from the model settings."""
        shape = (self.num_layers, self.seq_length, self.hidden_size)
        return torch.zeros(*shape), torch.zeros(*shape)

In [None]:
num_epochs = 2000
learning_rate = 0.001

# Vocabulary size: one embedding row and one output logit per distinct token.
# (The original line, ``input_size = dataset.``, was an incomplete expression.)
input_size = len(dataset.unique_data)

model = RNN_Model(input_size)

criterion = torch.nn.CrossEntropyLoss()    # cross-entropy over the token vocabulary
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
# TODO: the training step is not implemented yet; the loop body is a placeholder.
for epoch in range(num_epochs):
    pass

In [None]:
# NOTE(review): RNN_Model.forward returns a (logits, state) tuple, so this
# regression-style evaluation will not work with it as written; reconcile
# before running.

# Inference only: evaluation mode, and no autograd bookkeeping.
model.eval()
with torch.no_grad():
    train_predict = model(testX)

# ``detach()`` is the supported replacement for the legacy ``.data`` attribute.
data_predict = train_predict.detach().numpy()
dataY_plot = testY.detach().numpy()

# Presumably undoes the scaling applied to the series before training --
# confirm against where ``scaler`` is fitted.
data_predict = scaler.inverse_transform(data_predict)
dataY_plot = scaler.inverse_transform(dataY_plot)

In [None]:
# Plot actual and predicted values against the last 365 entries of the
# 'Tarih' column, one trace per series.
plot_data = [
    go.Scatter(x=df_sales['Tarih'][-365:], y=series.flatten(), name=label)
    for label, series in (('actual', dataY_plot), ('predicted', data_predict))
]

plot_layout = go.Layout(title='Sales Prediction')
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)

## Long Short Term Memory (LSTM) (10 points)

In [None]:
class LSTM_Model(nn.Module):
    """LSTM regressor: stacked LSTM followed by a single-output linear head.

    The original skeleton defined no layers (so the optimizer had no
    parameters to train) and ``forward`` returned an undefined name.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden dimension.
        num_layers: Number of stacked LSTM layers.
        seq_length: Window length. It is only stored; the forward pass
            accepts any sequence length.
    """

    def __init__(self, input_size, hidden_size, num_layers, seq_length):
        super(LSTM_Model, self).__init__()

        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.lstm = nn.LSTM(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(self.hidden_size, 1)

    def forward(self, x):
        """Predict one value per sequence.

        Args:
            x: Float tensor of shape ``(batch, time, input_size)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        # No initial state is passed, so the LSTM starts from zeros.
        output, _ = self.lstm(x)
        # Use the top layer's output at the last time step as the summary.
        out = self.fc(output[:, -1, :])
        return out

In [None]:
# Hyperparameters for the LSTM run.
num_epochs, learning_rate = 2000, 0.01
input_size, hidden_size, num_layers = 1, 4, 1

model = LSTM_Model(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    seq_length=seq_length,
)

criterion = nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Train the model (the loop body is still a placeholder).
for epoch in range(num_epochs):
    pass

In [None]:
# Inference only: evaluation mode, and no autograd bookkeeping.
model.eval()
with torch.no_grad():
    train_predict = model(testX)

# ``detach()`` is the supported replacement for the legacy ``.data`` attribute.
data_predict = train_predict.detach().numpy()
dataY_plot = testY.detach().numpy()

# Presumably undoes the scaling applied to the series before training --
# confirm against where ``scaler`` is fitted.
data_predict = scaler.inverse_transform(data_predict)
dataY_plot = scaler.inverse_transform(dataY_plot)

In [None]:
# Plot actual and predicted values against the last 365 entries of the
# 'Tarih' column, one trace per series.
plot_data = [
    go.Scatter(x=df_sales['Tarih'][-365:], y=series.flatten(), name=label)
    for label, series in (('actual', dataY_plot), ('predicted', data_predict))
]

plot_layout = go.Layout(title='Sales Prediction')
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)

## Gated Recurrent Unit (GRU) (10 points)

In [None]:
class GRU_Model(nn.Module):
    """GRU regressor: stacked GRU followed by a single-output linear head.

    The original skeleton defined no layers (so the optimizer had no
    parameters to train) and ``forward`` returned an undefined name.

    Args:
        input_size: Number of features per time step.
        hidden_size: GRU hidden dimension.
        num_layers: Number of stacked GRU layers.
        seq_length: Window length. It is only stored; the forward pass
            accepts any sequence length.
    """

    def __init__(self, input_size, hidden_size, num_layers, seq_length):
        super(GRU_Model, self).__init__()

        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.gru = nn.GRU(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(self.hidden_size, 1)

    def forward(self, x):
        """Predict one value per sequence.

        Args:
            x: Float tensor of shape ``(batch, time, input_size)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        # No initial state is passed, so the GRU starts from zeros.
        output, _ = self.gru(x)
        # Use the top layer's output at the last time step as the summary.
        out = self.fc(output[:, -1, :])
        return out

In [None]:
# Hyperparameters for the GRU run.
num_epochs = 2000
learning_rate = 0.01

input_size = 1
hidden_size = 4
num_layers = 1


# This section evaluates the GRU, so build GRU_Model here; the original
# instantiated LSTM_Model, a copy-paste slip from the previous section.
model = GRU_Model(input_size, hidden_size, num_layers, seq_length)

criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
# TODO: the training step is not implemented yet; the loop body is a placeholder.
for epoch in range(num_epochs):
    pass

In [None]:
# Inference only: evaluation mode, and no autograd bookkeeping.
model.eval()
with torch.no_grad():
    train_predict = model(testX)

# ``detach()`` is the supported replacement for the legacy ``.data`` attribute.
data_predict = train_predict.detach().numpy()
dataY_plot = testY.detach().numpy()

# Presumably undoes the scaling applied to the series before training --
# confirm against where ``scaler`` is fitted.
data_predict = scaler.inverse_transform(data_predict)
dataY_plot = scaler.inverse_transform(dataY_plot)

In [None]:
# Plot actual and predicted values against the last 365 entries of the
# 'Tarih' column, one trace per series.
plot_data = [
    go.Scatter(x=df_sales['Tarih'][-365:], y=series.flatten(), name=label)
    for label, series in (('actual', dataY_plot), ('predicted', data_predict))
]

plot_layout = go.Layout(title='Sales Prediction')
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)

## Feature Design (25 points)

In the sections above you have only used previous sales as features. Try adding different features such as temperature, the dollar exchange rate, month (categorical), day of the week (categorical), and special days (such as religious holidays, Valentine's Day, New Year's Day).

Then design a new model to train and test with these features. Try to bring the **MAPE** metric below that of the previous models.

In [None]:
# Load the tab-separated sales file; row 0 supplies the column names.
df_sales = pd.read_csv(
    'data/net_sales.txt',
    sep='\t',
    header=0,
)

# Design your features

## Build, Train and Test Your Model for New Designed Features (25 points)

In [None]:
class Model(nn.Module):
    """Baseline regressor for the engineered features: LSTM plus linear head.

    The original skeleton defined no layers (so the optimizer had no
    parameters to train) and ``forward`` returned an undefined name. This is
    a working starting point; swap in a different architecture as needed.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden dimension.
        num_layers: Number of stacked LSTM layers.
        seq_length: Window length. It is only stored; the forward pass
            accepts any sequence length.
    """

    def __init__(self, input_size, hidden_size, num_layers, seq_length):
        super(Model, self).__init__()

        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.lstm = nn.LSTM(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(self.hidden_size, 1)

    def forward(self, x):
        """Predict one value per sequence.

        Args:
            x: Float tensor of shape ``(batch, time, input_size)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        # No initial state is passed, so the LSTM starts from zeros.
        output, _ = self.lstm(x)
        # Use the top layer's output at the last time step as the summary.
        out = self.fc(output[:, -1, :])
        return out

In [None]:
# Change hyperparameters
num_epochs, learning_rate = 2000, 0.01
input_size, hidden_size, num_layers = 1, 4, 1

model = Model(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    seq_length=seq_length,
)

criterion = nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Train the model (the loop body is still a placeholder).
for epoch in range(num_epochs):
    pass

In [None]:
# Plot actual and predicted values against the last 365 entries of the
# 'Tarih' column, one trace per series.
plot_data = [
    go.Scatter(x=df_sales['Tarih'][-365:], y=series.flatten(), name=label)
    for label, series in (('actual', dataY_plot), ('predicted', data_predict))
]

plot_layout = go.Layout(title='Sales Prediction')
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)