___

# Machine Learning in Geosciences
Department of Applied Geoinformatics and Cartography, Charles University

Lukas Brodsky lukas.brodsky@natur.cuni.cz


## Deep Learning, Sequences (RNN)


This notebook introduces deep-learning recurrent neural networks (RNNs) with PyTorch (https://pytorch.org):

* Temperature prediction with RNN


# Setup

In [None]:
# Common imports
import os
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# PyTorch imports 
import torch
import torch.nn as nn
from torch.autograd import Variable


# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# mpl.rc('axes', labelsize=14)
# mpl.rc('xtick', labelsize=12)
# mpl.rc('ytick', labelsize=12)
# Make every figure in this notebook 15 x 5 by default
# (the two entries of figure.figsize are set to 15 and 5).
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 15, 5
plt.rcParams["figure.figsize"] = fig_size


# Where to save the figures
PROJECT_ROOT_DIR = "."


def image_path(fig_id):
    """Return the path of figure ``fig_id`` inside the project's ``images`` folder.

    The result is ``PROJECT_ROOT_DIR/images/<fig_id>``; no file extension is
    appended and nothing is created on disk.
    """
    images_dir = os.path.join(PROJECT_ROOT_DIR, "images")
    return os.path.join(images_dir, fig_id)

def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    Parameters
    ----------
    fig_id : str
        File name without extension; the figure is written to
        ``image_path(fig_id) + ".png"``.
    tight_layout : bool, default True
        Call ``plt.tight_layout()`` before saving.
    """
    target = image_path(fig_id) + ".png"
    # savefig does not create missing folders, so make sure the target
    # directory exists instead of failing on a fresh checkout.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format='png', dpi=300)

In [None]:
# print(torch.__version__)
# print(torchvision.__version__)

### Data

Data from https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data
licensed under https://creativecommons.org/licenses/by-nc-sa/4.0/

In [None]:
os.listdir('./data')

In [None]:
os.path.isfile('./data/GlobalLandTemperaturesByState.csv')

In [None]:
# Read the state-level land temperature CSV into a DataFrame
df = pd.read_csv('./data/GlobalLandTemperaturesByState.csv')
# df.State.unique()

In [None]:
# Keep one state's complete records for [start_year, end_year) and derive
# the calendar columns used later on.
state = 'Assam'  # alternative: 'Massachusetts'
start_year = 1990
end_year = 2012  # exclusive upper bound

# .copy() yields an independent frame, so the column assignments below do not
# run into pandas' chained-assignment (SettingWithCopy) path on a slice.
df = df.loc[df.State == state].dropna().copy()
df['dt'] = pd.to_datetime(df['dt'])

in_period = (
    (df['dt'] >= datetime.datetime(start_year, 1, 1))
    & (df['dt'] < datetime.datetime(end_year, 1, 1))
)
df = df.loc[in_period].copy()
df['month'] = df['dt'].dt.month
df['year'] = df['dt'].dt.year

# Only these three columns are used further on; the target gets a shorter name.
data = df[['year', 'month', 'AverageTemperature']].rename(
    columns={"AverageTemperature": "temperature"}
)

In [None]:
data.columns

In [None]:
data.month.size

In [None]:
data['temperature'].min()

In [None]:
# Test plot: the whole series, with a dashed gray marker every 12 months
n_months = data['month'].size
temperature = data['temperature']
year_marks = list(range(12, n_months + 24, 12))

plt.xlabel('Months')
plt.ylabel('Temperature')
plt.vlines(year_marks, temperature.min(), temperature.max(),
           colors='gray', linestyles='dashed')
plt.plot(np.arange(n_months), temperature, 'b.--')

### Split data

In [None]:
# Hold out the last 12 months for testing; everything before is training data
test_data_size = 12
all_data = data['temperature'].values.astype(float)

split_at = len(all_data) - test_data_size
train_data = all_data[:split_at]
test_data = all_data[split_at:]

# sizes of the two parts, one per line
print(len(train_data), len(test_data), sep='\n')

In [None]:
# periodicity: each input window holds 12 consecutive values
train_window = 12

# train data as a flat FloatTensor
train_data_ = torch.FloatTensor(train_data).reshape(-1)

def create_inout_sequences(input_data, tw):
    """Slice a series into (window, next value) training pairs.

    For every start index ``i`` with ``i + tw < len(input_data)`` the pair
    ``(input_data[i:i + tw], input_data[i + tw:i + tw + 1])`` is produced,
    i.e. ``tw`` consecutive values and the length-1 slice that follows them.

    Returns a list of such tuples; it is empty when the series is not longer
    than ``tw``.
    """
    n_pairs = len(input_data) - tw
    return [
        (input_data[start:start + tw], input_data[start + tw:start + tw + 1])
        for start in range(n_pairs)
    ]
 
# tuples of x and y: (window of train_window values, length-1 slice that follows it)
train_inout_seq = create_inout_sequences(train_data_, train_window)

# show the first pair as the cell output
train_inout_seq[0]
# train_inout_seq[:5]


### RNN (LSTM) model

In [None]:
class LSTM(nn.Module):
    """LSTM layer followed by a linear read-out; returns the last step's prediction.

    The recurrent state is kept in ``self.hidden_cell`` as a pair of tensors
    and is overwritten by every ``forward`` call, so callers that want
    independent sequences reset it to zeros between calls.
    """

    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        # recurrent part first, read-out second
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)

        # zero-initialised state pair passed to the LSTM on the first call
        state_shape = (1, 1, self.hidden_layer_size)
        self.hidden_cell = (torch.zeros(*state_shape), torch.zeros(*state_shape))

    def forward(self, input_seq):
        """Feed the whole sequence through the network and return the final output row."""
        n_steps = len(input_seq)
        # reshape to (steps, 1, features) before handing the data to the LSTM
        steps = input_seq.view(n_steps, 1, -1)
        lstm_out, self.hidden_cell = self.lstm(steps, self.hidden_cell)
        per_step = self.linear(lstm_out.view(n_steps, -1))
        return per_step[-1]


In [None]:
# model instance
# Seeding NumPy alone does not make PyTorch deterministic: the weights are
# initialised from torch's own generator, so seed it before building the model
# to keep the notebook's results stable across runs.
torch.manual_seed(42)
model = LSTM()

print(model)

In [None]:
# mean squared error loss
loss_function = nn.MSELoss()
# Adam over all model parameters, learning rate 0.005
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)


### Model training

In [None]:
epochs = 100

# make sure the model is in training mode (the forecasting cell switches it to eval)
model.train()

# count epochs 1..epochs inclusive so that exactly `epochs` passes are made
for i in range(1, epochs + 1):
    for seq, labels in train_inout_seq:
        optimizer.zero_grad()
        # every window is treated as an independent sequence: start from a
        # fresh zero state rather than the one left by the previous window
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                             torch.zeros(1, 1, model.hidden_layer_size))

        y_pred = model(seq)

        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    # progress report (loss of the last window of the epoch) on every 10th
    # epoch and once more after the final one
    if i % 10 == 1 or i == epochs:
        print('epoch: {}, loss: {}'.format(i, single_loss.item()))


In [None]:
# test: forecast the next `fut_pred` values one step at a time
fut_pred = 12
# start the rolling window with the last `train_window` training values
test_inputs = train_data_[-train_window:].tolist()

model.eval()
with torch.no_grad():
    for i in range(fut_pred):
        seq = torch.FloatTensor(test_inputs[-train_window:])
        # reset the attribute the model actually reads (`hidden_cell`);
        # assigning to `model.hidden` left the state from the previous call in place
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                             torch.zeros(1, 1, model.hidden_layer_size))
        # feed the prediction back in as the newest value of the window
        test_inputs.append(model(seq).item())

# test_inputs[fut_pred:]

# drop the initial window; what remains are the forecasts, as a column vector
actual_predictions = np.array(test_inputs[train_window:]).reshape(-1, 1)
print(actual_predictions)

In [None]:
# test plot: full series (green), training part (blue), forecast (red)
n_train = len(train_data)
n_pred = len(actual_predictions)

x_orig = np.arange(0, data['month'].size, 1)
x_train = np.arange(0, n_train, 1)
# the x range follows the number of forecasts instead of a hard-coded 12,
# so x and y stay the same length when the forecast horizon is changed
x_test = np.arange(n_train, n_train + n_pred, 1)

plt.ylabel('Temperature')
x_label = 'Months: starting ' + str(start_year)
plt.xlabel(x_label)
# plt.autoscale(axis='x', tight=True)
# dashed gray marker every 12 months across the training + forecast range
plt.vlines(list(range(12, n_train + n_pred + 12, 12)),
           data['temperature'].min(), data['temperature'].max(), linestyles='dashed', colors='gray')
plt.plot(x_orig, data['temperature'], 'g.-', label='observed')
plt.plot(x_train, train_data, 'b-', label='training data')
plt.plot(x_test, actual_predictions, 'ro--', label='prediction')
plt.legend()
plt.show()

In [None]:
# Seasonal view: every observation against its calendar month (blue dots),
# with the forecast values drawn over months 1..12 (red)
plt.xlabel('Month')
plt.ylabel('Temperature')
plt.plot(data['month'], data['temperature'], 'b.')
months = np.arange(1, 13)
plt.plot(months, actual_predictions, 'ro--')