In [1]:
import pandas as pd
import dask.dataframe as dd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from tqdm import tqdm
import tensorflow as tf
from torch.utils.data import DataLoader, Dataset
import torch
from data_import import *

In [2]:
# Load the Oregon data set. `oregon_import` is not defined in this notebook;
# presumably it comes from the star-import of `data_import` -- TODO confirm.
# The returned mapping is later indexed with the key 'train_timeseries'.
oregon_data_dict = oregon_import()

file import: 100%|██████████| 3/3 [00:00<00:00,  4.36it/s]


In [3]:
# Build the single-county frame used for the rest of the notebook.
# The whole preparation is one chain so the cell is idempotent: re-running it
# always starts from the untouched `oregon_data_dict['train_timeseries']` and
# never assigns into a slice of another frame.
TARGET_FIPS = 41067   # county kept for modelling (presumably Washington County, OR -- confirm)
EDGE_TRIM_ROWS = 4    # rows dropped at each end; presumably aligns the 7-row windows
                      # so that each one ends on a scored row -- TODO confirm

wa_df = (
    oregon_data_dict['train_timeseries']
    .loc[lambda frame: frame['fips'] == TARGET_FIPS]
    .drop(columns=['fips'])
    .iloc[EDGE_TRIM_ROWS:-EDGE_TRIM_ROWS, :]
    # Convert each Timestamp to POSIX seconds (float) so the column is numeric.
    .assign(date=lambda frame: frame['date'].map(pd.Timestamp.timestamp))
    .reset_index(drop=True)
)

In [20]:
# Hand-built first window: rows 0..6 as features/dates, label taken from row 6.
date_1 = wa_df.iloc[0:7, wa_df.columns == 'date']
X_1 = wa_df.iloc[0:7, ~((wa_df.columns == 'score') | (wa_df.columns == 'date'))]
y_1 = wa_df.iloc[6, wa_df.columns == 'score']

In [6]:
# Hand-built second window: rows 7..13 as features/dates, label taken from row 13.
date_2 = wa_df.iloc[7:14, wa_df.columns == 'date']
X_2 = wa_df.iloc[7:14, ~((wa_df.columns == 'score') | (wa_df.columns == 'date'))]
y_2 = wa_df.iloc[13, wa_df.columns == 'score']

In [45]:
class DroughtDataset(Dataset):
    """
    Readying Drought dataset for model.

    The frame is cut into consecutive, non-overlapping windows of `window`
    rows. Sample ``i`` is made of rows ``i*window .. (i+1)*window - 1`` of the
    feature columns (everything except 'score' and 'date'), the ``i``-th
    non-missing value of the 'score' column, and the matching 'date' rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'score' column and a 'date' column; every other column
        is treated as a numeric feature.
    window : int, default 7
        Number of consecutive rows per sample.
    dtype : torch.dtype or None, default torch.float32
        dtype of the feature and label tensors. float32 matches the default
        parameter dtype of `torch.nn` modules; pass ``None`` to keep whatever
        dtype the frame's values convert to.

    Raises
    ------
    ValueError
        If `window` is smaller than 1.
    """
    def __init__(self, df, window=7, dtype=torch.float32):
        if int(window) < 1:
            raise ValueError("window must be a positive integer")
        self.window = int(window)

        is_score = df.columns == 'score'
        is_date = df.columns == 'date'

        self.X = torch.tensor(df.iloc[:, ~(is_score | is_date)].to_numpy(), dtype=dtype)
        # Rows without a score are dropped, so y[i] is the i-th available label.
        self.y = torch.tensor(df.iloc[:, is_score].dropna().to_numpy(), dtype=dtype)
        self.date = np.array(df.iloc[:, is_date].to_numpy())

    def __len__(self):
        # Only count samples that have BOTH a label and a complete window of
        # rows; a trailing partial window would otherwise yield a short tensor
        # that cannot be stacked into a batch.
        return int(min(len(self.y), len(self.X) // self.window))

    def __getitem__(self, index):
        size = len(self)
        if index < 0:
            # Support Python-style negative indexing instead of silently
            # returning an empty slice.
            index += size
        if not 0 <= index < size:
            raise IndexError(f"index out of range for dataset of size {size}")

        start = index * self.window
        stop = start + self.window

        output_X = self.X[start:stop]
        output_y = self.y[index]
        output_date = self.date[start:stop]

        return output_X, output_y, output_date

In [67]:
# Wrap the prepared frame in the windowed dataset and serve it in shuffled
# mini-batches of 16 samples.
wa_dataset = DroughtDataset(df=wa_df)
wa_data_loader = DataLoader(
    dataset=wa_dataset,
    shuffle=True,
    batch_size=16,
)

In [48]:
# Smoke test: iterate the loader once to confirm that every batch can be
# collated and unpacked into (features, label, dates). Only the batch index is
# printed; the unpacked tensors are not used here.
for batch, (soil_info, drought_rating, date) in enumerate(wa_data_loader):
    print(batch)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55


In [7]:
import torch.nn as nn
from torch import optim
# Seed PyTorch's global random number generator so weight initialisation and
# DataLoader shuffling are repeatable on a fresh kernel run.
torch.manual_seed(1)

<torch._C.Generator at 0x24f84975550>

In [62]:
# hyperparameters
sequence_len = 7
input_len = 18 # number of independent variable columns
hidden_size = 128
num_layers = 2
num_epochs = 15
learning_rate = 1e-4

In [28]:
class LSTM_Model(nn.Module):
    """
    Stacked LSTM followed by a linear head applied to the last time step.

    Parameters
    ----------
    input_dim : int
        Number of features per time step.
    hidden_dim : int
        Size of the LSTM hidden state.
    num_layers : int
        Number of stacked LSTM layers.
    output_dim : int, default 1
        Number of values predicted per sequence.
    """
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim = 1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, h_0=None, c_0=None):
        """
        Predict one output vector per sequence.

        Parameters
        ----------
        x : torch.Tensor
            Batched input laid out as (batch, seq_len, input_dim), since the
            LSTM is built with ``batch_first=True``.
        h_0, c_0 : torch.Tensor, optional
            Initial hidden / cell state of shape
            (num_layers, batch, hidden_dim). Whichever one is omitted is
            replaced by zeros; a state that IS supplied is always used.

        Returns
        -------
        torch.Tensor
            Shape (batch, output_dim), computed from the last time step.
        """
        state_shape = (self.num_layers, x.size(0), self.hidden_dim)
        # new_zeros inherits dtype and device from x, so the default state is
        # valid on GPU and for non-float32 models as well.
        if h_0 is None:
            h_0 = x.new_zeros(state_shape)
        if c_0 is None:
            c_0 = x.new_zeros(state_shape)

        out, _ = self.lstm(x, (h_0, c_0))
        # Keep only the final time step of the top layer's output sequence.
        return self.output_layer(out[:, -1, :])

In [29]:
# Instantiate the network from the hyperparameter cell and show its layers.
model = LSTM_Model(input_len, hidden_size, num_layers)
print(model)

LSTM_Model(
  (lstm): LSTM(18, 128, num_layers=2, batch_first=True)
  (output_layer): Linear(in_features=128, out_features=1, bias=True)
)


In [11]:
# Mean-squared-error objective, optimised with Adam over the current model's
# parameters. Re-run this cell whenever `model` is re-created, otherwise the
# optimizer keeps updating the previous model's parameters.
loss_function = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

  from .autonotebook import tqdm as notebook_tqdm


In [53]:
def train(num_epochs: int, model: nn.Module, loss_fn, optimizer, train_data_loader, log_every: int = 10):
    """
    Trains the model in place with mini-batch gradient descent.

    Parameters
    ----------
    num_epochs : int
        Number of full passes over `train_data_loader`.
    model : nn.Module
        Network to optimise; called as ``model(features)``.
    loss_fn : callable
        Called as ``loss_fn(output, target)``; must return a scalar tensor.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to ``model.parameters()``.
    train_data_loader : sized iterable
        Yields ``(features, target, date)`` triples; `date` is ignored.
    log_every : int, default 10
        Print the current loss every `log_every` batches; 0 disables logging.

    Returns
    -------
    None
    """

    total_steps = len(train_data_loader)

    # Make sure train-only layers (dropout, batch norm) are in training mode
    # even if the model was switched to eval() by an earlier cell.
    model.train()

    for epoch in range(num_epochs):
        for batch, (soil_info, drought_rating, _date) in enumerate(train_data_loader):
            output = model(soil_info)

            # A (batch,) target against a (batch, 1) output would broadcast to
            # (batch, batch) inside the loss and silently train on the wrong
            # objective; align the target when the element counts agree.
            if drought_rating.shape != output.shape and drought_rating.numel() == output.numel():
                drought_rating = drought_rating.reshape(output.shape)

            loss = loss_fn(output, drought_rating)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if log_every and (batch+1) % log_every == 0:
                # `:>4f` right-aligns to a minimum width of 4 and keeps the
                # default six decimals (it does not round to four).
                print(f'Epoch: {epoch+1}; Batch: {batch+1} / {total_steps}; Loss: {loss.item():>4f}')
    return None

In [68]:
# Run the full training schedule on the county loader.
train(
    num_epochs=num_epochs,
    model=model,
    loss_fn=loss_function,
    optimizer=optimizer,
    train_data_loader=wa_data_loader,
)

Epoch: 1; Batch: 10 / 56; Loss: 0.938597
Epoch: 1; Batch: 20 / 56; Loss: 1.208475
Epoch: 1; Batch: 30 / 56; Loss: 1.680801
Epoch: 1; Batch: 40 / 56; Loss: 1.278661
Epoch: 1; Batch: 50 / 56; Loss: 0.796126
Epoch: 2; Batch: 10 / 56; Loss: 0.603793
Epoch: 2; Batch: 20 / 56; Loss: 0.870318
Epoch: 2; Batch: 30 / 56; Loss: 1.188012
Epoch: 2; Batch: 40 / 56; Loss: 0.939582
Epoch: 2; Batch: 50 / 56; Loss: 0.655015
Epoch: 3; Batch: 10 / 56; Loss: 1.625801
Epoch: 3; Batch: 20 / 56; Loss: 0.603285
Epoch: 3; Batch: 30 / 56; Loss: 0.341574
Epoch: 3; Batch: 40 / 56; Loss: 0.846237
Epoch: 3; Batch: 50 / 56; Loss: 0.650849
Epoch: 4; Batch: 10 / 56; Loss: 0.727337
Epoch: 4; Batch: 20 / 56; Loss: 0.546364
Epoch: 4; Batch: 30 / 56; Loss: 0.699942
Epoch: 4; Batch: 40 / 56; Loss: 1.542431
Epoch: 4; Batch: 50 / 56; Loss: 0.527385
Epoch: 5; Batch: 10 / 56; Loss: 1.559371
Epoch: 5; Batch: 20 / 56; Loss: 1.344074
Epoch: 5; Batch: 30 / 56; Loss: 0.765897
Epoch: 5; Batch: 40 / 56; Loss: 0.610124
Epoch: 5; Batch: