In [1]:
import pandas as pd
import json

import numpy as np
from tqdm import tqdm

from datetime import datetime
from time import time
import os

import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import math
import argparse

In [2]:
torch.cuda.get_device_name(1)

'NVIDIA GeForce RTX 2080 Ti'

In [3]:
df_train_1 = pd.read_csv('/home/capstone22/WildFIrePrediction/Isaac_data/isaac_train.csv')

# df_valid = pd.read_csv('/home/capstone22/WildFIrePrediction/Isaac_data/isaac_valid.csv')
# df_test = pd.read_csv('/home/capstone22/WildFIrePrediction/Isaac_data/isaac_test.csv')

In [4]:
selected_features = ['value',
                     'Road_Dist',
                     'Elec_Dist',
                     'Mean_Housing_Dens_25km',
                     'aet__mean_Normal',
                     'cwd__mean_Normal',
                     'cwd__mean_ThreeYear_Dev',
#                      'time_since_fire'
                     'FFWI']

In [5]:
VALUE = 'value'
FORECAST = 50000

TARGET = f"{VALUE}_lead{FORECAST}"

df_train_1[TARGET] = df_train_1[VALUE].shift(-FORECAST)
df_train_1 = df_train_1.iloc[:-FORECAST]

print(len(df_train_1))

50000


In [6]:
df_train = df_train_1[df_train_1['Unnamed: 0'] < 41500].copy()
df_valid = df_train_1[df_train_1['Unnamed: 0'] >= 41500].copy()

print("Validation set fraction:", len(df_valid) / len(df_train))

Validation set fraction: 0.20481927710843373


In [7]:
print(len(df_train))

41500


In [8]:
target_mean = df_train[TARGET].mean()
target_std = df_train[TARGET].std()

for col in df_train.columns:
    if col in selected_features:
        mean = df_train[col].mean()
        std = df_train[col].std()

        df_train[col] = (df_train[col] - mean) / std
        df_valid[col] = (df_valid[col] - mean) / std

In [9]:
class IsaacData(Dataset):
    def __init__(self, dataframe, target, features, sequence_length=10):
        self.features = features
        self.target = target
        self.sequence_length = sequence_length
        self.y = torch.tensor(dataframe[target].values).float()
        self.X = torch.tensor(dataframe[features].values).float()

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, i): 
        if i >= self.sequence_length - 1:
            i_start = i - self.sequence_length + 1
            x = self.X[i_start:(i + 1), :]
        else:
            padding = self.X[0].repeat(self.sequence_length - i - 1, 1)
            x = self.X[0:(i + 1), :]
            x = torch.cat((padding, x), 0)

        return x, self.y[i]

In [10]:
isaac_train = IsaacData(df_train, TARGET, selected_features)
isaac_valid = IsaacData(df_valid, TARGET, selected_features)

trainloader = DataLoader(isaac_train, batch_size=10, shuffle=False, num_workers=16)
valloader = DataLoader(isaac_valid, batch_size=10, shuffle=False, num_workers=8)

In [11]:
X, y = next(iter(trainloader))

print("Features shape:", X.shape)
print("Target shape:", y.shape)
print(len(trainloader))

Features shape: torch.Size([10, 10, 8])
Target shape: torch.Size([10])
4150


In [12]:
class LSTM(nn.Module):
    def __init__(self, features, hidden_units, device):
        super().__init__()
        self.device = device
        self.features = features # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = 1

        self.lstm = nn.LSTM(
            input_size=features,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers
        )

        self.linear = nn.Linear(in_features=self.hidden_units, out_features=1)

    def forward(self, x):
        batch_size = x.shape[0]
        h0 = (torch.zeros(self.num_layers, batch_size, self.hidden_units).to(self.device)).requires_grad_()
        c0 = (torch.zeros(self.num_layers, batch_size, self.hidden_units).to(self.device)).requires_grad_()
        
        _, (hn, _) = self.lstm(x, (h0, c0))
        out = self.linear(hn[0]).flatten()  # First dim of Hn is num_layers, which is set to 1 above.

        return out

In [13]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(DEVICE)
NUM_FEATURES = len(selected_features)
HIDDEN_SIZE = 512

model = LSTM(NUM_FEATURES, HIDDEN_SIZE, DEVICE)
model= nn.DataParallel(model)
model.to(DEVICE)

loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

cuda


In [14]:
def train_model(data_loader, model, loss_function, optimizer):
    num_batches = len(data_loader)
    total_loss = 0
    model.train()
    
    for X, y in data_loader:
        output = model(X.to(DEVICE))
        loss = loss_function(output, y.to(DEVICE))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / num_batches
    print(f"Train loss: {avg_loss}")

def test_model(data_loader, model, loss_function, best_model, min_loss):
    
    num_batches = len(data_loader)
    total_loss = 0
    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            output = model(X.to(DEVICE))
            total_loss += loss_function(output, y.to(DEVICE)).item()
        else:
            # Save model based on loss
            if total_loss < min_loss:
                min_loss = total_loss
                best_model = model
#                 torch.jit.script(model).save(f'{model_name}.pt') 
#                 print('Model saved!')

    avg_loss = total_loss / num_batches
    print(f"Test loss: {avg_loss}")

In [15]:
min_loss = np.inf
best_model = model
for ix_epoch in range(15):
    print(f"Epoch {ix_epoch}\n---------")
    train_model(trainloader, model, loss_function, optimizer=optimizer)
    test_model(valloader, model, loss_function, best_model, min_loss)
    print()

Epoch 0
---------
Train loss: 0.0032473070849701057
Test loss: 0.002115310800371479

Epoch 1
---------
Train loss: 0.0032168936047182696
Test loss: 0.002118696405857722

Epoch 2
---------
Train loss: 0.003207651841778989
Test loss: 0.0021168714426722138

Epoch 3
---------


KeyboardInterrupt: 