In [9]:
import pandas as pd
import json

import numpy as np
from tqdm import tqdm

import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

<h2>Loading in data</h2>

In [10]:
# Directory containing the Isaac train/valid/test CSV files. Defined once so the
# notebook can be pointed at a different location by editing a single line.
DATA_DIR = '/home/capstone22/WildFIrePrediction/Isaac_data'

# Read each split into its own DataFrame.
df_train = pd.read_csv(f'{DATA_DIR}/isaac_train.csv')
df_valid = pd.read_csv(f'{DATA_DIR}/isaac_valid.csv')
df_test = pd.read_csv(f'{DATA_DIR}/isaac_test.csv')

<h2>Define features to be used</h2>

In [11]:
# selected_features = ['Road_Dist',
#                      'Elec_Dist',
#                      'Cultivated_Prop',
#                      'aet__maximum_Normal',
#                      'aet__mean_Normal',
#                      'aet__minimum_Normal',
#                      'cwd__maximum_Normal',
#                      'cwd__mean_Normal',
#                      'cwd__minimum_Normal',
#                      'pck__maximum_Normal',
#                      'pck__mean_Normal',
#                      'pck__minimum_Normal',
#                      'aet__mean_ThreeYear_Dev',
#                      'cwd__mean_ThreeYear_Dev',
#                      'Mean_Housing_Dens_25km']

# Columns used as model inputs. Every name listed here also appears in the
# larger commented-out candidate list above.
selected_features = [
    "Elec_Dist",
    "aet__maximum_Normal",
    "aet__mean_Normal",
    "aet__minimum_Normal",
    "cwd__maximum_Normal",
    "cwd__mean_Normal",
    "cwd__minimum_Normal",
    "cwd__mean_ThreeYear_Dev",
]

<h2>Model Building</h2>

In [12]:
class IssacData(Dataset):
    """Sliding-window dataset over the rows of a DataFrame.

    Item ``i`` pairs the window of ``sequence_length`` rows that ends at row
    ``i`` (inclusive) with the target value of row ``i``. A window that would
    start before row 0 is left-padded with copies of row 0, so every item has
    shape ``(sequence_length, len(features))``.

    Args:
        dataframe: Source frame; must contain ``target`` and every column
            named in ``features``.
        target: Name of the label column.
        features: List of column names used as inputs.
        sequence_length: Number of rows in each returned window.
    """

    def __init__(self, dataframe, target, features, sequence_length=20000):
        self.features = features
        self.sequence_length = sequence_length
        self.y = torch.tensor(dataframe[target].values).float()
        self.x = torch.tensor(dataframe[features].values).float()

    def __len__(self):
        """Return the number of rows (one item per row)."""
        return self.x.shape[0]

    def __getitem__(self, i):
        """Return ``(window, label)`` for row ``i``.

        Raises:
            IndexError: If ``i`` is outside ``[0, len(self))``.
        """
        # Reject out-of-range indices up front; a negative ``i`` would otherwise
        # silently produce a window of the wrong length.
        if not 0 <= i < len(self):
            raise IndexError(f"index {i} out of range for dataset of length {len(self)}")

        if i >= (self.sequence_length - 1):
            # Enough history: take the last ``sequence_length`` rows ending at ``i``.
            i_start = i - self.sequence_length + 1
            x = self.x[i_start:(i + 1), :]
        else:
            # Not enough history: take rows 0..i and left-pad with copies of
            # row 0 so the window still has ``sequence_length`` rows.
            padding = self.x[0].repeat(self.sequence_length - i - 1, 1)
            x = self.x[0:(i + 1), :]
            x = torch.cat((padding, x), 0)
        return x, self.y[i]

In [14]:
# Display the training frame; the rendered output below abbreviates rows and columns with "...".
df_train

Unnamed: 0.1,Unnamed: 0,pixel_id,pixel_id.1,Road_Dist,Elec_Dist,Cultivated_Prop,Elev,time_Normal,aet__count_above_mean_Normal,aet__count_below_mean_Normal,...,pck__skewness_Normal,pck__standard_deviation_Normal,time,aet__mean_ThreeYear_Dev,cwd__mean_ThreeYear_Dev,Mean_Housing_Dens_25km,FFWI,FFWI_UCLA,time_since_fire,value
0,0,721345,721345,11000.0000,10.000000,0.000000,3114.0,195101_198012,4.733333,7.266667,...,0.936707,111.599601,200001_200212,3.171076,7.509119,26.736217,40.219440,56.0,74.0,0.0
1,1,741842,741842,10630.1455,6.324555,0.000000,625.0,195101_198012,5.066667,6.933333,...,0.000000,0.000000,200001_200212,0.850137,2.818136,3.738432,11.891601,29.0,74.0,0.0
2,2,667553,667553,5830.9517,4.242641,0.000466,108.0,195101_198012,5.266667,6.733333,...,0.000000,0.000000,200001_200212,0.916099,0.785598,28.166508,14.065838,21.0,74.0,0.0
3,3,567709,567709,2000.0000,4.000000,0.957647,32.0,195101_198012,4.066667,7.933333,...,0.000000,0.000000,200001_200212,0.269284,1.687698,14.826780,16.654646,29.0,74.0,0.0
4,4,580981,580981,2236.0679,10.816654,0.000000,1508.0,195101_198012,5.566667,6.433333,...,1.746941,93.189436,200001_200212,0.955209,4.356287,6.530105,23.529678,27.0,74.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,99995,1095042,1095042,0.0000,0.000000,0.000000,135.0,195101_198012,5.633333,6.366667,...,0.000000,0.000000,200901_201112,0.923710,0.836408,139.827637,13.711402,14.0,83.0,0.0
99996,99996,315567,315567,2828.4272,1.000000,0.000000,1391.0,195101_198012,4.666667,7.333333,...,2.141257,39.556457,200901_201112,0.177176,0.855658,8.381964,5.786750,24.0,83.0,0.0
99997,99997,1043276,1043276,3162.2776,2.236068,0.001176,204.0,195101_198012,4.700000,7.300000,...,0.000000,0.000000,200901_201112,0.099018,1.457016,51.117859,12.899940,21.0,59.0,0.0
99998,99998,746549,746549,6324.5550,4.242641,0.000000,341.0,195101_198012,1.433333,6.966667,...,0.000000,0.000000,200901_201112,0.335885,1.829186,5.411052,11.862068,28.0,83.0,0.0


In [13]:
# Name of the label column passed to the dataset.
TARGET = "value"

# Wrap the training frame in the windowed dataset (default sequence length).
issac_train = IssacData(
    dataframe=df_train,
    target=TARGET,
    features=selected_features,
)

# Show the feature tensor built from the selected columns.
issac_train.x

tensor([[ 10.0000,  69.2570,  23.5285,  ...,  52.9545,  19.4323,   7.5091],
        [  6.3246,  68.6188,  26.2526,  ...,  84.3014,  21.1050,   2.8181],
        [  4.2426,  41.2540,  14.1837,  ..., 101.7754,  21.9196,   0.7856],
        ...,
        [  2.2361,  51.4036,  19.0171,  ...,  93.9187,  44.2387,   1.4570],
        [  4.2426,  18.0711,   2.9230,  ..., 110.1388,  31.2072,   1.8292],
        [  2.0000,   0.0000,   0.0000,  ..., 114.3945,  37.5688,   0.9531]])

In [None]:
# IssacData expects (dataframe, target, features). Without the target name,
# `selected_features` is bound to `target` and the call fails for the missing
# `features` argument.
issac_train = IssacData(df_train, TARGET, selected_features)

In [None]:
class Model(nn.Module):
    def __init__(self,features,num_features,hidden_size,device):
        super.__init__()
        self.device = device
        self.features = features
        self.num_features = num_features
        self.hidden_size = hidden_size

        self.l1 = nn.Linear(num_features,hidden_size)
        self.l2 = nn.Linear(hidden_size, )

    def forward(self, x):
        batch