In [1]:
import pandas as pd
import json

import numpy as np
from tqdm import tqdm

import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchsummary import summary

In [4]:
df = pd.read_csv('/home/capstone22/WildFIrePrediction/isaac_data/processed_log_reg/2003_2013_shuffled.csv')

df_no_fire = df.loc[df['value'] == 0]
df_fires = df.loc[df['value'] == 1]

print('Num no fires =', len(df_no_fire))
print('Num fires =', len(df_fires))

df = df_no_fire.sample(frac = 0.1)
df = df.append(df_fires)
print('Total number of points sampled =', len(df))

df_train = df.sample(frac = 0.8)
df_valid = df.drop(df_train.index)

df_test = []
for year in range(2014, 2018):
    PATH = '/home/capstone22/WildFIrePrediction/isaac_data/processed_log_reg/'
    PATH += str(year) + '.csv'
    df = pd.read_csv(PATH)
    df_test.append(df)

print('Training Size:', len(df_train))
print('Validation Size:', len(df_valid))
print('Ratio:', len(df_train)/(len(df_train) + len(df_valid)))


Num no fires = 4488830
Num fires = 27077
Total number of points sampled = 475960
Training Size: 380768
Validation Size: 95192
Ratio: 0.8


In [5]:
'''
selected_features = ['Road_Dist',
                    'Elec_Dist',
                    'Cultivated_Prop',
                    'aet__maximum_Normal',
                    'aet__mean_Normal',
                    'aet__minimum_Normal',
                    'cwd__maximum_Normal',
                    'cwd__mean_Normal',
                    'cwd__minimum_Normal',
                    'pck__maximum_Normal',
                    'pck__mean_Normal',
                    'pck__minimum_Normal',
                    'aet__mean_ThreeYear_Dev',
                    'cwd__mean_ThreeYear_Dev',
                    'Mean_Housing_Dens_25km',
                    'FFWI',
                    'time_since_fire']
'''

selected_features = ['Elec_Dist',
                    'aet__mean_Normal',
                    'aet__mean_ThreeYear_Dev',
                    'cwd__mean_Normal',
                    'cwd__mean_ThreeYear_Dev',
                    'FFWI',
                    'time_since_fire']

In [6]:
class Data(Dataset):
    def __init__(self,dataframe, target,features):
        self.features = features
        self.y = torch.tensor(dataframe[target].values).float()
        self.x = torch.tensor(dataframe[features].values).float()
        self.df = dataframe

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, i):
        x = self.df.drop(columns=['value']).iloc[i][selected_features].values
        x = x.astype('float32')
        x = torch.from_numpy(x)

        y = self.df['value'].iloc[i]
        y = y.astype('long')
        y = torch.tensor(y)
        return x, y

In [7]:
train_data = Data(dataframe=df_train,target='value',features=selected_features)
valid_data = Data(dataframe=df_valid,target='value',features=selected_features)
test_data = []
for df in df_test:
    test_data.append(Data(dataframe=df,target='value',features=selected_features))

In [8]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size = 256, shuffle=True, num_workers=16)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size = 256, shuffle=False, num_workers=16)
test_loader = []

for data in test_data:
    test_loader = torch.utils.data.DataLoader(data, batch_size = 256, shuffle=False, num_workers=16)

In [13]:
class NN(nn.Module):
    def __init__(self,input_size):
        super().__init__()
        self.l1 = nn.Linear(input_size,64)
        self.l2 = nn.Linear(64,32)
        self.l3 = nn.Linear(32,8)
        self.l4 = nn.Linear(8, 2)

    def forward(self, x):
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.log_softmax(self.l4(x), dim=1)
        return x

In [14]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(DEVICE)

model = NN(input_size=len(selected_features))
model = nn.DataParallel(model)
model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

cuda:0


In [15]:
summary(model)

Layer (type:depth-idx)                   Param #
├─NN: 1-1                                --
|    └─Linear: 2-1                       512
|    └─Linear: 2-2                       2,080
|    └─Linear: 2-3                       264
|    └─Linear: 2-4                       18
Total params: 2,874
Trainable params: 2,874
Non-trainable params: 0


Layer (type:depth-idx)                   Param #
├─NN: 1-1                                --
|    └─Linear: 2-1                       512
|    └─Linear: 2-2                       2,080
|    └─Linear: 2-3                       264
|    └─Linear: 2-4                       18
Total params: 2,874
Trainable params: 2,874
Non-trainable params: 0

In [16]:
epochs = 10
for e in range(epochs):

    model.train()
    train_loss = 0
    for x, y in tqdm(train_loader, desc='Training'):
        #forward pass
        output = model(x.to(DEVICE))
        loss = criterion(output, y.to(DEVICE))
        #backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    #Validation
    model.eval()

    val_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for x, y in tqdm(valid_loader, desc='Validation'):
            output = model(x.to(DEVICE))
            loss = criterion(output, y.to(DEVICE))
            val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()

        train_loss /= len(train_data)
        val_loss /= len(val_data)
        val_acc = 100 * correct / total
        print(f"Epoch [{e}/{epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

Training:  77%|███████▋  | 1152/1488 [53:22<15:34,  2.78s/it] 


KeyboardInterrupt: 