In [1]:
import pandas as pd
import numpy as np
import os
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim 
from torch.utils.data import Dataset
from torchsummary import summary

from time import time 

import tqdm

import tensorflow.keras.backend as K

import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve


  import cryptography.exceptions


In [2]:
class IsaacDS(Dataset):
    def __init__(self, split, features, data_dir):
        self.split = split
        self.features = features
        self.data_dir = data_dir
        self.df = self.getdf()
      

    def getdf(self):
        path = os.path.join(self.data_dir, f'isaac_{self.split}.csv')
        df = pd.read_csv(path)
        df = df[self.features]
        
        return df
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        X = self.df.drop(columns=['value']).iloc[idx].values
        X = X.astype('float32')
        X = torch.from_numpy(X)
        
        y = self.df['value'].iloc[idx]
        y = y.astype('long')
        y = torch.tensor(y)

        return X, y

In [3]:
DATA_PATH = '/home/capstone22/WildFIrePrediction/isaac_data/processed_data_old'

FEATURES = [
 'Road_Dist',
 'Elec_Dist',
 'Cultivated_Prop',
 'aet__mean_Normal',
 'cwd__mean_Normal',
 'aet__mean_ThreeYear_Dev',
 'cwd__mean_ThreeYear_Dev',
 'Mean_Housing_Dens_25km',
 'FFWI',
 'value']

In [24]:
class DeepNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(len(FEATURES)-1, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 32)
        self.fc4 = nn.Linear(32, 2)
        
    def forward (self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim=1)
        
        return x

In [25]:
trainset = IsaacDS('train', FEATURES, DATA_PATH)
validset = IsaacDS('valid', FEATURES, DATA_PATH)
testset = IsaacDS('test', FEATURES, DATA_PATH)

In [26]:
trainset[0]

(tensor([1.1000e+04, 1.0000e+01, 0.0000e+00, 2.3528e+01, 5.2955e+01, 3.1711e+00,
         7.5091e+00, 2.6736e+01, 4.0219e+01]),
 tensor(0))

In [27]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size = 64, shuffle=True, num_workers=16)
validloader = torch.utils.data.DataLoader(validset, batch_size = 64, shuffle=False, num_workers=8)
testloader = torch.utils.data.DataLoader(testset, batch_size = 64, shuffle=False, num_workers=8)

In [28]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = DeepNN()
model = nn.DataParallel(model)
model.to(device)

DataParallel(
  (module): DeepNN(
    (fc1): Linear(in_features=9, out_features=64, bias=True)
    (fc2): Linear(in_features=64, out_features=128, bias=True)
    (fc3): Linear(in_features=128, out_features=32, bias=True)
    (fc4): Linear(in_features=32, out_features=2, bias=True)
  )
)

In [29]:
summary(model, (64, 16))

RuntimeError: Failed to run torchsummary. See above stack traces for more details. Executed layers up to: []

In [30]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [31]:
# defining custom loss function
def custom_loss(y_true,y_pred):
    # mean abs error implementation from keras
#     mae_tensor = K.mean(K.abs(y_pred - y_true), axis=-1)
    # +ve means real more than predicted, penalize
    # -ve means predicted more than real, normal loss
    diff_tensor = y_true - y_pred
    
    exponential_tensor = torch.exp(diff_tensor  * 100.0 / 30.0)
#     exponential_tensor = K.exp(diff_tensor * 100.0 / 20.0)
#     exponential_tensor = K.exp(diff_tensor * 100.0 / 11.0)
    exponential_tensor = torch.clip(exponential_tensor, min=1.0, max=10000.0)
    abs_tensor = torch.abs(diff_tensor)
    output_tensor = torch.mean(abs_tensor * exponential_tensor)
#     clipped_tensor = K.clip(exponential_tensor, 1.0, 10000.0)
    return output_tensor

In [32]:
"""
    Parameters: 
    y_true: true binary labels of fire/no fire
    y_pred: prediction values from model
"""

def plot_roc_curve(y_true, y_pred):
    try:
#         fpr, tpr, thresholds = roc_auc_score(y_true, y_pred)
#         plt.plot(fpr, tpr)
#         plt.xlabel('False Positive Rate')
#         plt.ylabel('True Positive Rate')

        print(f'AUC score: {roc_auc_score(y_true, y_pred)}')
    except ValueError:
#         print(f'only 1 class present in y_true')
        pass

In [33]:
epochs = 50

for e in range(epochs):
    start = time()
    running_loss = 0
    model.train()
    
    for X, y in trainloader:
        
        log_ps = model(X.to(device))
        
        prob = log_ps.clone()
        prob = np.exp(prob.cpu().detach().numpy())[:, 1]
        
#         loss = criterion(log_ps, y.to(device))
        loss = custom_loss(log_ps, y.to(device).view(-1,1))
    
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f'Epoch {e}: Training loss: {running_loss/len(trainloader)}', end=' ')
        
    model.eval()
    with torch.no_grad():
        total = 0
        correct = 0
        y_true, y_pred = [], []
        for X, y in validloader: 
            log_ps = model(X.to(device)).cpu()
            mx_index = torch.argmax(log_ps, dim=1)
#             plot_roc_curve(y, mx_index)
            total += y.numel()
            correct += sum(mx_index==y).item()
            
            y_true.extend(y)
            y_pred.extend(mx_index)
        
        accuracy = correct/total*100
        print(f'Validation test accuracy {accuracy:.2f}')
        plot_roc_curve(y_true, y_pred)
        
    print(f'Epoch time: {round(time()-start, 2)}')

Epoch 0: Training loss: 1.8837024127910782 Validation test accuracy 5.71
AUC score: 0.499568908928721
Epoch time: 32.24
Epoch 1: Training loss: 0.7745817047773228 Validation test accuracy 44.42
AUC score: 0.6125028929324496
Epoch time: 34.3
Epoch 2: Training loss: 0.7174764997060682 Validation test accuracy 4.40
AUC score: 0.5118989802433177
Epoch time: 35.01
Epoch 3: Training loss: 0.7071658887698417 Validation test accuracy 90.13
AUC score: 0.4763322014116547
Epoch time: 37.43
Epoch 4: Training loss: 0.7022776667765144 Validation test accuracy 52.67
AUC score: 0.4486855574219405
Epoch time: 39.01
Epoch 5: Training loss: 0.6986525838983723 Validation test accuracy 31.20
AUC score: 0.5679713136657796
Epoch time: 42.52
Epoch 6: Training loss: 0.7084146313993731 Validation test accuracy 80.40
AUC score: 0.4588924329874868
Epoch time: 44.37
Epoch 7: Training loss: 0.6982695470615907 Validation test accuracy 54.19
AUC score: 0.6459166238411017
Epoch time: 46.99
Epoch 8: Training loss: 0.69

In [None]:
for X, y in testloader:
    log_ps = model(X.to(device)).cpu()
    mx_index = torch.argmax(log_ps, dim=1)
    total += y.numel()
    correct += sum(mx_index==y).item()

    y_true.extend(y)
    y_pred.extend(mx_index)

accuracy = correct/total*100
print(f'Validation test accuracy {accuracy:.2f}')
plot_roc_curve(y_true, y_pred)