In [1]:
import torch

import sys
from os import path
sys.path.append( "../website/apis/models/" )

from WildfireModel import WildfireModel

In [2]:
# notebook-wide configuration
MODEL_PATH = "../website/apis/models/wfm.pt"  # file the model is saved to / loaded from
TRAIN_NEW_MODEL = False  # True runs the training cell; False skips it
IMG_SZ = 100  # input side length fed to conv2d_out_sz when sizing the layers

In [3]:
# determine the linear size of the output layer of a convolutional layer
def conv2d_out_sz(in_size, kernel_size, pool_size, padding=0, stride=1):
    """Return the side length of a square feature map after one conv + pool stage.

    The convolution output is ``floor((in_size - kernel_size + 2*padding) / stride) + 1``
    and the pooling step then divides that by ``pool_size``, again rounding down.
    Floor division is used at both steps because real conv/pool layers discard the
    partial window at the edge; true division would report fractional sizes
    (e.g. 48.5 for a 101-pixel input) that no layer can actually produce.

    Args:
        in_size: side length of the (square) input.
        kernel_size: side length of the (square) convolution kernel.
        pool_size: pooling window size; the pool stride is taken to equal it.
        padding: zero-padding added to each side of the input.
        stride: convolution stride.

    Returns:
        The output side length (an ``int`` when all arguments are ints).
    """
    conv_out = (in_size - kernel_size + 2 * padding) // stride + 1
    return conv_out // pool_size

In [4]:
# use the function we just defined to get the flattened size of the last conv stage's output
# NOTE(review): the original comment says fcin is "used in the definition of the model" --
# presumably the first fully-connected layer of WildfireModel; confirm the two stay in sync.

ks = 5 # kernel size
ps = 2 # pool size
out_chan = 64 # number of output channels
n_conv_stages = 3 # number of stacked conv + pool stages

# named feat_sz rather than `os` so the notebook never shadows the stdlib module name
feat_sz = IMG_SZ
for _ in range(n_conv_stages):
    feat_sz = conv2d_out_sz(feat_sz, ks, ps) # output size of this stage feeds the next one

# square feature map times the number of channels
fcin = int((int(feat_sz)**2)*out_chan)
fcin

5184

In [5]:
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt

In [6]:
# must run the Preprocess.ipynb Jupyter Notebook to generate data before running this notebook

# fixed seed so the train/test partition is the same on every run; with an unseeded
# split, a model saved in an earlier run may have trained on samples that land in
# this run's test set
SPLIT_SEED = 42


def to_image_tensor(samples):
    """Turn the image entry (index 0) of every sample into a scaled torch tensor.

    The images are first stacked into one NumPy array: building a tensor directly
    from a Python list of arrays is far slower. Values are divided by 255.0 and the
    result is viewed as (-1, 3, IMG_SZ, IMG_SZ).
    """
    pixels = torch.as_tensor(np.array([sample[0] for sample in samples]))
    # NOTE(review): .view only reinterprets the existing memory layout, it does not
    # transpose axes -- confirm Preprocess.ipynb stores images in the layout the model
    # expects. Kept as-is because the saved model was trained on this layout.
    return (pixels / 255.0).view(-1, 3, IMG_SZ, IMG_SZ)


def to_label_tensor(samples):
    """Turn the label entry (index 1) of every sample into a 1-D float tensor."""
    return torch.tensor([float(sample[1]) for sample in samples])


# load the balanced data array to train and test on
# NOTE: allow_pickle=True unpickles arbitrary Python objects -- only load .npy files
# that you generated yourself.
data = np.load("balanced_data.npy", allow_pickle=True)
extra_data = np.load("extra_data.npy", allow_pickle=True)

# format x the way torch wants to see it
x = to_image_tensor(data)
# and the extra x
extra_x = to_image_tensor(extra_data)

# format y the way torch wants to see it
y = to_label_tensor(data)
# and the extra y
extra_y = to_label_tensor(extra_data)

train_x, test_x, train_y, test_y = train_test_split(
    x, y, test_size=0.1, random_state=SPLIT_SEED
)
(train_x.shape, train_y.shape), (test_x.shape, test_y.shape)

((torch.Size([3479, 3, 100, 100]), torch.Size([3479])),
 (torch.Size([387, 3, 100, 100]), torch.Size([387])))

In [7]:
def evaluate_accuracy(test_x, test_y, wfm):
    """Return the fraction of samples whose rounded model output equals the label.

    Args:
        test_x: batch of inputs passed straight to ``wfm``.
        test_y: 1-D tensor of labels; it is unsqueezed to a column so it lines up
            with the model output it is compared against.
        wfm: the model (any callable). When it is a ``torch.nn.Module`` it is put
            in eval mode for the duration of the call so layers such as dropout
            or batch norm behave deterministically, and its previous mode is
            restored afterwards so a surrounding training loop is unaffected.

    Returns:
        The accuracy as a Python float.
    """
    is_module = isinstance(wfm, torch.nn.Module)
    was_training = is_module and wfm.training
    if is_module:
        wfm.eval()
    try:
        with torch.no_grad():
            # torch.round keeps everything in torch; np.round on a tensor only works
            # through NumPy's fallback conversion path
            y_pred = torch.round(wfm(test_x)).type(torch.FloatTensor)
            expected = test_y.unsqueeze(1)
            correct = (y_pred == expected).type(torch.FloatTensor)
            return correct.mean().item() # unpack the tensor
    finally:
        if was_training:
            wfm.train()

In [8]:
# bounded buffer class, keeps track of 'size' most recent insertions
class BoundedNumericList():
    """Fixed-capacity buffer holding the ``size`` most recently inserted numbers.

    Once the buffer is full, each new insertion overwrites the oldest entry.
    """

    def __init__(self, size):
        """Create an empty buffer.

        Args:
            size: maximum number of values kept; must be at least 1.

        Raises:
            ValueError: if ``size`` is smaller than 1. Without this check the
                failure would only surface later, inside ``insert``.
        """
        if size < 1:
            raise ValueError(f"size must be at least 1, got {size}")
        self.size = size
        self.nums = []
        # total number of accepted insertions; modulo size gives the slot to overwrite
        self.next_insertion = 0

    def insert(self, item):
        """Store ``item`` if it is numeric.

        Args:
            item: an int, float or complex. Booleans are rejected even though
                ``bool`` is a subclass of ``int``.

        Returns:
            True if the item was stored, False if it was rejected.
        """
        if not isinstance(item, (int, float, complex)) or isinstance(item, bool):
            return False
        if len(self.nums) < self.size:
            self.nums.append(item)
        else:
            # buffer is full: overwrite the oldest entry
            self.nums[self.next_insertion % self.size] = item
        self.next_insertion += 1
        return True

    def average(self):
        """Return the mean of the stored values, or None when the buffer is empty."""
        if not self.nums:
            return None
        return sum(self.nums) / len(self.nums)

In [9]:
if TRAIN_NEW_MODEL:

    # instantiate our model, initial optimizer, and loss function
    wfm = WildfireModel()
    optimizer = optim.AdamW(wfm.parameters(), lr=1e-4)
    # only `torch` and `torch.optim` are imported in the cells above, so a bare `nn`
    # would raise NameError; reach the loss through the torch namespace instead
    loss_function = torch.nn.BCELoss()

    # declare constants controlling the training process
    BATCH_SIZE = 100
    EPOCHS = 1000
    ROLLING_ACCURACY_SIZE = 4

    # instantiate our bounded buffer to keep track of the last ROLLING_ACCURACY_SIZE epoch accuracies
    past_epoch_accuracies = BoundedNumericList(ROLLING_ACCURACY_SIZE)
    highest_rolling_accuracy = -1

    for epoch in range(EPOCHS): # loop over all of our data EPOCHS times
        # make sure the model is in training mode at the start of every epoch
        wfm.train()

        for i in range(0, len(train_x), BATCH_SIZE): # iterate over our batches
            # grab the ith batch
            batch_x = train_x[i : i+BATCH_SIZE]
            batch_y = train_y[i : i+BATCH_SIZE]
            # add a trailing dimension so the targets line up with the model outputs
            batch_y = torch.unsqueeze(batch_y, 1)

            # zero our gradient
            wfm.zero_grad()

            # pass the batch through the model
            outputs = wfm(batch_x)

            # compute the loss between the outputs and the expected
            loss = loss_function(outputs, batch_y)

            # update the model's weights
            loss.backward()
            optimizer.step()

        # calculate the most recent epoch's accuracy
        # NOTE(review): checkpoint selection below is driven by test_x/test_y, so an
        # accuracy later reported on that same split is optimistic; a separate
        # validation split would avoid this.
        epoch_accuracy = evaluate_accuracy(test_x, test_y, wfm)

        # add it to the list of past accuracies
        past_epoch_accuracies.insert(epoch_accuracy)

        # calculate the rolling average
        rolling_accuracy = past_epoch_accuracies.average()

        # `loss` here is the loss of the last batch of the epoch
        print(f"Epoch: {epoch}. Loss: {loss}. Rolling Accuracy: {round(rolling_accuracy,3)}")

        # save the model if the most recent updates have been beneficial
        if rolling_accuracy > highest_rolling_accuracy:
            print(f"Saving model at epoch {epoch}.")
            torch.save(wfm, MODEL_PATH)
            highest_rolling_accuracy = rolling_accuracy

In [10]:
# NOTE: torch.load unpickles the whole model object, which can execute arbitrary
# code -- only load checkpoint files you produced yourself.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True, which
# rejects fully pickled modules; pass weights_only=False there -- confirm against the
# installed version.
wfm = torch.load(MODEL_PATH)

# switch to inference mode so layers such as dropout / batch norm (if the model has
# any) behave deterministically while we measure accuracy
wfm.eval()

acc = evaluate_accuracy(test_x, test_y, wfm)
print(f"Model accuracy on test data: {acc}")

extra_acc = evaluate_accuracy(extra_x, extra_y, wfm)
print(f"Model accuracy on extra data: {extra_acc}")



Model accuracy on test data: 0.9534883499145508
Model accuracy on extra data: 0.9596773982048035
