In [None]:
from google.colab import drive
import os
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import json
import math

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from tqdm.notebook import tqdm, trange

In [None]:
csv_path = os.path.join(os.getcwd(), "drive", "My Drive", "EMS_2019_subsampled_densitydemo.csv")

df=pd.read_csv(csv_path, sep=',')

Preprocess data

In [None]:
def preprocess_target_column(df, col_name):
  if (df[col_name].dtype == 'object'):
    df[col_name].str.replace(',', '')
  df[col_name] = pd.to_numeric(df[col_name], errors='coerce')

In [None]:
preprocess_target_column(df, "INCIDENT_RESPONSE_SECONDS_QY")
preprocess_target_column(df, "FINAL_SEVERITY_LEVEL_CODE")

df = df.dropna(subset=["INCIDENT_RESPONSE_SECONDS_QY", "FINAL_SEVERITY_LEVEL_CODE"])

#these are arrays of ints 
target1 = df["INCIDENT_RESPONSE_SECONDS_QY"].values.astype(int)
target2 = df["FINAL_SEVERITY_LEVEL_CODE"].values.astype(int)

Test/Train Split

In [None]:
train_proportion = 0.8
n = df.shape[0]

ntrain = (int) (train_proportion*n)

In [None]:
train_x = df[:ntrain]
test_x = df[ntrain:]

train_y1 = target1[:ntrain]
train_y2 = target2[:ntrain]

test_y1 = target1[ntrain:]
test_y2 = target2[ntrain:]

Inputs

In [None]:
import datetime

In [None]:
def process_dates(dates):
  res = []

  for date in dates:
    dateobj = datetime.datetime.strptime(date, '%m/%d/%Y %I:%M:%S %p')

    year = dateobj.year
    month = dateobj.month
    week = (int) (dateobj.strftime("%V"))
    day = dateobj.day
    hour = dateobj.hour

    res.append([year, month, week, day, hour])

  return torch.tensor(res)

In [None]:
train_dates = process_dates(train_x["INCIDENT_DATETIME"])
test_dates = process_dates(test_x["INCIDENT_DATETIME"])

print(train_dates.shape)
print(test_dates.shape)

torch.Size([301843, 5])
torch.Size([75461, 5])


In [None]:
def one_hot(array, unique, inverse=[]):
  if (inverse == []):
    array = array.values
    onehot = np.zeros((len(array),len(unique)))
    for i in range(len(array)):
      for j in range(len(unique)):
        if array[i] == unique[j]:
          onehot[i, j] = 1
  else:
    onehot = np.eye(unique.shape[0])[inverse]
  
  return torch.from_numpy(onehot)

In [None]:
unique, inverse = np.unique(train_x["INITIAL_CALL_TYPE"], return_inverse=True)

train_calltype = one_hot(train_x["INITIAL_CALL_TYPE"], unique, inverse)
test_calltype = one_hot(test_x["INITIAL_CALL_TYPE"], unique)

print(train_calltype.shape)
print(test_calltype.shape)

  


torch.Size([301843, 72])
torch.Size([75461, 72])


In [None]:
train_initsev = torch.tensor(train_x["INITIAL_SEVERITY_LEVEL_CODE"].values).unsqueeze(1)
test_initsev = torch.tensor(test_x["INITIAL_SEVERITY_LEVEL_CODE"].values).unsqueeze(1)

print(train_initsev.shape)
print(test_initsev.shape)

torch.Size([301843, 1])
torch.Size([75461, 1])


In [154]:
def replace_nans_with_avg(array):
  average = np.mean(array[~np.isnan(array)])
  return torch.tensor([value if not math.isnan(value) else average for value in array])

In [157]:
train_pop_dens = replace_nans_with_avg(train_x["Population Density / sq mi"].values).unsqueeze(1)
test_pop_dens = replace_nans_with_avg(test_x["Population Density / sq mi"].values).unsqueeze(1)

train_pop = replace_nans_with_avg(train_x["Population"].values).unsqueeze(1)
test_pop = replace_nans_with_avg(test_x["Population"].values).unsqueeze(1)

train_race = replace_nans_with_avg(train_x["PERCENT WHITE NON HISPANIC"].values).unsqueeze(1)
test_race = replace_nans_with_avg(test_x["PERCENT WHITE NON HISPANIC"].values).unsqueeze(1)

train_extras = torch.cat((train_pop_dens, train_pop, train_race), 1)
test_extras = torch.cat((test_pop_dens, test_pop, test_race), 1)

print(train_extras.shape)
print(train_extras.shape)

torch.Size([301843, 3])
torch.Size([301843, 3])


In [None]:
unique, inverse = np.unique(train_x["ZIPCODE"], return_inverse=True)

train_zip = one_hot(train_x["ZIPCODE"], unique, inverse)
test_zip = one_hot(test_x["ZIPCODE"], unique)

print(train_zip.shape)
print(test_zip.shape)

  


torch.Size([301843, 230])
torch.Size([75461, 230])


Combine to get training set

In [159]:
train_vectorized = torch.cat((train_dates, train_calltype, train_extras, train_initsev),1)
test_vectorized = torch.cat((test_dates, test_calltype, test_extras, test_initsev),1)

print(train_vectorized.shape)
print(test_vectorized.shape)

torch.Size([301843, 81])
torch.Size([75461, 81])


FFNN Code

In [161]:
class FFNNDataset(Dataset):
    def __init__(self, data, target):
        self.X = torch.cat([X.unsqueeze(0) for X in data])
        self.y = torch.LongTensor(target)
        self.len = len(data)
    
    def __len__(self):
        return self.len
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]

In [162]:
def get_data_loaders(train, test, target, batch_size=16):
    """
    """
    # First we create the dataset given our train and test lists
    dataset = FFNNDataset(torch.cat((train, test)), target)

    # Then, we create a list of indices for all samples in the dataset
    train_indices = [i for i in range(len(train))]
    test_indices = [i for i in range(len(train), len(train) + len(test))]

    # Now we define samplers and loaders for train and test
    train_sampler = SubsetRandomSampler(train_indices)
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    
    test_sampler = SubsetRandomSampler(test_indices)
    test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)

    return train_loader, test_loader

In [163]:
#train_loader1, test_loader1 = get_data_loaders(train_vectorized, test_vectorized, batch_size=16)
train_loader2, test_loader2 = get_data_loaders(train_vectorized, test_vectorized, [y-1 for y in target2], batch_size=16)

In [164]:
# Lambda to switch to GPU if available
get_device = lambda : "cuda:0" if torch.cuda.is_available() else "cpu"

In [165]:
# Consult the PyTorch documentation for information on the functions used below:
# https://pytorch.org/docs/stable/torch.html

class FFNN(nn.Module):
	def __init__(self, input_dim, h, output_dim, activationF = nn.ReLU):
		super(FFNN, self).__init__()
		self.h = h
		self.W1 = nn.Linear(input_dim, h)
		torch.nn.init.kaiming_uniform_(self.W1.weight)
  
		self.activation = activationF()
		self.W2 = nn.Linear(h, output_dim)
		torch.nn.init.kaiming_uniform_(self.W2.weight)
  
		self.softmax = nn.LogSoftmax(dim=1)
		self.loss = nn.NLLLoss() 

	def compute_Loss(self, predicted_vector, gold_label):
		return self.loss(predicted_vector, gold_label)

	def forward(self, input_vector):
		z1 = self.W1(input_vector)
		hidden = self.activation(z1)
		z2 = self.W2(hidden)

		predicted_vector = self.softmax(z2)
		return predicted_vector
	
	def load_model(self, save_path):
		self.load_state_dict(torch.load(save_path))
	
	def save_model(self, save_path):
		torch.save(self.state_dict(), save_path)


def train_epoch(model, train_loader, optimizer):
	model.train()
	total = 0
	loss = 0
	correct = 0
	val_loss = 0.0
	val_steps = 0
	for (input_batch, expected_out) in tqdm(train_loader, leave=False, desc="Training Batches"):
		output = model(input_batch.to(get_device()).float())
		
		total += output.size()[0]
		_, predicted = torch.max(output, 1)

		#print(output)
		#print(expected_out.to(get_device()))
	
		correct += (expected_out == predicted.to("cpu")).cpu().numpy().sum()


		loss = model.compute_Loss(output, expected_out.to(get_device()))
		val_loss += loss.cpu().detach().numpy()
		val_steps += 1

		optimizer.zero_grad()
		loss.backward()
		optimizer.step()
	# Print accuracy
	print("loss: " + str(val_loss / val_steps))
	print("accuracy: " + str(correct/total))

	return val_loss / val_steps, correct/total



def evaluation(model, val_loader, optimizer):
	model.eval()
	loss = 0
	correct = 0
	total = 0
	for (input_batch, expected_out) in tqdm(val_loader, leave=False, desc="Validation Batches"):
		output = model(input_batch.to(get_device()).float())
		total += output.size()[0]
		_, predicted = torch.max(output, 1)
		correct += (expected_out.to("cpu") == predicted.to("cpu")).cpu().numpy().sum()

		loss += model.compute_Loss(output, expected_out.to(get_device()))
	loss /= len(val_loader)
	# Print validation metrics
	print("validation accuracy: " + str(correct/total))
	pass

def train_and_evaluate(number_of_epochs, model, train_loader, val_loader):
	optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
	for epoch in trange(number_of_epochs, desc="Epochs"):
		loss, accuracy = train_epoch(model, train_loader, optimizer) 
		evaluation(model, val_loader, optimizer)
	return loss, accuracy

In [166]:
#severity level

num_epochs = 10
in_dimens = train_vectorized.shape[1]
h = 50
num_severity_levels = 8

model = FFNN(in_dimens, h, num_severity_levels)
train_and_evaluate(num_epochs, model, train_loader2, test_loader2)

HBox(children=(FloatProgress(value=0.0, description='Epochs', max=10.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Training Batches', max=18866.0, style=ProgressStyle(descr…

loss: 6540.555095162572
accuracy: 0.2367919746358206


HBox(children=(FloatProgress(value=0.0, description='Validation Batches', max=4717.0, style=ProgressStyle(desc…

validation accuracy: 0.2372086243224977


HBox(children=(FloatProgress(value=0.0, description='Training Batches', max=18866.0, style=ProgressStyle(descr…

loss: 1.8202667696900854
accuracy: 0.2370238832770679


HBox(children=(FloatProgress(value=0.0, description='Validation Batches', max=4717.0, style=ProgressStyle(desc…

validation accuracy: 0.2372086243224977


HBox(children=(FloatProgress(value=0.0, description='Training Batches', max=18866.0, style=ProgressStyle(descr…

loss: 1.8197939421369052
accuracy: 0.2370238832770679


HBox(children=(FloatProgress(value=0.0, description='Validation Batches', max=4717.0, style=ProgressStyle(desc…

validation accuracy: 0.2372086243224977


HBox(children=(FloatProgress(value=0.0, description='Training Batches', max=18866.0, style=ProgressStyle(descr…

loss: 1.819666909284358
accuracy: 0.2370238832770679


HBox(children=(FloatProgress(value=0.0, description='Validation Batches', max=4717.0, style=ProgressStyle(desc…

validation accuracy: 0.2372086243224977


HBox(children=(FloatProgress(value=0.0, description='Training Batches', max=18866.0, style=ProgressStyle(descr…

loss: 1.8196225688753964
accuracy: 0.2370238832770679


HBox(children=(FloatProgress(value=0.0, description='Validation Batches', max=4717.0, style=ProgressStyle(desc…

validation accuracy: 0.2372086243224977


HBox(children=(FloatProgress(value=0.0, description='Training Batches', max=18866.0, style=ProgressStyle(descr…

loss: 1.8195946695796255
accuracy: 0.2370238832770679


HBox(children=(FloatProgress(value=0.0, description='Validation Batches', max=4717.0, style=ProgressStyle(desc…

validation accuracy: 0.2372086243224977


HBox(children=(FloatProgress(value=0.0, description='Training Batches', max=18866.0, style=ProgressStyle(descr…

loss: 1.819585894288484
accuracy: 0.2370238832770679


HBox(children=(FloatProgress(value=0.0, description='Validation Batches', max=4717.0, style=ProgressStyle(desc…

validation accuracy: 0.2372086243224977


HBox(children=(FloatProgress(value=0.0, description='Training Batches', max=18866.0, style=ProgressStyle(descr…

loss: 1.819569463891871
accuracy: 0.2370238832770679


HBox(children=(FloatProgress(value=0.0, description='Validation Batches', max=4717.0, style=ProgressStyle(desc…

validation accuracy: 0.2372086243224977


HBox(children=(FloatProgress(value=0.0, description='Training Batches', max=18866.0, style=ProgressStyle(descr…

loss: 1.8195752928571156
accuracy: 0.2370238832770679


HBox(children=(FloatProgress(value=0.0, description='Validation Batches', max=4717.0, style=ProgressStyle(desc…

validation accuracy: 0.2372086243224977


HBox(children=(FloatProgress(value=0.0, description='Training Batches', max=18866.0, style=ProgressStyle(descr…

loss: 1.819600054413494
accuracy: 0.2370238832770679


HBox(children=(FloatProgress(value=0.0, description='Validation Batches', max=4717.0, style=ProgressStyle(desc…

validation accuracy: 0.2372086243224977



(1.819600054413494, 0.2370238832770679)