In [100]:
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F
from torch import nn
import math

In [101]:
# Load in data
all_data = pd.read_csv("dem_suic_data_wRisk.csv")
all_data.head()

Unnamed: 0,County_Names,"Population estimates, July 1, 2019, (V2019)","Population estimates base, April 1, 2010, (V2019)","Population, percent change - April 1, 2010 (estimates base) to July 1, 2019, (V2019)",Population_2010,"Persons under 5 years, percent","Persons under 18 years, percent","Persons 65 years and over, percent","Female persons, percent","White alone, percent",...,"Nonminority-owned firms, 2012","Veteran-owned firms, 2012","Nonveteran-owned firms, 2012",Population_Density,"Land area in square miles, 2010",FIPS_County_Code,Suicide_Counts,County_of_occurrence,Suicide_Rate,Risk_Type
0,San Mateo County,766573.0,718517.0,6.7,718451.0,5.5,20.2,16.6,50.5,59.5,...,40894.0,5141.0,66645.0,1602.2,448.41,81,58.0,81.0,8.072924,0.0
1,Santa Clara County,1927852.0,1781686.0,8.2,1781642.0,5.8,21.6,13.9,49.3,52.4,...,72818.0,9400.0,146384.0,1381.0,1290.1,85,147.0,85.0,8.250816,0.0
2,Los Angeles County,10039107.0,9819968.0,2.2,9818605.0,5.8,21.4,14.1,50.7,70.7,...,481643.0,69608.0,1044750.0,2419.6,4057.88,37,824.0,37.0,8.392231,0.0
3,Alameda County,1671329.0,1510258.0,10.7,1510271.0,5.7,20.3,14.3,50.7,49.3,...,69300.0,10213.0,134032.0,2043.6,739.02,1,138.0,1.0,9.137433,0.0
4,San Joaquin County,762148.0,685306.0,11.2,685306.0,6.9,26.8,13.1,50.1,66.1,...,19915.0,3449.0,36663.0,492.6,1391.32,77,63.0,77.0,9.192974,0.0


In [102]:
# Splitting data into training and testing sets
# Using the 15 counties with the highest suicide rate and the 15 counties with the lowest suicide rate
# for the training set
all_data = all_data.sort_values(by = 'Suicide_Rate')
training_data = pd.concat([all_data[0:20], all_data[37:57]])
testing_data = all_data[20:37]

In [103]:
# Splitting data up
x_train = training_data[['Avg_Household_Size', 'HS_Grad_Percent', 'College_Grad_Percent', 'Disability_percent', 'Vet_Population', 'Employed_16+_Percent', 'Lacking_Health_Insurance_Percent', 'Household_Income', 'Poverty_Percentage', 'Population_Density']]
y_train = training_data['Risk_Type']
x_test = testing_data[['Avg_Household_Size', 'HS_Grad_Percent', 'College_Grad_Percent', 'Disability_percent', 'Vet_Population', 'Employed_16+_Percent', 'Lacking_Health_Insurance_Percent', 'Household_Income', 'Poverty_Percentage', 'Population_Density']]
y_test = testing_data['Risk_Type']

In [104]:
# Creating tensors for NN
x, x_test = torch.from_numpy(pd.DataFrame.to_numpy(x_train)), torch.from_numpy(pd.DataFrame.to_numpy(x_test))
y, y_test = torch.from_numpy(pd.DataFrame.to_numpy(y_train)), torch.from_numpy(pd.DataFrame.to_numpy(y_test))

In [105]:
# Changing data dimensions
x = x.unsqueeze(0)
x_test = x_test.unsqueeze(0)

In [106]:
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.layer1 = nn.Conv1d(in_channels=1, out_channels=10, kernel_size = 3)
        self.layer2 = nn.Conv1d(in_channels=10, out_channels=1, kernel_size = 8)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.sigmoid(x)
        return x

net = MyNetwork().double()

In [107]:
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(net.parameters(), lr=0.0000001)

In [108]:
for epoch in range(2):
    
    for i in range(40):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass: Compute predicted y by passing x to the model
        y_pred = net(x[0,i].unsqueeze(0).unsqueeze(0))

        # Compute and print loss
        loss = nn.L1Loss()
        loss_val = loss(y_pred[0,0,0], y)
        if i%5==0:
            print(i, loss_val.item())

        # Perform a backward pass, and update the weights.
        loss_val.backward(retain_graph=True)
        optimizer.step()

print("Finished Training")

0 0.5
5 0.5
10 0.5
15 0.5
20 0.5
25 0.5
30 0.5
35 0.5
0 0.5
5 0.5
10 0.5
15 0.5
20 0.5
25 0.5
30 0.5
35 0.5
Finished Training


  return F.l1_loss(input, target, reduction=self.reduction)


In [109]:
# Save model
torch.save(net.state_dict(), 'CA_CNN')

In [110]:
# Load model for testing
net.load_state_dict(torch.load('CA_CNN'))

correct_count = 0

# Compute % accuracy
for i in range(17):
    
    output = net(x_test[0, i].unsqueeze(0).unsqueeze(0))
    
    # Printing out model's guesses and comparing them to actual answer
    print("Guess: " + str(int(output.item())) + "\tActual: " + str(int(y_test[i].item())))
    
    if int(output.item()) == int(y_test[i].item()):
        correct_count += 1

print("Score: " + str(round(correct_count / 17 * 100, 2)) + "%")

Guess: 1	Actual: 0
Guess: 1	Actual: 0
Guess: 1	Actual: 0
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Guess: 0	Actual: 1
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Guess: 1	Actual: 1
Score: 76.47%
