In [1]:
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F
from torch import nn
import math

In [2]:
# Load in data
all_data = pd.read_csv("dem_suic_data_wRisk.csv")
all_data.head()

Unnamed: 0,id,County_Names,num_households,percent_households_by_type_total_households,num_families,percent_households_by_type_total_households_family_households_(families),num_families_with_minors,percent_families_with_minors,num_families_with_married_couple,percent_families_with_married_couple,...,percent_percentage_of_families_and_people_whose_income_in_the_past_12_months_is_below_the_poverty_level_under_18_years_related_children_of_the_householder_under_18_years_related_children_of_the_householder_5_to_17_years,percent_percentage_of_families_and_people_whose_income_in_the_past_12_months_is_below_the_poverty_level_18_years_and_over,percent_percentage_of_families_and_people_whose_income_in_the_past_12_months_is_below_the_poverty_level_18_to_64_years,percent_percentage_of_families_and_people_whose_income_in_the_past_12_months_is_below_the_poverty_level_65_years_and_over,percent_percentage_of_families_and_people_whose_income_in_the_past_12_months_is_below_the_poverty_level_65_years_and_over_people_in_families,percent_percentage_of_families_and_people_whose_income_in_the_past_12_months_is_below_the_poverty_level_65_years_and_over_unrelated_individuals_15_years_and_over,Suicide_Counts,County_of_occurrence,Risk_Type,Suicide_Rate
0,0500000US06001,Alameda County,558907.0,558907.0,368905.0,66.0,174479.0,31.2,270195.0,48.3,...,14.7,11.8,12.2,9.2,9.1,25.9,138.0,1.0,0.0,8.891219
1,0500000US06005,Amador County,13925.0,13925.0,9402.0,67.5,2917.0,20.9,7704.0,55.3,...,20.7,10.8,13.3,5.0,8.8,29.5,7.0,5.0,1.0,21.307683
2,0500000US06007,Butte County,85318.0,85318.0,50963.0,59.7,20238.0,23.7,36475.0,42.8,...,22.8,21.3,24.7,8.3,14.9,40.5,39.0,7.0,1.0,17.960515
3,0500000US06009,Calaveras County,18060.0,18060.0,12144.0,67.2,3481.0,19.3,9906.0,54.9,...,12.4,12.5,14.8,6.9,8.6,30.0,20.0,9.0,1.0,45.199782
4,0500000US06011,Colusa County,6966.0,6966.0,5375.0,77.2,2563.0,36.8,4010.0,57.6,...,20.1,12.1,12.6,9.7,13.1,29.6,2.0,11.0,0.0,9.449563


In [3]:
# Splitting data into training and testing sets
# Using the 10 counties with the highest suicide rate and the 10 counties with the lowest suicide rate
# for the training set
all_data = all_data.sort_values(by = 'Suicide_Rate')
training_data = pd.concat([all_data[0:10], all_data[47:57]])
testing_data = all_data[10:47]

In [4]:
# Splitting data up
# 33 total features
x_train = training_data[["num_households", "num_families", "percent_families_with_minors", "percent_families_with_married_couple", "percent_families_with_minors_and_no_wife_present", "percent_families_with_minors_and_no_husband_present", "percent_nonfamily_households", "percent_single_person_households", "percent_single_person_households_65_over", "avg_household_size", "avg_family_size", "percent_of_married_males_married_but_separated","percent_of_married_females_married_but_separated", "percent_population_gradorprof_degree", "percent_population_hs_grad_or_higher", "percent_population_bachelors_degree_or_higher", "num_of_veterans", "percent_of_veterans", "percent_population_with_disability", "percent_of_minors_with_disability", "percent_native_born_in_US", "percent_foreign_born", "percent_population_where_household_lang_nonenglish", "percent_population_where_household_lang_spanish", "percent_population_in_labor_force", "percent_population_unemployed", "percent_employed_in_service_industry", "percent_population_income_less_than_10000", "median_household_income", "mean_household_income", "percent_population_on_food_stamps_in_past12mo", "percent_population_with_health_insurance", "percent_of_population_below_poverty_line"]]
y_train = training_data['Risk_Type']
x_test = testing_data[["num_households", "num_families", "percent_families_with_minors", "percent_families_with_married_couple", "percent_families_with_minors_and_no_wife_present", "percent_families_with_minors_and_no_husband_present", "percent_nonfamily_households", "percent_single_person_households", "percent_single_person_households_65_over", "avg_household_size", "avg_family_size", "percent_of_married_males_married_but_separated","percent_of_married_females_married_but_separated", "percent_population_gradorprof_degree", "percent_population_hs_grad_or_higher", "percent_population_bachelors_degree_or_higher", "num_of_veterans", "percent_of_veterans", "percent_population_with_disability", "percent_of_minors_with_disability", "percent_native_born_in_US", "percent_foreign_born", "percent_population_where_household_lang_nonenglish", "percent_population_where_household_lang_spanish", "percent_population_in_labor_force", "percent_population_unemployed", "percent_employed_in_service_industry", "percent_population_income_less_than_10000", "median_household_income", "mean_household_income", "percent_population_on_food_stamps_in_past12mo", "percent_population_with_health_insurance", "percent_of_population_below_poverty_line"]]
y_test = testing_data['Risk_Type']

In [5]:
# Creating tensors for NN
x, x_test = torch.from_numpy(pd.DataFrame.to_numpy(x_train)), torch.from_numpy(pd.DataFrame.to_numpy(x_test))
y, y_test = torch.from_numpy(pd.DataFrame.to_numpy(y_train)), torch.from_numpy(pd.DataFrame.to_numpy(y_test))

In [6]:
# Changing data dimensions
x = x.unsqueeze(0)
x_test = x_test.unsqueeze(0)

In [7]:
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.layer1 = nn.Conv1d(in_channels=1, out_channels=10, kernel_size = 33)
        self.layer2 = nn.Conv1d(in_channels=10, out_channels=1, kernel_size = 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.sigmoid(x)
        return x

net = MyNetwork().double()

In [8]:
# Construct our loss function and an Optimizer
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(net.parameters(), lr=0.000000001)

In [9]:
for epoch in range(4):
    
    for i in range(20):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass: Compute predicted y by passing x to the model
        y_pred = net(x[0,i].unsqueeze(0).unsqueeze(0))

        # Compute and print loss
        loss = nn.L1Loss()
        loss_val = loss(y_pred[0,0,0], y)
        if i%5==0:
            print(i, loss_val.item())

        # Perform a backward pass, and update the weights.
        loss_val.backward(retain_graph=True)
        optimizer.step()

print("Finished Training")

0 0.5
5 0.5
10 0.5
15 0.5
0 0.5
5 0.5
10 0.5
15 0.5
0 0.5
5 0.5
10 0.5
15 0.5
0 0.5
5 0.5
10 0.5
15 0.5
Finished Training


  return F.l1_loss(input, target, reduction=self.reduction)


In [10]:
# Save model
torch.save(net.state_dict(), 'CA_CNN')

In [12]:
# Load model for testing
net.load_state_dict(torch.load('CA_CNN'))

correct_count = 0

# Compute % accuracy
for i in range(37):
    # Putting test data through network
    output = net(x_test[0, i].unsqueeze(0).unsqueeze(0))
    
    # If model guesses correctly, correct_count increases by 1
    if round(output[0][0][0].item()) == round(y_test[i].item()):
        correct_count += 1

print("Score: " + str(round(correct_count / 37 * 100, 2)) + "%")

Score: 75.68%
