In [1]:
# IPython line magic: selects matplotlib's interactive "notebook" backend for this kernel.
# NOTE(review): no plotting calls appear in the cells visible here, and this backend
# may not be available in newer Jupyter front ends — confirm it is still needed.
%matplotlib notebook

In [2]:
# Import dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Locations of the two input tables (relative to the notebook's working directory)
centers_data = "Resources/centers_bystate.csv"
suicide_data = "Resources/suicide_states.csv"

# Centers table: read with header=None (no row is treated as column names);
# the first column becomes the row index.
centers_df = pd.read_csv(centers_data, index_col=[0], header=None)

# Suicide table: read with pandas defaults, so the first row supplies the column names.
suicide_df = pd.read_csv(suicide_data)

In [4]:
# Name the row index "State" and rename the lone data column (label 1,
# assigned automatically because the file was read without a header) to "Centers".
centers_df = centers_df.rename_axis(["State"]).rename(columns={1: "Centers"})
centers_df.head()

Unnamed: 0_level_0,Centers
State,Unnamed: 1_level_1
AK,80
AL,133
AR,204
AZ,305
CA,670


In [5]:
# Lookup table: full state name (plus District of Columbia) -> two-letter postal code
state_abbreviations = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

# Swap each full name in the "State" column for its abbreviation.
# Values that are not keys of the lookup table are left unchanged by replace().
suicide_df["State"] = suicide_df["State"].replace(state_abbreviations)
suicide_df.head()

Unnamed: 0,State,Year,Deaths,Population,Crude Rate
0,AL,2013,721,4827660,14.93
1,AL,2014,715,4840037,14.77
2,AL,2015,750,4850858,15.46
3,AL,2016,788,4860545,16.21
4,AL,2017,836,4874747,17.15


In [6]:
# Keep only the rows whose "Year" value equals 2017
is_2017 = suicide_df["Year"] == 2017
suicide_df_17 = suicide_df.loc[is_2017]
suicide_df_17.head()

Unnamed: 0,State,Year,Deaths,Population,Crude Rate
4,AL,2017,836,4874747,17.15
9,AK,2017,200,739795,27.03
14,AZ,2017,1327,7016270,18.91
19,AR,2017,631,3004279,21.0
24,CA,2017,4312,39536653,10.91


In [7]:
# Keep only the three columns needed downstream (this drops "Year" and "Deaths").
# Note: this rebinds `suicide_data`, which earlier held the CSV path string.
kept_columns = ["State", "Crude Rate", "Population"]
suicide_data = suicide_df_17.loc[:, kept_columns]
suicide_data.head()

Unnamed: 0,State,Crude Rate,Population
4,AL,17.15,4874747
9,AK,27.03,739795
14,AZ,18.91,7016270
19,AR,21.0,3004279
24,CA,10.91,39536653


In [8]:
# Join the 2017 suicide rows with the center counts by state.
# "State" is a column in suicide_data and the index name of centers_df.
# The join is an inner join (the pandas default), so a state present in
# only one of the two tables does not appear in the result.
centers_suicide = suicide_data.merge(centers_df, on="State", how="inner")
centers_suicide.head(10)

Unnamed: 0,State,Crude Rate,Population,Centers
0,AL,17.15,4874747,133
1,AK,27.03,739795,80
2,AZ,18.91,7016270,305
3,AR,21.0,3004279,204
4,CA,10.91,39536653,670
5,CO,21.06,5607154,150
6,CT,11.29,3588184,205
7,DE,11.64,961939,23
8,DC,6.77,693972,36
9,FL,15.38,20984400,408


In [9]:
# Write the merged table to disk with a header row and without the row index
output_path = "Resources/centers_vs_suicide.csv"
centers_suicide.to_csv(output_path, header=True, index=False)