In [1]:
from pygam import LinearGAM, s, f

In [2]:
import dask.dataframe as ddf
import pandas as pd
import datetime
import functools
import seaborn as sns
import matplotlib.pyplot as plt

### Import suicide death data

In [3]:
years = [year for year in range(2000, 2020)]
years

[2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019]

In [4]:
data = []
for year in years:
    suicide_year = pd.read_csv('/global/cfs/cdirs/m1532/Projects_MVP/geospatial/Suicide_Death_NCHS/suicide_patient_level_1960_2020/overall/overall_suicide_patient_level_' + str(year) + '.csv', dtype = {'year': int, 'county_residence': str, 'county_death': str, 'state_residence': str, 'state_death': str})
    suicide_year = suicide_year.loc[:, ~suicide_year.columns.str.contains('^Unnamed')]
    data.append(suicide_year)

In [5]:
suicide = pd.concat(data)
suicide.head()

Unnamed: 0,year,county_residence,state_residence,state_death,county_death,month,sex,race,age,age_range,death_cause
0,2000,1115,1,1,1055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,1101,1,1,1101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,1001,1,1,1001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,1003,1,1,1003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,1015,1,1,1015,Jan,Male,White,6,age 25-64,firearm_suicide


In [6]:
suicide = suicide.drop(['county_residence', 'state_residence'], axis = 1)
suicide = suicide.rename(columns = {'state_death': 'statefips', 'county_death': 'fips'})
suicide.head()

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause
0,2000,1,1055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,1,1101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,1,1001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,1,1003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,1,1015,Jan,Male,White,6,age 25-64,firearm_suicide


In [7]:
suicide

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause
0,2000,01,01055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,01,01101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,01,01001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,01,01003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,01,01015,Jan,Male,White,6,age 25-64,firearm_suicide
...,...,...,...,...,...,...,...,...,...
47669,2019,36,36081,Dec,Male,Other,5,age 25-64,nonfirearm_suicide
47670,2019,36,36081,Dec,Male,Black,5,age 25-64,nonfirearm_suicide
47671,2019,36,36005,Dec,Female,Black,4,age 0-24,nonfirearm_suicide
47672,2019,36,36005,Dec,Male,White,6,age 25-64,nonfirearm_suicide


### Load population data

In [8]:
data_pop = []
for year in years:
    pop_year = pd.read_csv('/global/cfs/cdirs/m1532/Projects_MVP/geospatial/temp_bins_suicide/Population/population_monthly/population_monthly_' + str(year) + '.csv', dtype = {'year': int, 'fips': str})
    pop_year = pop_year.loc[:, ~pop_year.columns.str.contains('^Unnamed')]
    data_pop.append(pop_year)

In [9]:
pop = pd.concat(data_pop)
pop.head()

Unnamed: 0,year,fips,pop,month
0,2000,1001,39646,Jan
1,2000,1001,39758,Feb
2,2000,1001,39870,Mar
3,2000,1001,39982,Apr
4,2000,1001,40094,May


In [10]:
month_replace = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10,
                'Nov': 11, 'Dec': 12}
pop['month'] = pop['month'].replace(month_replace)
pop['month'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [11]:
pop.head()

Unnamed: 0,year,fips,pop,month
0,2000,1001,39646,1
1,2000,1001,39758,2
2,2000,1001,39870,3
3,2000,1001,39982,4
4,2000,1001,40094,5


### Load elevation data

In [13]:
altitude = pd.read_csv('county_altitude.csv', dtype = {'FIPSCODE': str})
altitude = altitude.loc[:, ~altitude.columns.str.contains('^Unnamed')]
altitude = altitude.rename(columns = {'FIPSCODE': 'fips', 'elevation': 'altitude'})
altitude = altitude[['fips', 'altitude']]
altitude.head()

Unnamed: 0,fips,altitude
0,19107,207.03
1,19189,390.25
2,19029,385.95
3,19015,268.98
4,19059,425.89


In [14]:
def divideByAltitude(altitude):
    if altitude >= 500:
        return 'above'
    else:
        return 'below'

In [17]:
altitude['altitude_group'] = altitude['altitude'].apply(divideByAltitude)
altitude.head()

Unnamed: 0,fips,altitude,altitude_group
0,19107,207.03,below
1,19189,390.25,below
2,19029,385.95,below
3,19015,268.98,below
4,19059,425.89,below


### Merge dataframes together

In [18]:
suicide

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause
0,2000,01,01055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,01,01101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,01,01001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,01,01003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,01,01015,Jan,Male,White,6,age 25-64,firearm_suicide
...,...,...,...,...,...,...,...,...,...
47669,2019,36,36081,Dec,Male,Other,5,age 25-64,nonfirearm_suicide
47670,2019,36,36081,Dec,Male,Black,5,age 25-64,nonfirearm_suicide
47671,2019,36,36005,Dec,Female,Black,4,age 0-24,nonfirearm_suicide
47672,2019,36,36005,Dec,Male,White,6,age 25-64,nonfirearm_suicide


In [19]:
suicide_death = suicide.drop(['statefips', 'sex', 'race', 'age', 'age_range'], axis = 1)
suicide_death = suicide_death.groupby(['year', 'fips', 'month']).count()
suicide_death = suicide_death.reset_index()
suicide_death.head()

Unnamed: 0,year,fips,month,death_cause
0,2000,1001,Apr,1
1,2000,1001,Aug,2
2,2000,1001,Feb,2
3,2000,1001,Jan,1
4,2000,1001,Jun,1


In [20]:
suicide_death['year'].unique()

array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019])

In [21]:
suicide_death['month'] = suicide_death['month'].replace(month_replace)
suicide_death.head()

Unnamed: 0,year,fips,month,death_cause
0,2000,1001,4,1
1,2000,1001,8,2
2,2000,1001,2,2
3,2000,1001,1,1
4,2000,1001,6,1


In [43]:
suicide_pop = suicide_death.merge(pop, on = ['year', 'month', 'fips'], how = 'inner')
suicide_pop.head()

Unnamed: 0,year,fips,month,death_cause,pop
0,2000,1001,4,1,39982
1,2000,1001,8,2,40431
2,2000,1001,2,2,39758
3,2000,1001,1,1,39646
4,2000,1001,6,1,40207


In [44]:
suicide_pop = suicide_pop.rename(columns = {'death_cause': 'deaths'})
suicide_pop.head()

Unnamed: 0,year,fips,month,deaths,pop
0,2000,1001,4,1,39982
1,2000,1001,8,2,40431
2,2000,1001,2,2,39758
3,2000,1001,1,1,39646
4,2000,1001,6,1,40207


In [45]:
suicide_pop = suicide_pop.merge(altitude, on = ['fips'], how = 'inner')
suicide_pop.head()

Unnamed: 0,year,fips,month,deaths,pop,altitude,altitude_group
0,2000,1001,4,1,39982,128.66,below
1,2000,1001,8,2,40431,128.66,below
2,2000,1001,2,2,39758,128.66,below
3,2000,1001,1,1,39646,128.66,below
4,2000,1001,6,1,40207,128.66,below


### Load temperature and precipitation data from PRISM

In [24]:
data_temp = []
for year in years:
    temp_year = pd.read_csv('/global/cfs/cdirs/m1532/Projects_MVP/geospatial/PRISM_Data/PRISM_daily_county_level/prism_daily_county_level_' + str(year) + '.csv', dtype = {'year': int, 'fips': str})
    temp_year = temp_year.loc[:, ~temp_year.columns.str.contains('^Unnamed')]
    data_temp.append(temp_year)

In [25]:
climate = pd.concat(data_temp)
climate.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year
0,1001,2000-01-01,16.262459,10.08938,22.435537,0.560423,2000
1,1001,2000-01-02,17.412651,11.95422,22.871081,0.390251,2000
2,1001,2000-01-03,18.705264,14.001039,23.40949,0.21617,2000
3,1001,2000-01-04,16.203456,9.861904,22.545009,16.488676,2000
4,1001,2000-01-05,6.494604,-1.153744,14.142953,0.216055,2000


In [26]:
def celsius_to_fahrenheit(celsius):
    return (celsius * 9/5) + 32

In [27]:
climate['tMean'] = climate['tMean'].apply(celsius_to_fahrenheit)
climate['tMin'] = climate['tMin'].apply(celsius_to_fahrenheit)
climate['tMax'] = climate['tMax'].apply(celsius_to_fahrenheit)

In [28]:
import datetime

In [29]:
def convertTime(time):
    return datetime.strptime(time, '%Y-%m-%d')

In [30]:
def getMonth(date):
    return(date.strftime('%b'))

In [31]:
climate['date'] = pd.to_datetime(climate['date'])
climate['month'] = climate['date'].apply(getMonth)

In [32]:
climate['month'] = climate['month'].replace(month_replace)
climate.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year,month
0,1001,2000-01-01,61.272425,50.160884,72.383967,0.560423,2000,1
1,1001,2000-01-02,63.342771,53.517596,73.167946,0.390251,2000,1
2,1001,2000-01-03,65.669476,57.20187,74.137082,0.21617,2000,1
3,1001,2000-01-04,61.166221,49.751427,72.581016,16.488676,2000,1
4,1001,2000-01-05,43.690288,29.923261,57.457315,0.216055,2000,1


In [33]:
climate = climate.drop(['date'], axis = 1)
climate = climate.groupby(['year', 'month', 'fips']).mean()
climate = climate.reset_index()
climate.head()

Unnamed: 0,year,month,fips,tMean,tMin,tMax,prec
0,2000,1,1001,48.16656,37.347152,58.985968,4.190979
1,2000,1,1003,52.781099,41.5319,64.030298,2.639092
2,2000,1,1005,48.316217,36.439576,60.192859,3.507654
3,2000,1,1007,45.746169,34.33067,57.161669,4.497632
4,2000,1,1009,42.205221,32.07842,52.332022,4.782151


In [34]:
climate

Unnamed: 0,year,month,fips,tMean,tMin,tMax,prec
0,2000,1,01001,48.166560,37.347152,58.985968,4.190979
1,2000,1,01003,52.781099,41.531900,64.030298,2.639092
2,2000,1,01005,48.316217,36.439576,60.192859,3.507654
3,2000,1,01007,45.746169,34.330670,57.161669,4.497632
4,2000,1,01009,42.205221,32.078420,52.332022,4.782151
...,...,...,...,...,...,...,...
745195,2019,12,56037,17.121235,7.089279,27.153191,0.632654
745196,2019,12,56039,17.573244,9.812942,25.333545,2.434054
745197,2019,12,56041,18.615721,7.988787,29.242656,0.900065
745198,2019,12,56043,23.729262,12.816991,34.641533,0.354680


In [38]:
climate = climate.merge(altitude, on = ['fips'], how = 'inner')
climate.head()

Unnamed: 0,year,month,fips,tMean,tMin,tMax,prec,altitude,altitude_group
0,2000,1,1001,48.16656,37.347152,58.985968,4.190979,128.66,below
1,2000,2,1001,53.17536,39.102574,67.248145,1.57738,128.66,below
2,2000,3,1001,59.891998,46.67461,73.109385,3.348399,128.66,below
3,2000,4,1001,61.008822,47.667463,74.350182,2.889071,128.66,below
4,2000,5,1001,75.073873,62.554663,87.593083,1.175421,128.66,below


In [39]:
climate = climate.drop(['fips', 'altitude'], axis =1)
climate = climate.groupby(['year', 'month', 'altitude_group']).mean()
climate.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tMean,tMin,tMax,prec
year,month,altitude_group,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000,1,above,30.75273,18.970697,42.534762,1.660137
2000,1,below,34.542387,24.166347,44.918427,2.385929
2000,2,above,36.651611,24.126621,49.176602,1.846535
2000,2,below,41.911309,30.390252,53.432367,2.269223
2000,3,above,42.102596,29.186079,55.019113,2.006188


In [40]:
climate = climate.reset_index()
climate.head()

Unnamed: 0,year,month,altitude_group,tMean,tMin,tMax,prec
0,2000,1,above,30.75273,18.970697,42.534762,1.660137
1,2000,1,below,34.542387,24.166347,44.918427,2.385929
2,2000,2,above,36.651611,24.126621,49.176602,1.846535
3,2000,2,below,41.911309,30.390252,53.432367,2.269223
4,2000,3,above,42.102596,29.186079,55.019113,2.006188


In [46]:
suicide_pop.head()

Unnamed: 0,year,fips,month,deaths,pop,altitude,altitude_group
0,2000,1001,4,1,39982,128.66,below
1,2000,1001,8,2,40431,128.66,below
2,2000,1001,2,2,39758,128.66,below
3,2000,1001,1,1,39646,128.66,below
4,2000,1001,6,1,40207,128.66,below


In [47]:
suicide_pop = suicide_pop.drop(['fips', 'altitude'], axis =1)
suicide_pop = suicide_pop.groupby(['year', 'month', 'altitude_group']).sum()
suicide_pop = suicide_pop.reset_index()
suicide_pop.head()

Unnamed: 0,year,month,altitude_group,deaths,pop
0,2000,1,above,491,30796126
1,2000,1,below,1970,156375770
2,2000,2,above,409,30545281
3,2000,2,below,1832,152847092
4,2000,3,above,489,30961931


In [48]:
suicide_climate = suicide_pop.merge(climate, on = ['year', 'month', 'altitude_group'], how = 'inner')
suicide_climate.head()

Unnamed: 0,year,month,altitude_group,deaths,pop,tMean,tMin,tMax,prec
0,2000,1,above,491,30796126,30.75273,18.970697,42.534762,1.660137
1,2000,1,below,1970,156375770,34.542387,24.166347,44.918427,2.385929
2,2000,2,above,409,30545281,36.651611,24.126621,49.176602,1.846535
3,2000,2,below,1832,152847092,41.911309,30.390252,53.432367,2.269223
4,2000,3,above,489,30961931,42.102596,29.186079,55.019113,2.006188


In [49]:
suicide_climate

Unnamed: 0,year,month,altitude_group,deaths,pop,tMean,tMin,tMax,prec
0,2000,1,above,491,30796126,30.752730,18.970697,42.534762,1.660137
1,2000,1,below,1970,156375770,34.542387,24.166347,44.918427,2.385929
2,2000,2,above,409,30545281,36.651611,24.126621,49.176602,1.846535
3,2000,2,below,1832,152847092,41.911309,30.390252,53.432367,2.269223
4,2000,3,above,489,30961931,42.102596,29.186079,55.019113,2.006188
...,...,...,...,...,...,...,...,...,...
475,2019,10,below,3292,212280151,58.557228,47.512646,69.601811,3.871787
476,2019,11,above,715,42629974,37.807169,25.390543,50.223795,1.209843
477,2019,11,below,2867,208629097,42.801831,32.141708,53.461955,2.011083
478,2019,12,above,693,42530686,32.649707,22.214124,43.085290,1.572717


In [50]:
suicide_climate['suicide_rate'] = (suicide_climate['deaths']/suicide_climate['pop'])*100000
suicide_climate.head()

Unnamed: 0,year,month,altitude_group,deaths,pop,tMean,tMin,tMax,prec,suicide_rate
0,2000,1,above,491,30796126,30.75273,18.970697,42.534762,1.660137,1.594356
1,2000,1,below,1970,156375770,34.542387,24.166347,44.918427,2.385929,1.259786
2,2000,2,above,409,30545281,36.651611,24.126621,49.176602,1.846535,1.338996
3,2000,2,below,1832,152847092,41.911309,30.390252,53.432367,2.269223,1.198583
4,2000,3,above,489,30961931,42.102596,29.186079,55.019113,2.006188,1.579359


In [51]:
summer_winter = [6, 7, 8, 12, 1, 2]
def is_summer_winter(month):
    if month in summer_winter :
        return 1
    else:
        return 0

In [52]:
suicide_climate['summer_winter'] = suicide_climate['month'].apply(is_summer_winter)
suicide_climate.head()

Unnamed: 0,year,month,altitude_group,deaths,pop,tMean,tMin,tMax,prec,suicide_rate,summer_winter
0,2000,1,above,491,30796126,30.75273,18.970697,42.534762,1.660137,1.594356,1
1,2000,1,below,1970,156375770,34.542387,24.166347,44.918427,2.385929,1.259786,1
2,2000,2,above,409,30545281,36.651611,24.126621,49.176602,1.846535,1.338996,1
3,2000,2,below,1832,152847092,41.911309,30.390252,53.432367,2.269223,1.198583,1
4,2000,3,above,489,30961931,42.102596,29.186079,55.019113,2.006188,1.579359,0


In [53]:
suicide_climate.to_csv('monthly_GAM_altitude.csv')