In [1]:
from pygam import LinearGAM, s, f

In [2]:
import dask.dataframe as ddf
import pandas as pd
import datetime
import functools
import seaborn as sns
import matplotlib.pyplot as plt

### Import suicide death data

In [3]:
# Calendar years covered by the analysis: 2000 through 2019 inclusive.
years = list(range(2000, 2020))
years

[2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019]

In [4]:
# Read one patient-level suicide CSV per year and collect the frames in `data`.
# The residence/death state and county codes are read as strings, year as int.
suicide_dtypes = {'year': int, 'county_residence': str, 'county_death': str, 'state_residence': str, 'state_death': str}
data = []
for year in years:
    suicide_csv = '/global/cfs/cdirs/m1532/Projects_MVP/geospatial/Suicide_Death_NCHS/suicide_patient_level_1960_2020/overall/overall_suicide_patient_level_' + str(year) + '.csv'
    suicide_year = pd.read_csv(suicide_csv, dtype = suicide_dtypes)
    # Keep only columns whose name does not start with "Unnamed"
    # (presumably a row index saved with the CSV -- confirm against the files).
    named_columns = ~suicide_year.columns.str.contains('^Unnamed')
    suicide_year = suicide_year.loc[:, named_columns]
    data.append(suicide_year)

In [5]:
# Stack the per-year frames into a single table; each frame keeps its own row labels.
suicide = pd.concat(objs = data)
suicide.head()

Unnamed: 0,year,county_residence,state_residence,state_death,county_death,month,sex,race,age,age_range,death_cause
0,2000,1115,1,1,1055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,1101,1,1,1101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,1001,1,1,1001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,1003,1,1,1003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,1015,1,1,1015,Jan,Male,White,6,age 25-64,firearm_suicide


In [6]:
# Discard the residence geography and keep the place of death,
# renamed to the `statefips` / `fips` column names.
suicide = (
    suicide
    .drop(columns = ['county_residence', 'state_residence'])
    .rename(columns = {'state_death': 'statefips', 'county_death': 'fips'})
)
suicide.head()

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause
0,2000,1,1055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,1,1101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,1,1001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,1,1003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,1,1015,Jan,Male,White,6,age 25-64,firearm_suicide


In [7]:
# Display the combined frame; the rendered output is truncated to its first and last rows.
suicide

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause
0,2000,01,01055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,01,01101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,01,01001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,01,01003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,01,01015,Jan,Male,White,6,age 25-64,firearm_suicide
...,...,...,...,...,...,...,...,...,...
47669,2019,36,36081,Dec,Male,Other,5,age 25-64,nonfirearm_suicide
47670,2019,36,36081,Dec,Male,Black,5,age 25-64,nonfirearm_suicide
47671,2019,36,36005,Dec,Female,Black,4,age 0-24,nonfirearm_suicide
47672,2019,36,36005,Dec,Male,White,6,age 25-64,nonfirearm_suicide


In [9]:
# Duplicate `death_cause` under the name `suicide_means`; the copy serves as a
# grouping key while the original column is counted.
means_column = suicide['death_cause']
suicide['suicide_means'] = means_column
suicide.head()

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause,suicide_means
0,2000,1,1055,Jan,Male,White,8,age 25-64,firearm_suicide,firearm_suicide
1,2000,1,1101,Jan,Male,Black,4,age 0-24,firearm_suicide,firearm_suicide
2,2000,1,1001,Jan,Female,White,6,age 25-64,firearm_suicide,firearm_suicide
3,2000,1,1003,Jan,Female,White,3,age 0-24,nonfirearm_suicide,nonfirearm_suicide
4,2000,1,1015,Jan,Male,White,6,age 25-64,firearm_suicide,firearm_suicide


In [11]:
# Aggregate the patient-level records to one row per (year, month, suicide_means).
# After the drop, count() tallies the non-null `death_cause` entries in each group,
# so that column holds the number of death records.
suicide_death = (
    suicide
    .drop(columns = ['statefips', 'sex', 'race', 'age', 'age_range', 'fips'])
    .groupby(['year', 'month', 'suicide_means'])
    .count()
    .reset_index()
)
suicide_death.head()

Unnamed: 0,year,month,suicide_means,death_cause
0,2000,Apr,firearm_suicide,1373
1,2000,Apr,nonfirearm_suicide,1112
2,2000,Aug,firearm_suicide,1411
3,2000,Aug,nonfirearm_suicide,1151
4,2000,Dec,firearm_suicide,1280


In [12]:
# Lookup from three-letter month abbreviation to calendar month number (1-12).
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month_replace = {abbrev: number for number, abbrev in enumerate(month_order, start = 1)}

In [13]:
# Swap month abbreviations for month numbers using the `month_replace` lookup.
month_numbers = suicide_death['month'].replace(month_replace)
suicide_death['month'] = month_numbers
suicide_death.head()

Unnamed: 0,year,month,suicide_means,death_cause
0,2000,4,firearm_suicide,1373
1,2000,4,nonfirearm_suicide,1112
2,2000,8,firearm_suicide,1411
3,2000,8,nonfirearm_suicide,1151
4,2000,12,firearm_suicide,1280


### Load population data

In [14]:
# Read one monthly population CSV per year and collect the frames in `data_pop`.
# `fips` is read as a string, `year` as int.
pop_dtypes = {'year': int, 'fips': str}
data_pop = []
for year in years:
    pop_csv = '/global/cfs/cdirs/m1532/Projects_MVP/geospatial/temp_bins_suicide/Population/population_monthly/population_monthly_' + str(year) + '.csv'
    pop_year = pd.read_csv(pop_csv, dtype = pop_dtypes)
    # Keep only columns whose name does not start with "Unnamed".
    named_columns = ~pop_year.columns.str.contains('^Unnamed')
    pop_year = pop_year.loc[:, named_columns]
    data_pop.append(pop_year)

In [15]:
# Stack the per-year population frames into one table.
pop = pd.concat(objs = data_pop)
pop.head()

Unnamed: 0,year,fips,pop,month
0,2000,1001,39646,Jan
1,2000,1001,39758,Feb
2,2000,1001,39870,Mar
3,2000,1001,39982,Apr
4,2000,1001,40094,May


In [16]:
# Convert month abbreviations to month numbers, then list the distinct values as a check.
pop_month_numbers = pop['month'].replace(month_replace)
pop['month'] = pop_month_numbers
pop['month'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [17]:
# Preview the population table now that `month` holds month numbers.
pop.head()

Unnamed: 0,year,fips,pop,month
0,2000,1001,39646,1
1,2000,1001,39758,2
2,2000,1001,39870,3
3,2000,1001,39982,4
4,2000,1001,40094,5


In [19]:
# Sum `pop` over all `fips` rows to get one total per (year, month).
pop_month = (
    pop
    .drop(columns = ['fips'])
    .groupby(['year', 'month'])
    .sum()
    .reset_index()
)
pop_month

Unnamed: 0,year,month,pop
0,2000,1,252927063
1,2000,2,253131713
2,2000,3,253336339
3,2000,4,253540949
4,2000,5,253745033
...,...,...,...
235,2019,8,297898901
236,2019,9,297901159
237,2019,10,297903483
238,2019,11,297905757


### Merge dataframes together

In [20]:
# Display the monthly death counts before merging; the rendered output is truncated.
suicide_death

Unnamed: 0,year,month,suicide_means,death_cause
0,2000,4,firearm_suicide,1373
1,2000,4,nonfirearm_suicide,1112
2,2000,8,firearm_suicide,1411
3,2000,8,nonfirearm_suicide,1151
4,2000,12,firearm_suicide,1280
...,...,...,...,...
475,2019,11,nonfirearm_suicide,1738
476,2019,10,firearm_suicide,1981
477,2019,10,nonfirearm_suicide,2074
478,2019,9,firearm_suicide,2018


In [21]:
# List the distinct years present in the death counts.
suicide_death['year'].unique()

array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019])

In [22]:
# Attach the monthly population total to each death-count row.
# Inner join: only (year, month) pairs present in both tables are kept.
suicide_pop = pd.merge(
    suicide_death,
    pop_month,
    how = 'inner',
    on = ['year', 'month'],
)
suicide_pop.head()

Unnamed: 0,year,month,suicide_means,death_cause,pop
0,2000,4,firearm_suicide,1373,253540949
1,2000,4,nonfirearm_suicide,1112,253540949
2,2000,8,firearm_suicide,1411,254358910
3,2000,8,nonfirearm_suicide,1151,254358910
4,2000,12,firearm_suicide,1280,255177896


In [23]:
# The counted `death_cause` column holds the number of deaths; name it accordingly.
suicide_pop = suicide_pop.rename({'death_cause': 'deaths'}, axis = 'columns')
suicide_pop.head()

Unnamed: 0,year,month,suicide_means,deaths,pop
0,2000,4,firearm_suicide,1373,253540949
1,2000,4,nonfirearm_suicide,1112,253540949
2,2000,8,firearm_suicide,1411,254358910
3,2000,8,nonfirearm_suicide,1151,254358910
4,2000,12,firearm_suicide,1280,255177896


### Load temperature and precipitation data from PRISM

In [37]:
# Read one daily county-level PRISM CSV per year and collect the frames in `data_temp`.
# `fips` is read as a string, `year` as int.
prism_dtypes = {'year': int, 'fips': str}
data_temp = []
for year in years:
    prism_csv = '/global/cfs/cdirs/m1532/Projects_MVP/geospatial/PRISM_Data/PRISM_daily_county_level/prism_daily_county_level_' + str(year) + '.csv'
    temp_year = pd.read_csv(prism_csv, dtype = prism_dtypes)
    # Keep only columns whose name does not start with "Unnamed".
    named_columns = ~temp_year.columns.str.contains('^Unnamed')
    temp_year = temp_year.loc[:, named_columns]
    data_temp.append(temp_year)

In [38]:
# Stack the per-year PRISM frames into one daily table.
climate = pd.concat(objs = data_temp)
climate.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year
0,1001,2000-01-01,16.262459,10.08938,22.435537,0.560423,2000
1,1001,2000-01-02,17.412651,11.95422,22.871081,0.390251,2000
2,1001,2000-01-03,18.705264,14.001039,23.40949,0.21617,2000
3,1001,2000-01-04,16.203456,9.861904,22.545009,16.488676,2000
4,1001,2000-01-05,6.494604,-1.153744,14.142953,0.216055,2000


In [39]:
def celsius_to_fahrenheit(celsius):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit.

    Uses only arithmetic operators, so `celsius` may be any value that
    supports them.
    """
    scaled = celsius * 9 / 5
    return scaled + 32

In [40]:
# Convert the three temperature columns from Celsius to Fahrenheit.
# `celsius_to_fahrenheit` is plain arithmetic, so it is called once per column on
# the whole Series rather than once per row through .apply(); the values are the same.
# NOTE: the columns are overwritten in place, so re-running this cell without
# reloading `climate` would apply the conversion a second time.
for temp_column in ['tMean', 'tMin', 'tMax']:
    climate[temp_column] = celsius_to_fahrenheit(climate[temp_column])

In [41]:
import datetime

In [42]:
def convertTime(time):
    """Parse a 'YYYY-MM-DD' string into a `datetime.datetime`.

    Parameters
    ----------
    time : str
        Date text in the format '%Y-%m-%d'.

    Returns
    -------
    datetime.datetime
        The parsed date at midnight.

    Raises
    ------
    ValueError
        If `time` does not match the '%Y-%m-%d' format.
    """
    # The notebook binds `datetime` to the module (`import datetime`), so
    # strptime has to be reached through the `datetime.datetime` class;
    # `datetime.strptime` would raise AttributeError.
    return datetime.datetime.strptime(time, '%Y-%m-%d')

In [43]:
def getMonth(date):
    """Return the abbreviated month name of `date` (strftime directive '%b')."""
    month_label = date.strftime('%b')
    return month_label

In [44]:
# Parse the date strings, then label each row with its month abbreviation.
climate['date'] = pd.to_datetime(climate['date'])
# Take the numeric month with the vectorised .dt accessor and translate it through
# the inverse of `month_replace`. This yields the same 'Jan'..'Dec' labels as
# strftime('%b') without a Python call per row, and it does not depend on the
# process locale, so the later replace(month_replace) always finds its keys.
month_abbrev = {number: abbrev for abbrev, number in month_replace.items()}
climate['month'] = climate['date'].dt.month.map(month_abbrev)

In [45]:
# Convert month abbreviations to month numbers so `month` matches the other tables.
climate_month_numbers = climate['month'].replace(month_replace)
climate['month'] = climate_month_numbers
climate.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year,month
0,1001,2000-01-01,61.272425,50.160884,72.383967,0.560423,2000,1
1,1001,2000-01-02,63.342771,53.517596,73.167946,0.390251,2000,1
2,1001,2000-01-03,65.669476,57.20187,74.137082,0.21617,2000,1
3,1001,2000-01-04,61.166221,49.751427,72.581016,16.488676,2000,1
4,1001,2000-01-05,43.690288,29.923261,57.457315,0.216055,2000,1


In [46]:
# Collapse the daily county rows to one row per (year, month) by taking the plain
# mean of every remaining column. Each county-day row counts equally; no
# population or area weighting is applied.
climate = (
    climate
    .drop(columns = ['date', 'fips'])
    .groupby(['year', 'month'])
    .mean()
    .reset_index()
)
climate.head()

Unnamed: 0,year,month,tMean,tMin,tMax,prec
0,2000,1,33.699767,22.991576,44.407958,2.21446
1,2000,2,40.700009,28.945989,52.45403,2.164661
2,2000,3,48.228999,35.979568,60.478429,2.588274
3,2000,4,53.37646,40.754625,65.998294,2.785087
4,2000,5,65.413321,53.298932,77.527709,2.954953


In [47]:
# Display the monthly climate table; the rendered output is truncated.
climate

Unnamed: 0,year,month,tMean,tMin,tMax,prec
0,2000,1,33.699767,22.991576,44.407958,2.214460
1,2000,2,40.700009,28.945989,52.454030,2.164661
2,2000,3,48.228999,35.979568,60.478429,2.588274
3,2000,4,53.376460,40.754625,65.998294,2.785087
4,2000,5,65.413321,53.298932,77.527709,2.954953
...,...,...,...,...,...,...
235,2019,8,74.635736,63.286607,85.984866,2.799854
236,2019,9,71.510311,59.662641,83.357981,2.021365
237,2019,10,55.799142,44.211635,67.386648,3.321412
238,2019,11,41.660884,30.582931,52.738837,1.816941


In [48]:
# Attach the monthly climate averages to each death/population row.
# Inner join: only (year, month) pairs present in both tables are kept.
suicide_climate = pd.merge(
    suicide_pop,
    climate,
    how = 'inner',
    on = ['year', 'month'],
)
suicide_climate.head()

Unnamed: 0,year,month,suicide_means,deaths,pop,tMean,tMin,tMax,prec
0,2000,4,firearm_suicide,1373,253540949,53.37646,40.754625,65.998294,2.785087
1,2000,4,nonfirearm_suicide,1112,253540949,53.37646,40.754625,65.998294,2.785087
2,2000,8,firearm_suicide,1411,254358910,75.156451,63.039548,87.273353,2.242387
3,2000,8,nonfirearm_suicide,1151,254358910,75.156451,63.039548,87.273353,2.242387
4,2000,12,firearm_suicide,1280,255177896,27.428217,17.647957,37.208477,1.919926


In [50]:
# Put the rows in chronological order; the original row labels are kept.
suicide_climate = suicide_climate.sort_values(by = ['year', 'month'])
suicide_climate

Unnamed: 0,year,month,suicide_means,deaths,pop,tMean,tMin,tMax,prec
8,2000,1,firearm_suicide,1449,252927063,33.699767,22.991576,44.407958,2.214460
9,2000,1,nonfirearm_suicide,1150,252927063,33.699767,22.991576,44.407958,2.214460
6,2000,2,firearm_suicide,1321,253131713,40.700009,28.945989,52.454030,2.164661
7,2000,2,nonfirearm_suicide,1024,253131713,40.700009,28.945989,52.454030,2.164661
14,2000,3,firearm_suicide,1468,253336339,48.228999,35.979568,60.478429,2.588274
...,...,...,...,...,...,...,...,...,...
477,2019,10,nonfirearm_suicide,2074,297903483,55.799142,44.211635,67.386648,3.321412
474,2019,11,firearm_suicide,1872,297905757,41.660884,30.582931,52.738837,1.816941
475,2019,11,nonfirearm_suicide,1738,297905757,41.660884,30.582931,52.738837,1.816941
460,2019,12,firearm_suicide,1878,297909611,38.711947,28.725012,48.698881,2.417830


In [51]:
# Monthly deaths per 100,000 population.
deaths_per_person = suicide_climate['deaths'] / suicide_climate['pop']
suicide_climate['suicide_rate'] = deaths_per_person * 100000
suicide_climate.head()

Unnamed: 0,year,month,suicide_means,deaths,pop,tMean,tMin,tMax,prec,suicide_rate
8,2000,1,firearm_suicide,1449,252927063,33.699767,22.991576,44.407958,2.21446,0.572892
9,2000,1,nonfirearm_suicide,1150,252927063,33.699767,22.991576,44.407958,2.21446,0.454677
6,2000,2,firearm_suicide,1321,253131713,40.700009,28.945989,52.45403,2.164661,0.521863
7,2000,2,nonfirearm_suicide,1024,253131713,40.700009,28.945989,52.45403,2.164661,0.404532
14,2000,3,firearm_suicide,1468,253336339,48.228999,35.979568,60.478429,2.588274,0.579467


In [52]:
# Month numbers flagged as summer or winter: Jun-Aug and Dec-Feb.
summer_winter = [6, 7, 8, 12, 1, 2]
def is_summer_winter(month):
    """Return 1 if `month` appears in `summer_winter`, otherwise 0."""
    return 1 if month in summer_winter else 0

In [53]:
# Flag each row with 1 when its month is in the summer/winter list, else 0.
season_flag = suicide_climate['month'].apply(is_summer_winter)
suicide_climate['summer_winter'] = season_flag
suicide_climate.head()

Unnamed: 0,year,month,suicide_means,deaths,pop,tMean,tMin,tMax,prec,suicide_rate,summer_winter
8,2000,1,firearm_suicide,1449,252927063,33.699767,22.991576,44.407958,2.21446,0.572892,1
9,2000,1,nonfirearm_suicide,1150,252927063,33.699767,22.991576,44.407958,2.21446,0.454677,1
6,2000,2,firearm_suicide,1321,253131713,40.700009,28.945989,52.45403,2.164661,0.521863,1
7,2000,2,nonfirearm_suicide,1024,253131713,40.700009,28.945989,52.45403,2.164661,0.404532,1
14,2000,3,firearm_suicide,1468,253336339,48.228999,35.979568,60.478429,2.588274,0.579467,0


In [54]:
# Write the merged monthly table to a CSV in the current working directory.
# The row index is written too (to_csv default).
output_csv = 'monthly_GAM_suicide_means.csv'
suicide_climate.to_csv(output_csv)