In [1]:
from pygam import LinearGAM, s, f

In [2]:
import dask.dataframe as ddf
import pandas as pd
import datetime
import functools
import seaborn as sns
import matplotlib.pyplot as plt

### Import suicide death data

In [3]:
years = [year for year in range(2000, 2020)]
years

[2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019]

In [5]:
data = []
for year in years:
    suicide_year = pd.read_csv('/global/cfs/cdirs/m1532/Projects_MVP/geospatial/Suicide_Death_NCHS/suicide_patient_level_1960_2020/overall/overall_suicide_patient_level_' + str(year) + '.csv', dtype = {'year': int, 'county_residence': str, 'county_death': str, 'state_residence': str, 'state_death': str})
    suicide_year = suicide_year.loc[:, ~suicide_year.columns.str.contains('^Unnamed')]
    data.append(suicide_year)

In [6]:
suicide = pd.concat(data)
suicide.head()

Unnamed: 0,year,county_residence,state_residence,state_death,county_death,month,sex,race,age,age_range,death_cause
0,2000,1115,1,1,1055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,1101,1,1,1101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,1001,1,1,1001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,1003,1,1,1003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,1015,1,1,1015,Jan,Male,White,6,age 25-64,firearm_suicide


In [7]:
suicide = suicide.drop(['county_residence', 'state_residence'], axis = 1)
suicide = suicide.rename(columns = {'state_death': 'statefips', 'county_death': 'fips'})
suicide.head()

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause
0,2000,1,1055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,1,1101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,1,1001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,1,1003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,1,1015,Jan,Male,White,6,age 25-64,firearm_suicide


In [8]:
suicide

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause
0,2000,01,01055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,01,01101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,01,01001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,01,01003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,01,01015,Jan,Male,White,6,age 25-64,firearm_suicide
...,...,...,...,...,...,...,...,...,...
47669,2019,36,36081,Dec,Male,Other,5,age 25-64,nonfirearm_suicide
47670,2019,36,36081,Dec,Male,Black,5,age 25-64,nonfirearm_suicide
47671,2019,36,36005,Dec,Female,Black,4,age 0-24,nonfirearm_suicide
47672,2019,36,36005,Dec,Male,White,6,age 25-64,nonfirearm_suicide


### Load population data and add region information

In [9]:
data_pop = []
for year in years:
    pop_year = pd.read_csv('/global/cfs/cdirs/m1532/Projects_MVP/geospatial/temp_bins_suicide/Population/population_monthly/population_monthly_' + str(year) + '.csv', dtype = {'year': int, 'fips': str})
    pop_year = pop_year.loc[:, ~pop_year.columns.str.contains('^Unnamed')]
    data_pop.append(pop_year)

In [10]:
pop = pd.concat(data_pop)
pop.head()

Unnamed: 0,year,fips,pop,month
0,2000,1001,39646,Jan
1,2000,1001,39758,Feb
2,2000,1001,39870,Mar
3,2000,1001,39982,Apr
4,2000,1001,40094,May


In [11]:
month_replace = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10,
                'Nov': 11, 'Dec': 12}
pop['month'] = pop['month'].replace(month_replace)
pop['month'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [12]:
pop.head()

Unnamed: 0,year,fips,pop,month
0,2000,1001,39646,1
1,2000,1001,39758,2
2,2000,1001,39870,3
3,2000,1001,39982,4
4,2000,1001,40094,5


In [13]:
suicide

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause
0,2000,01,01055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,01,01101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,01,01001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,01,01003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,01,01015,Jan,Male,White,6,age 25-64,firearm_suicide
...,...,...,...,...,...,...,...,...,...
47669,2019,36,36081,Dec,Male,Other,5,age 25-64,nonfirearm_suicide
47670,2019,36,36081,Dec,Male,Black,5,age 25-64,nonfirearm_suicide
47671,2019,36,36005,Dec,Female,Black,4,age 0-24,nonfirearm_suicide
47672,2019,36,36005,Dec,Male,White,6,age 25-64,nonfirearm_suicide


In [14]:
suicide_death = suicide.drop(['statefips', 'sex', 'race', 'age', 'age_range'], axis = 1)
suicide_death = suicide_death.groupby(['year', 'fips', 'month']).count()
suicide_death = suicide_death.reset_index()
suicide_death.head()

Unnamed: 0,year,fips,month,death_cause
0,2000,1001,Apr,1
1,2000,1001,Aug,2
2,2000,1001,Feb,2
3,2000,1001,Jan,1
4,2000,1001,Jun,1


In [15]:
suicide_death['year'].unique()

array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019])

In [16]:
suicide_death['month'] = suicide_death['month'].replace(month_replace)
suicide_death.head()

Unnamed: 0,year,fips,month,death_cause
0,2000,1001,4,1
1,2000,1001,8,2
2,2000,1001,2,2
3,2000,1001,1,1
4,2000,1001,6,1


In [17]:
suicide_pop = suicide_death.merge(pop, on = ['year', 'month', 'fips'], how = 'inner')
suicide_pop.head()

Unnamed: 0,year,fips,month,death_cause,pop
0,2000,1001,4,1,39982
1,2000,1001,8,2,40431
2,2000,1001,2,2,39758
3,2000,1001,1,1,39646
4,2000,1001,6,1,40207


In [18]:
suicide_pop = suicide_pop.rename(columns = {'death_cause': 'deaths'})
suicide_pop['suicide_rate'] = (suicide_pop['deaths']/suicide_pop['pop'])*100000
suicide_pop.head()

Unnamed: 0,year,fips,month,deaths,pop,suicide_rate
0,2000,1001,4,1,39982,2.501126
1,2000,1001,8,2,40431,4.946699
2,2000,1001,2,2,39758,5.030434
3,2000,1001,1,1,39646,2.522323
4,2000,1001,6,1,40207,2.487129


### Load temperature and precipitation data from PRISM

In [35]:
data_temp = []
for year in years:
    temp_year = pd.read_csv('/global/cfs/cdirs/m1532/Projects_MVP/geospatial/PRISM_Data/PRISM_daily_county_level/prism_daily_county_level_' + str(year) + '.csv', dtype = {'year': int, 'fips': str})
    temp_year = temp_year.loc[:, ~temp_year.columns.str.contains('^Unnamed')]
    data_temp.append(temp_year)

In [36]:
climate = pd.concat(data_temp)
climate.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year
0,1001,2000-01-01,16.262459,10.08938,22.435537,0.560423,2000
1,1001,2000-01-02,17.412651,11.95422,22.871081,0.390251,2000
2,1001,2000-01-03,18.705264,14.001039,23.40949,0.21617,2000
3,1001,2000-01-04,16.203456,9.861904,22.545009,16.488676,2000
4,1001,2000-01-05,6.494604,-1.153744,14.142953,0.216055,2000


In [37]:
def celsius_to_fahrenheit(celsius):
    return (celsius * 9/5) + 32

In [38]:
climate['tMean'] = climate['tMean'].apply(celsius_to_fahrenheit)
climate['tMin'] = climate['tMin'].apply(celsius_to_fahrenheit)
climate['tMax'] = climate['tMax'].apply(celsius_to_fahrenheit)

In [39]:
import datetime

In [40]:
def convertTime(time):
    return datetime.strptime(time, '%Y-%m-%d')

In [41]:
def getMonth(date):
    return(date.strftime('%b'))

In [42]:
climate['date'] = pd.to_datetime(climate['date'])
climate['month'] = climate['date'].apply(getMonth)

In [43]:
climate['month'] = climate['month'].replace(month_replace)
climate.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year,month
0,1001,2000-01-01,61.272425,50.160884,72.383967,0.560423,2000,1
1,1001,2000-01-02,63.342771,53.517596,73.167946,0.390251,2000,1
2,1001,2000-01-03,65.669476,57.20187,74.137082,0.21617,2000,1
3,1001,2000-01-04,61.166221,49.751427,72.581016,16.488676,2000,1
4,1001,2000-01-05,43.690288,29.923261,57.457315,0.216055,2000,1


In [44]:
climate = climate.drop(['date'], axis = 1)
climate = climate.groupby(['year', 'month', 'fips']).mean()
climate = climate.reset_index()
climate.head()

Unnamed: 0,year,month,fips,tMean,tMin,tMax,prec
0,2000,1,1001,48.16656,37.347152,58.985968,4.190979
1,2000,1,1003,52.781099,41.5319,64.030298,2.639092
2,2000,1,1005,48.316217,36.439576,60.192859,3.507654
3,2000,1,1007,45.746169,34.33067,57.161669,4.497632
4,2000,1,1009,42.205221,32.07842,52.332022,4.782151


In [45]:
climate_median = climate.drop(['fips', 'tMin', 'tMax', 'prec'], axis = 1)
climate_median = climate_median.groupby(['year', 'month']).median()
climate_median = climate_median.reset_index()
climate_median.head()

Unnamed: 0,year,month,tMean
0,2000,1,32.997964
1,2000,2,40.919949
2,2000,3,47.257493
3,2000,4,53.083772
4,2000,5,65.883401


In [46]:
climate_median = climate_median.rename(columns = {'tMean': 'median_temp'})
climate_median.head()

Unnamed: 0,year,month,median_temp
0,2000,1,32.997964
1,2000,2,40.919949
2,2000,3,47.257493
3,2000,4,53.083772
4,2000,5,65.883401


In [47]:
climate = climate.merge(climate_median, on = ['year', 'month'], how = 'inner')
climate.head()

Unnamed: 0,year,month,fips,tMean,tMin,tMax,prec,median_temp
0,2000,1,1001,48.16656,37.347152,58.985968,4.190979,32.997964
1,2000,1,1003,52.781099,41.5319,64.030298,2.639092,32.997964
2,2000,1,1005,48.316217,36.439576,60.192859,3.507654,32.997964
3,2000,1,1007,45.746169,34.33067,57.161669,4.497632,32.997964
4,2000,1,1009,42.205221,32.07842,52.332022,4.782151,32.997964


In [48]:
def divideByMedianTemp(tMean, median_temp):
    if tMean >= median_temp:
        return 'above'
    else:
        return 'below'    

In [49]:
climate['temp_group'] = climate.apply(lambda x: divideByMedianTemp(x['tMean'], x['median_temp']), axis = 1)
climate.head()

Unnamed: 0,year,month,fips,tMean,tMin,tMax,prec,median_temp,temp_group
0,2000,1,1001,48.16656,37.347152,58.985968,4.190979,32.997964,above
1,2000,1,1003,52.781099,41.5319,64.030298,2.639092,32.997964,above
2,2000,1,1005,48.316217,36.439576,60.192859,3.507654,32.997964,above
3,2000,1,1007,45.746169,34.33067,57.161669,4.497632,32.997964,above
4,2000,1,1009,42.205221,32.07842,52.332022,4.782151,32.997964,above


In [50]:
climate

Unnamed: 0,year,month,fips,tMean,tMin,tMax,prec,median_temp,temp_group
0,2000,1,01001,48.166560,37.347152,58.985968,4.190979,32.997964,above
1,2000,1,01003,52.781099,41.531900,64.030298,2.639092,32.997964,above
2,2000,1,01005,48.316217,36.439576,60.192859,3.507654,32.997964,above
3,2000,1,01007,45.746169,34.330670,57.161669,4.497632,32.997964,above
4,2000,1,01009,42.205221,32.078420,52.332022,4.782151,32.997964,above
...,...,...,...,...,...,...,...,...,...
745195,2019,12,56037,17.121235,7.089279,27.153191,0.632654,39.135761,below
745196,2019,12,56039,17.573244,9.812942,25.333545,2.434054,39.135761,below
745197,2019,12,56041,18.615721,7.988787,29.242656,0.900065,39.135761,below
745198,2019,12,56043,23.729262,12.816991,34.641533,0.354680,39.135761,below


In [51]:
suicide_pop.head()

Unnamed: 0,year,fips,month,deaths,pop,suicide_rate
0,2000,1001,4,1,39982,2.501126
1,2000,1001,8,2,40431,4.946699
2,2000,1001,2,2,39758,5.030434
3,2000,1001,1,1,39646,2.522323
4,2000,1001,6,1,40207,2.487129


In [52]:
suicide_climate = suicide_pop.merge(climate, on = ['year', 'month', 'fips'], how = 'inner')
suicide_climate.head()

Unnamed: 0,year,fips,month,deaths,pop,suicide_rate,tMean,tMin,tMax,prec,median_temp,temp_group
0,2000,1001,4,1,39982,2.501126,61.008822,47.667463,74.350182,2.889071,53.083772,above
1,2000,1001,8,2,40431,4.946699,81.769922,68.916341,94.623504,1.970743,75.052327,above
2,2000,1001,2,2,39758,5.030434,53.17536,39.102574,67.248145,1.57738,40.919949,above
3,2000,1001,1,1,39646,2.522323,48.16656,37.347152,58.985968,4.190979,32.997964,above
4,2000,1001,6,1,40207,2.487129,78.463166,66.422098,90.504235,2.316979,71.55042,above


In [56]:
#suicide_climate = suicide_climate.drop(['suicide_rate', 'median_temp', 'fips'], axis = 1)
suicide_death = suicide_climate[['year', 'month', 'temp_group', 'deaths', 'pop']]
suicide_temp = suicide_climate[['year', 'month', 'temp_group', 'tMean', 'tMin', 'tMax', 'prec']]
suicide_death = suicide_death.groupby(['year', 'month', 'temp_group']).sum()
suicide_temp = suicide_temp.groupby(['year', 'month', 'temp_group']).mean()

In [57]:
suicide_death = suicide_death.reset_index()
suicide_death.head()

Unnamed: 0,year,month,temp_group,deaths,pop
0,2000,1,above,1474,106494121
1,2000,1,below,1001,81805132
2,2000,2,above,1286,98644577
3,2000,2,below,971,86543377
4,2000,3,above,1380,102187280


In [58]:
suicide_temp = suicide_temp.reset_index()
suicide_temp.head()

Unnamed: 0,year,month,temp_group,tMean,tMin,tMax,prec
0,2000,1,above,44.008371,33.198792,54.81795,3.156511
1,2000,1,below,24.651474,14.980885,34.322063,1.927658
2,2000,2,above,49.760446,37.252614,62.268279,2.588052
3,2000,2,below,32.29396,22.370597,42.217324,2.281915
4,2000,3,above,56.017723,43.484168,68.551278,3.107751


In [64]:
suicide_climate = suicide_death.merge(suicide_temp, on = ['year', 'month', 'temp_group'], how = 'inner')
suicide_climate.head()

Unnamed: 0,year,month,temp_group,deaths,pop,tMean,tMin,tMax,prec
0,2000,1,above,1474,106494121,44.008371,33.198792,54.81795,3.156511
1,2000,1,below,1001,81805132,24.651474,14.980885,34.322063,1.927658
2,2000,2,above,1286,98644577,49.760446,37.252614,62.268279,2.588052
3,2000,2,below,971,86543377,32.29396,22.370597,42.217324,2.281915
4,2000,3,above,1380,102187280,56.017723,43.484168,68.551278,3.107751


In [65]:
suicide_climate

Unnamed: 0,year,month,temp_group,deaths,pop,tMean,tMin,tMax,prec
0,2000,1,above,1474,106494121,44.008371,33.198792,54.817950,3.156511
1,2000,1,below,1001,81805132,24.651474,14.980885,34.322063,1.927658
2,2000,2,above,1286,98644577,49.760446,37.252614,62.268279,2.588052
3,2000,2,below,971,86543377,32.293960,22.370597,42.217324,2.281915
4,2000,3,above,1380,102187280,56.017723,43.484168,68.551278,3.107751
...,...,...,...,...,...,...,...,...,...
475,2019,10,below,1583,89044000,48.569717,37.733736,59.405698,3.246236
476,2019,11,above,2111,162058901,49.571002,37.675234,61.466770,2.154658
477,2019,11,below,1466,89181049,34.652927,25.284662,44.021191,1.685092
478,2019,12,above,2010,139936570,48.171581,37.566586,58.776576,3.080424


In [66]:
suicide_climate['suicide_rate'] = (suicide_climate['deaths']/suicide_climate['pop'])*100000
suicide_climate.head()

Unnamed: 0,year,month,temp_group,deaths,pop,tMean,tMin,tMax,prec,suicide_rate
0,2000,1,above,1474,106494121,44.008371,33.198792,54.81795,3.156511,1.384114
1,2000,1,below,1001,81805132,24.651474,14.980885,34.322063,1.927658,1.22364
2,2000,2,above,1286,98644577,49.760446,37.252614,62.268279,2.588052,1.30367
3,2000,2,below,971,86543377,32.29396,22.370597,42.217324,2.281915,1.121981
4,2000,3,above,1380,102187280,56.017723,43.484168,68.551278,3.107751,1.350462


In [67]:
summer_winter = [6, 7, 8, 12, 1, 2]
def is_summer_winter(month):
    if month in summer_winter :
        return 1
    else:
        return 0

In [68]:
suicide_climate['summer_winter'] = suicide_climate['month'].apply(is_summer_winter)
suicide_climate.head()

Unnamed: 0,year,month,temp_group,deaths,pop,tMean,tMin,tMax,prec,suicide_rate,summer_winter
0,2000,1,above,1474,106494121,44.008371,33.198792,54.81795,3.156511,1.384114,1
1,2000,1,below,1001,81805132,24.651474,14.980885,34.322063,1.927658,1.22364,1
2,2000,2,above,1286,98644577,49.760446,37.252614,62.268279,2.588052,1.30367,1
3,2000,2,below,971,86543377,32.29396,22.370597,42.217324,2.281915,1.121981,1
4,2000,3,above,1380,102187280,56.017723,43.484168,68.551278,3.107751,1.350462,0


In [69]:
suicide_climate.to_csv('monthly_GAM_median_temperature.csv')