In [1]:
from pygam import LinearGAM, s, f

In [2]:
import dask.dataframe as ddf
import pandas as pd
import datetime
import functools
import seaborn as sns
import matplotlib.pyplot as plt

### Import suicide death data

In [3]:
# Calendar years covered by the analysis: 2000 through 2019 inclusive.
years = list(range(2000, 2020))
years

[2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019]

In [4]:
# Load one patient-level suicide CSV per year. The year column is read as int
# and the county/state code columns as str; columns whose name starts with
# 'Unnamed' are dropped before the frame is collected.
data = []
for year in years:
    suicide_year = pd.read_csv(
        '/global/cfs/cdirs/m1532/Projects_MVP/geospatial/Suicide_Death_NCHS/suicide_patient_level_1960_2020/overall/overall_suicide_patient_level_' + str(year) + '.csv',
        dtype={
            'year': int,
            'county_residence': str,
            'county_death': str,
            'state_residence': str,
            'state_death': str,
        },
    )
    suicide_year = suicide_year.loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
    data.append(suicide_year)

In [5]:
# Stack the per-year frames row-wise into a single table. ignore_index is not
# set, so each year's original row labels are kept.
suicide = pd.concat(data, axis=0)
suicide.head()

Unnamed: 0,year,county_residence,state_residence,state_death,county_death,month,sex,race,age,age_range,death_cause
0,2000,1115,1,1,1055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,1101,1,1,1101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,1001,1,1,1001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,1003,1,1,1003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,1015,1,1,1015,Jan,Male,White,6,age 25-64,firearm_suicide


In [6]:
# Discard the place-of-residence codes and expose the place-of-death codes
# under the 'statefips' / 'fips' key names.
suicide = (
    suicide
    .drop(columns=['county_residence', 'state_residence'])
    .rename(columns={'state_death': 'statefips', 'county_death': 'fips'})
)
suicide.head()

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause
0,2000,1,1055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,1,1101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,1,1001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,1,1003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,1,1015,Jan,Male,White,6,age 25-64,firearm_suicide


In [7]:
suicide

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause
0,2000,01,01055,Jan,Male,White,8,age 25-64,firearm_suicide
1,2000,01,01101,Jan,Male,Black,4,age 0-24,firearm_suicide
2,2000,01,01001,Jan,Female,White,6,age 25-64,firearm_suicide
3,2000,01,01003,Jan,Female,White,3,age 0-24,nonfirearm_suicide
4,2000,01,01015,Jan,Male,White,6,age 25-64,firearm_suicide
...,...,...,...,...,...,...,...,...,...
47669,2019,36,36081,Dec,Male,Other,5,age 25-64,nonfirearm_suicide
47670,2019,36,36081,Dec,Male,Black,5,age 25-64,nonfirearm_suicide
47671,2019,36,36005,Dec,Female,Black,4,age 0-24,nonfirearm_suicide
47672,2019,36,36005,Dec,Male,White,6,age 25-64,nonfirearm_suicide


In [8]:
suicide['statefips'].nunique()

52

### Add state information

In [9]:
# Read the state lookup workbook; 'Code' is parsed as an integer column.
state = pd.read_excel(
    '/global/cfs/cdirs/m1532/Projects_MVP/geospatial/temp_bins_suicide/test_paper/state_code.xlsx',
    engine='openpyxl',
    dtype={'Code': int},
)
state.head()

Unnamed: 0,Code,State,Abbreviation,Alpha code
0,1,Alabama,Ala.,AL
1,2,Alaska,,AK
2,4,Arizona,Ariz.,AZ
3,5,Arkansas,Ark.,AR
4,6,California,Calif.,CA


In [10]:
def convertStateCode(code):
    """Return ``code`` as text, left-padded with '0' to at least two characters.

    Values whose text form is already two or more characters long are
    returned unchanged (as a string).
    """
    text = str(code)
    # A non-positive repeat count yields an empty prefix, so longer values pass through.
    padding = '0' * (2 - len(text))
    return padding + text

In [11]:
# Turn the numeric codes into zero-padded two-character strings, then rename
# the columns to the key names used when joining onto the other tables.
state = (
    state
    .assign(Code=state['Code'].apply(convertStateCode))
    .rename(columns={'Code': 'statefips', 'State': 'state_name', 'Alpha code': 'state'})
)
state.head()

Unnamed: 0,statefips,state_name,Abbreviation,state
0,1,Alabama,Ala.,AL
1,2,Alaska,,AK
2,4,Arizona,Ariz.,AZ
3,5,Arkansas,Ark.,AR
4,6,California,Calif.,CA


In [12]:
suicide['statefips'].unique()

array(['01', 'na', '04', '05', '06', '08', '09', '10', '11', '12', '13',
       '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25',
       '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36',
       '37', '38', '39', '40', '41', '42', '44', '45', '46', '47', '48',
       '49', '50', '51', '53', '54', '55', '56', '02'], dtype=object)

In [13]:
# Give every state a consecutive integer id: reset_index() materialises the
# row labels as an 'index' column and 'state_code' is that label plus one.
# The guard makes the cell safe to re-run; without it every extra execution
# would insert another index column into `state` (and eventually error out).
if 'state_code' not in state.columns:
    state = state.reset_index()
    state['state_code'] = state['index'] + 1
state

Unnamed: 0,index,statefips,state_name,Abbreviation,state,state_code
0,0,1,Alabama,Ala.,AL,1
1,1,2,Alaska,,AK,2
2,2,4,Arizona,Ariz.,AZ,3
3,3,5,Arkansas,Ark.,AR,4
4,4,6,California,Calif.,CA,5
5,5,8,Colorado,Colo.,CO,6
6,6,9,Connecticut,Conn.,CT,7
7,7,10,Delaware,Del.,DE,8
8,8,11,District of Columbia,D.C.,DC,9
9,9,12,Florida,Fla.,FL,10


In [14]:
state['statefips'].unique()

array(['01', '02', '04', '05', '06', '08', '09', '10', '11', '12', '13',
       '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25',
       '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36',
       '37', '38', '39', '40', '41', '42', '44', '45', '46', '47', '48',
       '49', '50', '51', '53', '54', '55', '56'], dtype=object)

In [15]:
# Attach state name, postal abbreviation and integer state id to every death
# record. The join is inner, so records whose 'statefips' has no match in
# `state` are dropped.
suicide = pd.merge(
    suicide,
    state[['statefips', 'state_name', 'state', 'state_code']],
    how='inner',
    on='statefips',
)
suicide

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause,state_name,state,state_code
0,2000,01,01055,Jan,Male,White,8,age 25-64,firearm_suicide,Alabama,AL,1
1,2000,01,01101,Jan,Male,Black,4,age 0-24,firearm_suicide,Alabama,AL,1
2,2000,01,01001,Jan,Female,White,6,age 25-64,firearm_suicide,Alabama,AL,1
3,2000,01,01003,Jan,Female,White,3,age 0-24,nonfirearm_suicide,Alabama,AL,1
4,2000,01,01015,Jan,Male,White,6,age 25-64,firearm_suicide,Alabama,AL,1
...,...,...,...,...,...,...,...,...,...,...,...,...
763871,2019,02,02020,Dec,Male,White,4,age 0-24,firearm_suicide,Alaska,AK,2
763872,2019,02,02170,Oct,Male,White,8,age 25-64,firearm_suicide,Alaska,AK,2
763873,2019,02,02050,Dec,Male,White,4,age 0-24,nonfirearm_suicide,Alaska,AK,2
763874,2019,02,02122,Dec,Male,White,5,age 25-64,firearm_suicide,Alaska,AK,2


In [17]:
suicide['state'].nunique()

51

In [18]:
suicide['state_code'].nunique()

51

In [19]:
suicide['state_name'].nunique()

51

### Load population data

In [20]:
# Load one monthly-population CSV per year ('year' as int, 'fips' as str) and
# drop any column whose name starts with 'Unnamed'.
data_pop = []
for year in years:
    pop_year = pd.read_csv(
        '/global/cfs/cdirs/m1532/Projects_MVP/geospatial/temp_bins_suicide/Population/population_monthly/population_monthly_' + str(year) + '.csv',
        dtype={'year': int, 'fips': str},
    )
    pop_year = pop_year.loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
    data_pop.append(pop_year)

In [21]:
# Stack the yearly population frames row-wise into one table.
pop = pd.concat(data_pop, axis=0)
pop.head()

Unnamed: 0,year,fips,pop,month
0,2000,1001,39646,Jan
1,2000,1001,39758,Feb
2,2000,1001,39870,Mar
3,2000,1001,39982,Apr
4,2000,1001,40094,May


In [22]:
def findState(code):
    """Return the first two characters of ``code``.

    Used on county FIPS strings to obtain the state prefix; inputs shorter
    than two characters are returned as-is.
    """
    state_prefix = code[0:2]
    return state_prefix

In [23]:
# Derive the state code for each county row from the leading two characters
# of its 'fips' string.
pop['statefips'] = pop['fips'].map(findState)
pop.head()

Unnamed: 0,year,fips,pop,month,statefips
0,2000,1001,39646,Jan,1
1,2000,1001,39758,Feb,1
2,2000,1001,39870,Mar,1
3,2000,1001,39982,Apr,1
4,2000,1001,40094,May,1


In [24]:
pop['statefips'].nunique()

51

In [25]:
# Attach state name, postal abbreviation and integer state id to each
# population row; the inner join keeps only rows with a matching 'statefips'.
pop = pd.merge(
    pop,
    state[['statefips', 'state_name', 'state', 'state_code']],
    how='inner',
    on='statefips',
)
pop.head()

Unnamed: 0,year,fips,pop,month,statefips,state_name,state,state_code
0,2000,1001,39646,Jan,1,Alabama,AL,1
1,2000,1001,39758,Feb,1,Alabama,AL,1
2,2000,1001,39870,Mar,1,Alabama,AL,1
3,2000,1001,39982,Apr,1,Alabama,AL,1
4,2000,1001,40094,May,1,Alabama,AL,1


In [26]:
pop['statefips'].nunique()

51

In [27]:
# Three-letter month abbreviation -> calendar month number (1-12).
month_replace = {
    abbreviation: number
    for number, abbreviation in enumerate(
        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        start=1,
    )
}
# Swap the month abbreviations for their month numbers using month_replace.
# Values that are not keys of month_replace are left unchanged by replace().
pop['month'] = pop['month'].replace(month_replace)
# Show the distinct month values after the replacement.
pop['month'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [28]:
pop.head()

Unnamed: 0,year,fips,pop,month,statefips,state_name,state,state_code
0,2000,1001,39646,1,1,Alabama,AL,1
1,2000,1001,39758,2,1,Alabama,AL,1
2,2000,1001,39870,3,1,Alabama,AL,1
3,2000,1001,39982,4,1,Alabama,AL,1
4,2000,1001,40094,5,1,Alabama,AL,1


In [29]:
# Collapse county populations to one row per year / month / state by summing
# 'pop' (the county 'fips' column is dropped first so it is not aggregated).
pop_month = (
    pop
    .drop(columns=['fips'])
    .groupby(['year', 'month', 'statefips', 'state_name', 'state', 'state_code'])
    .sum()
    .reset_index()
)
pop_month.head()

Unnamed: 0,year,month,statefips,state_name,state,state_code,pop
0,2000,1,1,Alabama,AL,1,4080223
1,2000,1,2,Alaska,AK,2,580131
2,2000,1,4,Arizona,AZ,3,4525276
3,2000,1,5,Arkansas,AR,4,2397684
4,2000,1,6,California,CA,5,30860234


In [30]:
pop_month['statefips'].nunique()

51

In [31]:
suicide

Unnamed: 0,year,statefips,fips,month,sex,race,age,age_range,death_cause,state_name,state,state_code
0,2000,01,01055,Jan,Male,White,8,age 25-64,firearm_suicide,Alabama,AL,1
1,2000,01,01101,Jan,Male,Black,4,age 0-24,firearm_suicide,Alabama,AL,1
2,2000,01,01001,Jan,Female,White,6,age 25-64,firearm_suicide,Alabama,AL,1
3,2000,01,01003,Jan,Female,White,3,age 0-24,nonfirearm_suicide,Alabama,AL,1
4,2000,01,01015,Jan,Male,White,6,age 25-64,firearm_suicide,Alabama,AL,1
...,...,...,...,...,...,...,...,...,...,...,...,...
763871,2019,02,02020,Dec,Male,White,4,age 0-24,firearm_suicide,Alaska,AK,2
763872,2019,02,02170,Oct,Male,White,8,age 25-64,firearm_suicide,Alaska,AK,2
763873,2019,02,02050,Dec,Male,White,4,age 0-24,nonfirearm_suicide,Alaska,AK,2
763874,2019,02,02122,Dec,Male,White,5,age 25-64,firearm_suicide,Alaska,AK,2


In [32]:
# Count death records per year / month / state. After the drop, 'death_cause'
# is the only non-key column, so its non-null count is the number of deaths.
# State-months without any record do not appear in the result.
suicide_death = (
    suicide
    .drop(columns=['fips', 'sex', 'race', 'age', 'age_range'])
    .groupby(['year', 'month', 'statefips', 'state_name', 'state', 'state_code'])
    .count()
    .reset_index()
)
suicide_death.head()

Unnamed: 0,year,month,statefips,state_name,state,state_code,death_cause
0,2000,Apr,1,Alabama,AL,1,53
1,2000,Apr,4,Arizona,AZ,3,76
2,2000,Apr,5,Arkansas,AR,4,26
3,2000,Apr,6,California,CA,5,257
4,2000,Apr,8,Colorado,CO,6,55


In [33]:
suicide_death['year'].unique()

array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019])

In [34]:
# Swap the month abbreviations for their month numbers using month_replace,
# so 'month' matches the numeric months in pop_month for the later merge.
suicide_death['month'] = suicide_death['month'].replace(month_replace)
suicide_death.head()

Unnamed: 0,year,month,statefips,state_name,state,state_code,death_cause
0,2000,4,1,Alabama,AL,1,53
1,2000,4,4,Arizona,AZ,3,76
2,2000,4,5,Arkansas,AR,4,26
3,2000,4,6,California,CA,5,257
4,2000,4,8,Colorado,CO,6,55


In [36]:
suicide_death['state'].nunique()

51

In [37]:
# Pair each state-month death count with that state-month's population.
# Inner join: only key combinations present in both tables are kept.
suicide_pop = pd.merge(
    suicide_death,
    pop_month,
    how='inner',
    on=['year', 'month', 'statefips', 'state_name', 'state', 'state_code'],
)
suicide_pop.head()

Unnamed: 0,year,month,statefips,state_name,state,state_code,death_cause,pop
0,2000,4,1,Alabama,AL,1,53,4065001
1,2000,4,4,Arizona,AZ,3,76,4538286
2,2000,4,5,Arkansas,AR,4,26,2408222
3,2000,4,6,California,CA,5,257,30743172
4,2000,4,8,Colorado,CO,6,55,3956218


In [38]:
suicide_pop['state'].nunique()

51

In [39]:
# Rename the count column to 'deaths' and add the monthly suicide rate,
# expressed as deaths per 100,000 population.
suicide_pop = (
    suicide_pop
    .rename(columns={'death_cause': 'deaths'})
    .assign(suicide_rate=lambda frame: (frame['deaths'] / frame['pop']) * 100000)
)
suicide_pop.head()

Unnamed: 0,year,month,statefips,state_name,state,state_code,deaths,pop,suicide_rate
0,2000,4,1,Alabama,AL,1,53,4065001,1.303813
1,2000,4,4,Arizona,AZ,3,76,4538286,1.674641
2,2000,4,5,Arkansas,AR,4,26,2408222,1.079635
3,2000,4,6,California,CA,5,257,30743172,0.835958
4,2000,4,8,Colorado,CO,6,55,3956218,1.390217


### Load temperature and precipitation data from PRISM

In [40]:
# Load one daily county-level PRISM CSV per year ('year' as int, 'fips' as
# str) and drop any column whose name starts with 'Unnamed'.
data_temp = []
for year in years:
    temp_year = pd.read_csv(
        '/global/cfs/cdirs/m1532/Projects_MVP/geospatial/PRISM_Data/PRISM_daily_county_level/prism_daily_county_level_' + str(year) + '.csv',
        dtype={'year': int, 'fips': str},
    )
    temp_year = temp_year.loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
    data_temp.append(temp_year)

In [41]:
# Stack the yearly PRISM frames row-wise into one table.
climate = pd.concat(data_temp, axis=0)
climate.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year
0,1001,2000-01-01,16.262459,10.08938,22.435537,0.560423,2000
1,1001,2000-01-02,17.412651,11.95422,22.871081,0.390251,2000
2,1001,2000-01-03,18.705264,14.001039,23.40949,0.21617,2000
3,1001,2000-01-04,16.203456,9.861904,22.545009,16.488676,2000
4,1001,2000-01-05,6.494604,-1.153744,14.142953,0.216055,2000


In [42]:
# Derive the state code for each county-day row from the leading two
# characters of its 'fips' string.
climate['statefips'] = climate['fips'].map(findState)

In [43]:
climate['statefips'].nunique()

49

In [44]:
climate['statefips'].unique()

array(['01', '04', '05', '06', '08', '09', '10', '11', '12', '13', '16',
       '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27',
       '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38',
       '39', '40', '41', '42', '44', '45', '46', '47', '48', '49', '50',
       '51', '53', '54', '55', '56'], dtype=object)

In [45]:
state

Unnamed: 0,index,statefips,state_name,Abbreviation,state,state_code
0,0,1,Alabama,Ala.,AL,1
1,1,2,Alaska,,AK,2
2,2,4,Arizona,Ariz.,AZ,3
3,3,5,Arkansas,Ark.,AR,4
4,4,6,California,Calif.,CA,5
5,5,8,Colorado,Colo.,CO,6
6,6,9,Connecticut,Conn.,CT,7
7,7,10,Delaware,Del.,DE,8
8,8,11,District of Columbia,D.C.,DC,9
9,9,12,Florida,Fla.,FL,10


In [32]:
def celsius_to_fahrenheit(celsius):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit.

    Computes ``celsius * 9 / 5 + 32``; the argument may be any value that
    supports those arithmetic operations.
    """
    scaled = celsius * 9 / 5
    return scaled + 32

In [33]:
# Convert the three temperature columns to Fahrenheit, overwriting them in
# place. NOTE: re-running this cell converts the already-converted values
# again, so run it exactly once per fresh `climate` frame.
for temperature_column in ('tMean', 'tMin', 'tMax'):
    climate[temperature_column] = climate[temperature_column].apply(celsius_to_fahrenheit)

In [34]:
import datetime

In [35]:
def convertTime(time):
    """Parse a ``'YYYY-MM-DD'`` string into a ``datetime.datetime``.

    Parameters
    ----------
    time : str
        Date text in ``%Y-%m-%d`` format, e.g. ``'2000-01-05'``.

    Returns
    -------
    datetime.datetime
        The parsed date at midnight.

    Raises
    ------
    ValueError
        If ``time`` does not match the ``%Y-%m-%d`` format.
    """
    # The notebook does `import datetime` (the module), so strptime has to be
    # reached through the datetime class; `datetime.strptime` does not exist
    # on the module and raised AttributeError.
    return datetime.datetime.strptime(time, '%Y-%m-%d')

In [36]:
def getMonth(date):
    """Return the abbreviated month name of ``date`` via ``strftime('%b')``.

    ``date`` may be any object exposing ``strftime`` (e.g. a datetime or a
    pandas Timestamp).
    """
    month_abbreviation = date.strftime('%b')
    return month_abbreviation

In [37]:
# Parse the 'date' strings into pandas datetimes.
climate['date'] = pd.to_datetime(climate['date'])
# Derive the abbreviated month name for every row via getMonth.
# NOTE(review): '%b' output depends on the process locale; the next step
# presumably expects English abbreviations ('Jan', 'Feb', ...) — confirm.
climate['month'] = climate['date'].apply(getMonth)

In [38]:
# Swap the month abbreviations for their month numbers using month_replace.
# Values that are not keys of month_replace are left unchanged by replace().
climate['month'] = climate['month'].replace(month_replace)
climate.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year,statefips,month
0,1001,2000-01-01,61.272425,50.160884,72.383967,0.560423,2000,1,1
1,1001,2000-01-02,63.342771,53.517596,73.167946,0.390251,2000,1,1
2,1001,2000-01-03,65.669476,57.20187,74.137082,0.21617,2000,1,1
3,1001,2000-01-04,61.166221,49.751427,72.581016,16.488676,2000,1,1
4,1001,2000-01-05,43.690288,29.923261,57.457315,0.216055,2000,1,1


In [39]:
# Attach state name, postal abbreviation and integer state id to each climate
# row; the inner join keeps only rows with a matching 'statefips'.
climate = pd.merge(
    climate,
    state[['statefips', 'state_name', 'state', 'state_code']],
    how='inner',
    on='statefips',
)
climate.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year,statefips,month,state_name,state,state_code
0,1001,2000-01-01,61.272425,50.160884,72.383967,0.560423,2000,1,1,Alabama,AL,1
1,1001,2000-01-02,63.342771,53.517596,73.167946,0.390251,2000,1,1,Alabama,AL,1
2,1001,2000-01-03,65.669476,57.20187,74.137082,0.21617,2000,1,1,Alabama,AL,1
3,1001,2000-01-04,61.166221,49.751427,72.581016,16.488676,2000,1,1,Alabama,AL,1
4,1001,2000-01-05,43.690288,29.923261,57.457315,0.216055,2000,1,1,Alabama,AL,1


In [40]:
# Average the remaining numeric columns (tMean, tMin, tMax, prec) over every
# county-day row within each year / month / state. This is a plain unweighted
# mean of the rows in each group.
climate = (
    climate
    .drop(columns=['fips', 'date'])
    .groupby(['year', 'month', 'statefips', 'state_name', 'state', 'state_code'])
    .mean()
    .reset_index()
)
climate.head()

Unnamed: 0,year,month,statefips,state_name,state,state_code,tMean,tMin,tMax,prec
0,2000,1,1,Alabama,AL,1,45.726435,34.72464,56.72823,4.144743
1,2000,1,4,Arizona,AZ,3,47.05167,31.583661,62.51968,0.289194
2,2000,1,5,Arkansas,AR,4,41.456516,31.150597,51.762435,2.154072
3,2000,1,6,California,CA,5,45.209225,36.281889,54.136562,6.377559
4,2000,1,8,Colorado,CO,6,27.428198,14.315464,40.540933,1.249149


In [41]:
# Join the state-month suicide rates with the state-month climate averages.
# Inner join: only key combinations present in both tables are kept.
suicide_climate = pd.merge(
    suicide_pop,
    climate,
    how='inner',
    on=['year', 'month', 'statefips', 'state_name', 'state', 'state_code'],
)
suicide_climate.head()

Unnamed: 0,year,month,statefips,state_name,state,state_code,deaths,pop,suicide_rate,tMean,tMin,tMax,prec
0,2000,4,1,Alabama,AL,1,53,4065001,1.303813,59.684953,46.193093,73.176814,4.321359
1,2000,4,4,Arizona,AZ,3,76,4538286,1.674641,62.882575,45.452718,80.312432,0.164545
2,2000,4,5,Arkansas,AR,4,26,2408222,1.079635,59.136681,47.225909,71.047454,3.235783
3,2000,4,6,California,CA,5,257,30743172,0.835958,56.442361,43.359611,69.525111,1.93321
4,2000,4,8,Colorado,CO,6,55,3956218,1.390217,44.750284,28.860093,60.640476,1.323711


In [42]:
# Month numbers flagged as summer (Jun-Aug) or winter (Dec-Feb).
summer_winter = [6, 7, 8, 12, 1, 2]
def is_summer_winter(month):
    """Return 1 if ``month`` is listed in ``summer_winter``, otherwise 0."""
    return 1 if month in summer_winter else 0

In [43]:
# Add a 0/1 indicator column marking rows whose month is in summer_winter.
suicide_climate['summer_winter'] = suicide_climate['month'].map(is_summer_winter)
suicide_climate.head()

Unnamed: 0,year,month,statefips,state_name,state,state_code,deaths,pop,suicide_rate,tMean,tMin,tMax,prec,summer_winter
0,2000,4,1,Alabama,AL,1,53,4065001,1.303813,59.684953,46.193093,73.176814,4.321359,0
1,2000,4,4,Arizona,AZ,3,76,4538286,1.674641,62.882575,45.452718,80.312432,0.164545,0
2,2000,4,5,Arkansas,AR,4,26,2408222,1.079635,59.136681,47.225909,71.047454,3.235783,0
3,2000,4,6,California,CA,5,257,30743172,0.835958,56.442361,43.359611,69.525111,1.93321,0
4,2000,4,8,Colorado,CO,6,55,3956218,1.390217,44.750284,28.860093,60.640476,1.323711,0


In [44]:
# Write the merged state-month table to 'monthly_GAM_state.csv' in the
# current working directory.
# NOTE(review): to_csv writes the row index by default, so the file gains an
# unnamed leading column — confirm downstream readers expect that.
suicide_climate.to_csv('monthly_GAM_state.csv')

In [None]:
# NOTE: this cell held a bare `!pip install` with no package name, which pip
# rejects. Install the notebook's dependencies (e.g. pygam, imported in the
# first cell) in the environment before launching the kernel, or use a pinned
# `%pip install <package>==<version>` in a dedicated cell at the top.