# In [None]:
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 10 11:24:37 2020

@author: Allison Young
"""


### Mapping Functions

def createDeathsShapefile (prior_shp,recent_shp):
    '''

    Summary
    ----------

    Harmonise the two NC deaths layouts (previous structure, 2010-2013, and
    new structure, 2014-2016) and stack them into a single point layer.

    Each input is first cut down to fifteen columns, the columns are renamed
    to one shared vocabulary, and the rows of recent_shp are placed ahead of
    the rows of prior_shp under a fresh 0..n-1 index. The combined layer has
    the following features:
        ['X', 'Y', 'DEATHYEAR', 'AGEUNITS', 'SEX', 'EDUCATION', 'RACE',
        'HISPANIC', 'COUNTYRES', 'COUNTYOCC', 'ACME', 'MENT1', 'MENT2',
        'MENT3', 'geometry']

    Parameters
    ----------
    prior_shp : 2010-2013 format NC deaths shapefile (Points)
        Must provide DEATHYEAR, EDUCATION, RACE, HISPANIC, CORES2 and
        COUNTYOCC in addition to the shared columns.

    recent_shp : 2014-2016 format NC deaths shapefile (Points)
        Must provide DOD_YR, DEDUC, DRACEBRIDG, DHISPBRIDG, CORES2 and COOCC
        in addition to the shared columns.

    Returns
    -------
    deaths : Combined NC deaths Shapefile with limited features (Points)
        All deaths from both inputs; no cause-of-death filtering is done here.

    '''
    import geopandas as gpd
    import pandas as pd

    # Columns that carry the same name in both layouts.
    leading_fields = ['X', 'Y']
    trailing_fields = ['ACME', 'MENT1', 'MENT2', 'MENT3', 'geometry']

    prior_fields = leading_fields + [
        'DEATHYEAR', 'AGEUNITS', 'SEX', "EDUCATION", 'RACE',
        "HISPANIC", "CORES2", "COUNTYOCC",
    ] + trailing_fields
    recent_fields = leading_fields + [
        "DOD_YR", "AGEUNITS", "SEX", "DEDUC", "DRACEBRIDG",
        "DHISPBRIDG", "CORES2", "COOCC",
    ] + trailing_fields

    # Layout-specific name -> shared name.
    prior_renames = {'CORES2': 'COUNTYRES'}
    recent_renames = {
        'DOD_YR': 'DEATHYEAR',
        'DEDUC': 'EDUCATION',
        "DRACEBRIDG": 'RACE',
        "DHISPBRIDG": "HISPANIC",
        "COOCC": "COUNTYOCC",
        "CORES2": "COUNTYRES",
    }

    older = prior_shp[prior_fields].rename(columns=prior_renames)
    newer = recent_shp[recent_fields].rename(columns=recent_renames)

    # Recent records first, then prior records, re-indexed from zero.
    stacked = pd.concat([newer, older], ignore_index=True)
    return gpd.GeoDataFrame(stacked)



def filterOpioidDeaths(deaths):
    '''
    Summary
    ----------
    Function that accepts a shapefile of deaths and keeps the records that
    match the poisoning definitions offered through the NC Injury Branch
    https://www.injuryfreenc.ncdhhs.gov/DataSurveillance/poisoning/SummaryTableforPoisoningDefinitions-13Nov18-FINAL.pdf

    A record is kept when its ACME code is one of the listed underlying-cause
    codes, OR when any of MENT1, MENT2, MENT3 is one of the listed T40.x
    codes. Records matching more than one test are returned once.

    NOTE(review): the two tests are combined as a union, exactly as before.
    Confirm against the linked definition whether an opioid death should
    instead require BOTH an underlying-cause match and a T40.x mention.

    Parameters
    ----------
    deaths : Point Shapefile
        Shapefile of deaths including ACME cause and top three mentions for
        cause of death (MENT1, MENT2, MENT3).

    Returns
    -------
    Opdeaths : Point Shapefile
        Shapefile of deaths filtered to just those with cause of death of
        Opioids, as defined by ACME and top 3 mentioned causes of death.
        Compared with the input it carries the previous index as a regular
        column (added by reset_index) and a "NewID" column
        (row position + 100). ACME matches come first, followed by MENT1,
        MENT2 and MENT3 matches.

    '''
    import geopandas as gpd
    import pandas as pd

    primarycause = ['X40','X41','X42' ,'X43' , 'X44' ,'X60' ,'X61' ,'X62', 'X63' ,'X64' ,'X85' ,'Y10' ,'Y11' , 'Y12' , 'Y13', 'Y14' ]
    othermention = ['T401', 'T402', 'T403','T404','T406']
    top3ment = ["MENT1", "MENT2", "MENT3"]

    # Give every record a unique id so rows matched by several tests can be
    # collapsed back to one after the pieces are stacked.
    deaths = deaths.reset_index()
    deaths["NewID"] = deaths.index + 100

    matches = [deaths[deaths["ACME"].isin(primarycause)]]
    matches.extend(deaths[deaths[column].isin(othermention)] for column in top3ment)

    # A single concat replaces the former DataFrame.append loop
    # (DataFrame.append was removed in pandas 2.0); row order is unchanged.
    Opdeaths = gpd.GeoDataFrame(pd.concat(matches, ignore_index=True))
    Opdeaths = Opdeaths.drop_duplicates("NewID")
    print("Filtered to just Opioid Deaths")
    return Opdeaths
    
def createNCPopulationShp(USCountiesShp,Year, ACSPASS):
    '''
    Summary
    ---------
    Build a North Carolina county layer carrying ACS 5-year population counts.

    The US county layer is reduced to the rows whose STATEFP is '37', the
    total, White, Black and Hispanic population estimates for every NC county
    are downloaded with the censusdata package for the requested year, the
    ACS variable codes are replaced by readable names, and the two tables are
    merged on a five-character GEOID.

    Parameters
    ----------
    USCountiesShp : GeoPandas Shapefile (Polygon)
        Shapefile of all county boundaries from https://www.census.gov/cgi-bin/geo/shapefiles/index.php.
        Must contain the columns STATEFP and GEOID.
    Year : Year (XXXX)
        Year of data that will be used when pulling ACS population data
        (the rough midpoint of the outcome data).
    ACSPASS : API Key
        Obtain from https://api.census.gov/data/key_signup.html.

    Returns
    -------
    NCcountiespop : GeoPandas Shapefile (Polygon)
        NC County boundaries, with the columns GEO_ID, 'Total Population',
        'White', 'Black' and 'Hispanic' added. Only counties present in both
        tables are returned.

    '''
    import censusdata

    acs_fields = ["GEO_ID","B02001_001E", "B02001_002E", "B02001_003E","B03002_012E"]
    readable_names = {
        'B02001_001E': 'Total Population',
        'B02001_002E': 'White',
        'B02001_003E': 'Black',
        'B03002_012E': 'Hispanic',
    }

    # State FIPS 37 = North Carolina.
    in_nc = USCountiesShp['STATEFP']=='37'
    nc_boundaries = USCountiesShp[in_nc]

    every_nc_county = censusdata.censusgeo([('state', '37'),('county','*')])
    population = censusdata.download('acs5', Year, every_nc_county, acs_fields, ACSPASS, 'detail')
    population = population.rename(columns=readable_names)

    # The last five characters of GEO_ID are used as the key shared with the shapefile.
    population['GEOID'] = population["GEO_ID"].str[-5:]

    return nc_boundaries.merge(population, on='GEOID')


def joinDeathsandCountyShp (OpDeaths, NCcountiespop):
    '''
    Summary:
    ----------
    Function that takes a shapefile of deaths and counties,
    converts them to the same projection (EPSG: 4269),
    and joins them with a geopandas spatial join.

    The death points are declared to be in EPSG:2264 (any CRS already
    recorded on them is overridden) and are then reprojected to EPSG:4269.
    The county layer is reprojected to EPSG:4269 when it carries a CRS; a
    layer without CRS information is used as it is. Neither input is
    modified.

    Parameters
    ----------
    OpDeaths : Shapefile of Deaths (Points)
        Combined shapefile of deaths; coordinates are assumed to be EPSG:2264.
    NCcountiespop : Shapefile of County Boundaries with population by race (Polygon)
        NC Counties with race population data from ACS.

    Returns
    -------
    opdjoin : Shapefile of Deaths by County.
        One row per (county, death) pair from geopandas.sjoin with its
        default settings; the geometry is the county polygon.

    '''
    import geopandas as gpd

    # set_crs returns a new frame, so the caller's object keeps its own CRS.
    # allow_override mirrors the former unconditional ".crs = ..." assignment.
    deaths_pts = OpDeaths.set_crs(epsg=2264, allow_override=True)
    deaths_pts = deaths_pts.to_crs(epsg=4269)

    counties = NCcountiespop
    if counties.crs is not None:
        # to_crs cannot be used on a layer that has no CRS recorded.
        counties = counties.to_crs(epsg=4269)

    opdjoin = gpd.sjoin(counties, deaths_pts)
    return opdjoin

def countOPDbyRace (opdjoin, NCcountiespop):
    '''
    Summary
    ----------
    Function that takes a joined shapefile of deaths, counts the deaths per
    county in total, by RACE value and by HISPANIC value, and joins the
    counts to the county shapefile.

    The counts are built with groupby/size and their column labels are
    flattened to plain tuples before merging, because pandas >= 2.0 refuses
    to merge a single-level frame with a MultiIndex-column pivot table.

    Parameters
    ----------
    opdjoin : Shapefile of Deaths by County
        One row per death; must contain GEO_ID, RACE and HISPANIC.

    NCcountiespop : Shapefile of NC County populations
        One row per county; must contain GEO_ID.

    Returns
    -------
    NCcountyreOPD : County Shapefile with counts of opioid deaths by county
        All columns of NCcountiespop (its NAME column, if any, is renamed
        NAME_x as in earlier outputs), plus 'OpDeaths' and one column per
        observed category, labelled with the tuples ('RACE', value) and
        ('HISPANIC', value). Counties or categories without deaths hold NaN.

    '''
    import pandas as pd

    def _counts_by(category):
        # County x category-value table of death counts, tuple-labelled columns.
        table = opdjoin.groupby(['GEO_ID', category]).size().unstack(category)
        # tupleize_cols=False keeps a flat Index of tuples, not a MultiIndex.
        table.columns = pd.Index(
            [(category, value) for value in table.columns],
            tupleize_cols=False,
        )
        return table

    totals = opdjoin.groupby('GEO_ID').size().to_frame('OpDeaths')
    NCcountyOPD = NCcountiespop.merge(totals, how='left', on='GEO_ID')
    # Earlier versions merged a count column called NAME, which suffixed the
    # county name to NAME_x; keep that label for existing consumers.
    NCcountyOPD = NCcountyOPD.rename(columns={'NAME': 'NAME_x'})

    NCcountyrOPD = NCcountyOPD.merge(_counts_by('RACE'), how='left', on='GEO_ID')
    NCcountyreOPD = NCcountyrOPD.merge(_counts_by('HISPANIC'), how='left', on='GEO_ID')
    return NCcountyreOPD

def calculateSelectRaceRates (NCcountyreOPD):
    '''

    Summary
    ----------
    Function that takes a Shapefile of NC Counties with data from ACS and counts
    of opioid deaths by county by race. Calculates rates of opioid deaths by
    county (with mid-point population), expressed per 10,000 population.

    Steps:
      * 'HispanicOPD' is the sum of the ('HISPANIC', code) counts for the
        codes C, M, O, P, S and U, with missing counts treated as 0.
        ('HISPANIC', 'N') is not part of the sum.
      * ('RACE', 1) is renamed 'WOPD' and ('RACE', 2) is renamed 'AAOPD';
        every other ('RACE', ...) and ('HISPANIC', ...) column handled here
        is dropped.
      * TotalRate, WRate, AARate and HRate are count / population * 10000.

    Category columns that are absent from the input are skipped instead of
    raising KeyError; an absent 'WOPD'/'AAOPD' source column yields NaN counts
    and rates. The total population is read from 'Total Popu' (the name seen
    after a shapefile round trip) or, failing that, 'Total Population'.
    The input frame is not modified.

    Parameters
    ----------
    NCcountyreOPD : Polygon shapefile
        Shapefile with data on race population and opioid deaths by county.
        Needs 'OpDeaths', 'White', 'Black', 'Hispanic' and a total
        population column.

    Returns
    -------
    NCOpdf : Shapefile
        Copy of the shapefile passed as parameter, with added rate columns.
        'OpDeaths', 'WOPD' and 'AAOPD' are not NaN-filled, so a county
        with a missing count gets a NaN rate.

    '''
    hispanic_origin_codes = ('C', 'M', 'O', 'P', 'S', 'U')
    dropped_race_codes = (0, 3, 4, 5, 6, 7, 8, 9)

    # Work on a copy so the caller's frame does not gain a HispanicOPD column.
    frame = NCcountyreOPD.copy()

    hispanic_deaths = 0
    for code in hispanic_origin_codes:
        label = ('HISPANIC', code)
        if label in frame.columns:
            hispanic_deaths = hispanic_deaths + frame[label].fillna(0)
    frame["HispanicOPD"] = hispanic_deaths

    unwanted = [('RACE', code) for code in dropped_race_codes]
    unwanted += [('HISPANIC', code) for code in hispanic_origin_codes + ('N',)]
    # Only categories that occurred in the data exist as columns.
    present = [label for label in unwanted if label in frame.columns]
    NCOpdf = frame.drop(columns=present)

    NCOpdf = NCOpdf.rename(columns={('RACE',1):'WOPD', ('RACE',2):'AAOPD'})
    for count_column in ('WOPD', 'AAOPD'):
        if count_column not in NCOpdf.columns:
            NCOpdf[count_column] = float('nan')

    # Shapefile field names are cut to 10 characters, hence 'Total Popu'.
    if 'Total Popu' in NCOpdf.columns:
        total_population = NCOpdf['Total Popu']
    else:
        total_population = NCOpdf['Total Population']

    NCOpdf["TotalRate"]= (NCOpdf['OpDeaths']/total_population)*10000
    NCOpdf["WRate"]= (NCOpdf['WOPD']/NCOpdf['White'])*10000
    NCOpdf["AARate"]= (NCOpdf['AAOPD']/NCOpdf['Black'])*10000
    NCOpdf["HRate"]= (NCOpdf['HispanicOPD']/NCOpdf['Hispanic'])*10000
    return NCOpdf