# Figure 3 Final

Notebook to make data for figure 3 for ms <br>
Actual figure will be rendered in QGIS  <br>
by Cascade Tuholske 2020.02.23

In [1]:
#### Depdencies 
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns

In [2]:
#### Functions
def pop_stat(df, geog, stats):
    
    """Finds linear coef for increase in stat by a given geography from 1983 - 2016, as well
    as the pct change in population of the cities within the given geography"""

    # Get results
    labels = []
    delt_list = []
    r2_list = []
    coef_list = []
    p_list = []
    p83 = []
    p16 = []
    df_out = pd.DataFrame()

    for label, df_geog in df.groupby(geog):

        # Get Data
        X_year = np.array(df_geog.groupby('year')['ID_HDC_G0'].mean().index).reshape((-1, 1))
        Y_stats = np.array(df_geog.groupby('year')[stats].sum()).reshape((-1, 1))

        # Add Intercept
        X_year_2 = sm.add_constant(X_year)

        # Regress
        model = sm.OLS(Y_stats, X_year_2).fit() 
        
        # Get slope
        # first param in intercept coef, second is slope of line but if slope = 0, then intecept
        if len(model.params) == 2:
            coef = model.params[1]
            
        else:
            coef = model.params[0]
        
        # R2 and P
        r2 = model.rsquared_adj
        p = model.pvalues[0]

        # Pop change
        delt = df_geog.drop_duplicates('ID_HDC_G0').copy()
        delt['delt_pop'] = delt['P2016'] - delt['P1983']
        delt = delt['delt_pop'].sum()
        
        # Make lists
        labels.append(label)
        r2_list.append(r2)
        coef_list.append(coef)
        p_list.append(p)
        delt_list.append(delt)

    # Make data frame
    df_out[geog] = labels
    df_out['p_delt'] = delt_list
    df_out['r2'] = r2_list
    df_out['coef'] = coef_list
    df_out['p_value'] = [round(elem, 4) for elem in p_list]

    return df_out

def plot_data(stats, geog):
    """ Function calculate coef of people days due to pop and heat and the 
    attribution index for distribution plots
    
    Args:
        stats = df to feed in
        geog = geography level to conduct analysis (city-level is 'ID-HDC-G0')
    
    """
    # Get people days dues to heat coef
    heat = pop_stat(stats, geog, 'people_days_heat') # get stats 
    heat.rename(columns={"coef": "coef_heat"}, inplace = True)
    
    # Get people days total
    pdays_all = pop_stat(stats, geog, 'people_days_pop') # get stats
    pdays_all.rename(columns={"coef": "coef_pop"}, inplace = True)
    
    # Merge pdays_all and heat
    heat = heat.merge(pdays_all[[geog, 'coef_pop']], on = geog, how = 'left')
    
    # drop negitive and zero slopes
    heat = heat[heat['coef_heat'] > 0]
    heat = heat[heat['coef_pop'] > 0]
    
    # attrib coef
    heat['coef_attrib'] = (heat['coef_pop'] - heat['coef_heat']) / (heat['coef_pop'] + heat['coef_heat']) # normalize dif
    
    return heat

In [12]:
#### Load Data
# file path
DATA_IN = "/home/cascade/projects/UrbanHeat/data/"  # Note: Need ?dl=1 to make sure this file gets read correctly
FIG_OUT = "/home/cascade/projects/UrbanHeat/figures/"

# Raw Heat
FN_IN = 'processed/All_data_HI406_figdata.csv'
FN_OUT = 'processed/All_data_HI406_figdata_map.csv'
stats = pd.read_csv(DATA_IN+FN_IN)

# scale the date in the plot 
scale = 10**9 

In [4]:
stats.head()

Unnamed: 0.1,Unnamed: 0,ID_HDC_G0,year,total_days,P,P1983,P2016,people_days,people_days_heat,people_days_pop
0,0,22,1983,2,52064.452435,52064.452435,73006.671133,0.000104,0.000104,0.0
1,1,26,1983,1,194088.886834,194088.886834,268055.635628,0.000194,0.000194,0.0
2,2,27,1983,1,80540.77994,80540.77994,93335.494324,8.1e-05,8.1e-05,0.0
3,3,28,1983,9,59320.971209,59320.971209,91449.606255,0.000534,0.000534,0.0
4,4,29,1983,8,336518.836621,336518.836621,533318.453653,0.002692,0.002692,0.0


In [5]:
#### Run the stats for each city
geog = 'ID_HDC_G0'
stats_out = plot_data(stats, geog)

## Drop cities where p_value for people days is >0.05
stats_out_final = stats_out[stats_out['p_value'] < 0.05]
stats_out_final.head()

  return 1 - self.ssr/self.centered_tss
  return self.params / self.bse
  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


Unnamed: 0,ID_HDC_G0,p_delt,r2,coef_heat,p_value,coef_pop,coef_attrib
5,22,20942.218698,0.24209,1.1e-05,0.0021,6e-06,-0.336245
8,26,73966.748794,0.289913,3.9e-05,0.0007,1.6e-05,-0.411746
10,28,32128.635046,0.329861,2.4e-05,0.0003,2e-05,-0.091068
12,30,10807.744417,0.129929,9e-06,0.0222,2e-06,-0.642931
15,33,21560.935608,0.178242,1.3e-05,0.0087,1.2e-05,-0.048928


In [6]:
## Add In Meta Data
geog = ['region', 'intermediate-region', 'sub-region','CTR_MN_NM', 'ID_HDC_G0', 'GCPNT_LAT', 'GCPNT_LON']
meta_fn = 'processed/All_data_HI406_meta.csv'
all_data = pd.read_csv(DATA_IN+meta_fn)
meta = all_data[geog]
meta = meta.drop_duplicates('ID_HDC_G0')

## Merge in meta
stats_out_final = stats_out_final.merge(meta, on = 'ID_HDC_G0', how = 'left')

## Add In Population
pop = stats[['P1983', 'P2016', 'ID_HDC_G0']]
pop = pop.drop_duplicates('ID_HDC_G0')
stats_out_final = stats_out_final.merge(pop, on = 'ID_HDC_G0', how = 'inner')

## Write it out 
stats_out_final_meta_pop.to_csv(DATA_IN+FN_OUT)

Unnamed: 0,ID_HDC_G0,p_delt,r2,coef_heat,p_value,coef_pop,coef_attrib,region,intermediate-region,sub-region,CTR_MN_NM,GCPNT_LAT,GCPNT_LON
0,22,20942.218698,0.24209,1.1e-05,0.0021,6e-06,-0.336245,Americas,Northern America,Northern America,United States,37.688409,-121.75398
1,26,73966.748794,0.289913,3.9e-05,0.0007,1.6e-05,-0.411746,Americas,Northern America,Northern America,United States,37.985433,-121.797516
2,28,32128.635046,0.329861,2.4e-05,0.0003,2e-05,-0.091068,Americas,Northern America,Northern America,United States,37.730079,-121.431413
3,30,10807.744417,0.129929,9e-06,0.0222,2e-06,-0.642931,Americas,Northern America,Northern America,United States,38.346859,-121.969447
4,33,21560.935608,0.178242,1.3e-05,0.0087,1.2e-05,-0.048928,Americas,Northern America,Northern America,United States,37.799915,-121.21972


In [13]:
stats_out_final_meta_pop.to_csv(DATA_IN+FN_OUT)