# Figure 3

- Notebook to make data for figure 3 (likely a map)
- by Cascade Tuholske on 2020.10.01

**NEED TO DROP THOSE WITH LESS THAN ZERO**

#### Dependencies

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as seabornInstance 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import statsmodels.api as sm
import seaborn as sns
import glob

#### Load and make data

In [36]:
def make_pdays(df_stats, df_pop):
    
    "Makes a dataframe with stats and population to calc people days"
    
    # Make Population Long Format
    pop_long = pd.wide_to_long(df_pop, stubnames = 'P', i = 'ID_HDC_G0', j = 'year')
    pop_long.reset_index(level=0, inplace=True)
    pop_long.reset_index(level=0, inplace=True)
    pop_long = pop_long.drop('Unnamed: 0', axis = 1)
    
    # Get Total Days 
    data = df_stats.drop_duplicates(['year', 'ID_HDC_G0'], keep = 'first') # drop out duplicates
    pdays = pd.DataFrame()
    pdays['ID_HDC_G0'] = data['ID_HDC_G0']
    pdays['year'] = data['year']
    pdays['total_days'] = data['total_days']
    pdays['CTR_MN_NM'] = data['CTR_MN_NM']
    pdays['sub-region'] = data['sub-region']
    pdays['region'] = data['region']
    pdays['intermediate-region'] = data['intermediate-region']
    
    # Merge
    pdays_merge = pdays.merge(pop_long, on=['ID_HDC_G0', 'year'], how = 'left')

    # Now get people days from 1983 and change
    p = pd.DataFrame()
    p['ID_HDC_G0'] = df_pop['ID_HDC_G0']
    p['P1983'] = df_pop['P1983']
    p['P2016'] = df_pop['P2016']
#    p['p_delt'] = (df_pop['P2016'] - df_pop['P1983']) / df_pop['P1983'] * 100

    pdays_merge = pdays_merge.merge(p ,on=['ID_HDC_G0'], how = 'left')
    
    # Calc p days
    pdays_merge['people_days'] = pdays_merge['total_days'] * pdays_merge['P'] / 10**6 # total people days
    pdays_merge['people_days_83'] = pdays_merge['total_days'] * pdays_merge['P1983'] / 10**6 # people days w/ pop con
    pdays_merge['people_days_attr'] = pdays_merge['people_days'] - pdays_merge['people_days_83']  # dif
    
    return pdays_merge

In [3]:
# file paths
DATA_IN = "/home/cascade/projects/data_out_urbanheat/"  # Note: Need ?dl=1 to make sure this file gets read correctly
FN_stats = 'heatrange/All_data20200109_406C_es_final.csv'
df_stats = pd.read_csv(DATA_IN+FN_stats)
# FIG_OUT = '/home/cascade/projects/figures/'

FN_pop = 'GHS-UCDB-Interp.csv'
df_pop = pd.read_csv(DATA_IN+FN_pop)


In [26]:
df_data = make_pdays(df_stats, df_pop)

#### City-level reg coef

In [37]:
def pop_stat(df, geog, stats):
    
    """Finds linear coef for increase in stat by a given geography from 1983 - 2016, as well
    as the pct change in population of the cities within the given geography"""

    # Get results
    labels = []
    delt_list = []
    r2_list = []
    coef_list = []
    p_list = []
    df_out = pd.DataFrame()

    for label, df_geog in df.groupby(geog):

        # Get Data
        X_year = np.array(df_geog.groupby('year')['ID_HDC_G0'].mean().index).reshape((-1, 1))
        Y_stats = np.array(df_geog.groupby('year')[stats].sum()).reshape((-1, 1))

        # Add Intercept
        X_year_2 = sm.add_constant(X_year)

        # Regress
        model = sm.OLS(Y_stats, X_year_2).fit() 
        
        # Get slope
        # first param in intercept coef, second is slope of line but if slope = 0, then intecept
        if len(model.params) == 2:
            coef = model.params[1]
            
        else:
            coef = model.params[0]
        
        # R2 and P
        r2 = model.rsquared_adj
        p = model.pvalues[0]

        # Pop change
        delt = (df_geog['P2016'].sum() - df_geog['P1983'].sum())/df_geog['P1983'].sum() * 100

        # Make lists
        labels.append(label)
        r2_list.append(r2)
        coef_list.append(coef)
        p_list.append(p)
        delt_list.append(delt)

    # Make data frame
    df_out[geog] = labels
    df_out['p_delt'] = delt_list
    df_out['r2'] = r2_list
    df_out['coef'] = coef_list
    df_out['p_value'] = [round(elem, 4) for elem in p_list]

    return df_out

In [44]:
city_reg = pop_stat(df_data, 'ID_HDC_G0', 'people_days')



In [45]:
city_reg = city_reg[city_reg['p_value'] < 0.05]
city_reg = city_reg[np.isfinite(city_reg['r2'])]

In [46]:
city_reg

Unnamed: 0,ID_HDC_G0,p_delt,r2,coef,p_value
5,28,54.160669,0.293571,0.010786,0.0034
6,29,58.481011,0.209235,0.059782,0.0073
10,33,53.757662,0.286371,0.008153,0.0022
11,34,54.585504,0.389699,0.075298,0.0002
12,35,32.194459,0.116777,0.024425,0.0461
...,...,...,...,...,...
7010,12977,53.493044,0.997502,0.009428,0.0234
7019,13025,306.773983,0.386448,0.008347,0.0003
7020,13027,403.408088,0.399170,0.019045,0.0002
7021,13041,52.963691,0.468073,0.007257,0.0379


In [47]:
df_out = pd.DataFrame()
df_out['GCPNT_LAT'] = df_stats['GCPNT_LAT']
df_out['GCPNT_LON'] = df_stats['GCPNT_LON']
df_out['ID_HDC_G0'] = df_stats['ID_HDC_G0']
df_out['CTR_MN_NM'] = df_stats['CTR_MN_NM']
df_out['sub-region'] = df_stats['sub-region']
df_out = df_out.drop_duplicates('ID_HDC_G0')

In [48]:
city_reg = city_reg.merge(df_out, on = 'ID_HDC_G0', how = 'inner')

In [49]:
city_reg.head()

Unnamed: 0,ID_HDC_G0,p_delt,r2,coef,p_value,GCPNT_LAT,GCPNT_LON,CTR_MN_NM,sub-region
0,28,54.160669,0.293571,0.010786,0.0034,37.730079,-121.431413,United States,Northern America
1,29,58.481011,0.209235,0.059782,0.0073,35.36381,-119.047535,United States,Northern America
2,33,53.757662,0.286371,0.008153,0.0022,37.799915,-121.21972,United States,Northern America
3,34,54.585504,0.389699,0.075298,0.0002,37.973783,-121.295244,United States,Northern America
4,35,32.194459,0.116777,0.024425,0.0461,37.650573,-120.987132,United States,Northern America


In [50]:
## Turn people days from millions into thousands

city_reg['coef'] = city_reg['coef'] *10**3

In [51]:
city_reg.head()

Unnamed: 0,ID_HDC_G0,p_delt,r2,coef,p_value,GCPNT_LAT,GCPNT_LON,CTR_MN_NM,sub-region
0,28,54.160669,0.293571,10.786243,0.0034,37.730079,-121.431413,United States,Northern America
1,29,58.481011,0.209235,59.782169,0.0073,35.36381,-119.047535,United States,Northern America
2,33,53.757662,0.286371,8.152992,0.0022,37.799915,-121.21972,United States,Northern America
3,34,54.585504,0.389699,75.298299,0.0002,37.973783,-121.295244,United States,Northern America
4,35,32.194459,0.116777,24.424531,0.0461,37.650573,-120.987132,United States,Northern America


In [52]:
city_reg.to_csv(DATA_IN+'City-Level-PeoplelDays_20200112.csv')