# Figure 3

- Notebook to make data for figure 3 (likely a map)
- by Cascade Tuholske on 2020.10.01

**NEED TO DROP THOSE WITH LESS THAN ZERO**

#### Dependencies

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as seabornInstance 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import statsmodels.api as sm
import seaborn as sns
import glob

#### Functions

In [3]:
def pop_stat(df, geog, stats):
    
    """Finds linear coef for increase in stat by a given geography from 1983 - 2016, as well
    as the pct change in population of the cities within the given geography"""

    # Get results
    labels = []
    delt_list = []
    r2_list = []
    coef_list = []
    p_list = []
    df_out = pd.DataFrame()

    for label, df_geog in df.groupby(geog):

        # Get Data
        X_year = np.array(df_geog.groupby('year')['ID_HDC_G0'].mean().index).reshape((-1, 1))
        Y_stats = np.array(df_geog.groupby('year')[stats].sum()).reshape((-1, 1))

        # Add Intercept
        X_year_2 = sm.add_constant(X_year)

        # Regress
        model = sm.OLS(Y_stats, X_year_2).fit() 
        
        # Get slope
        # first param in intercept coef, second is slope of line but if slope = 0, then intecept
        if len(model.params) == 2:
            coef = model.params[1]
            
        else:
            coef = model.params[0]
        
        # R2 and P
        r2 = model.rsquared_adj
        p = model.pvalues[0]

        # Pop change
        delt = df_geog.drop_duplicates('ID_HDC_G0').copy()
        delt['delt_pop'] = delt['P2016'] - delt['P1983']
        delt = delt['delt_pop'].sum()

        # Make lists
        labels.append(label)
        r2_list.append(r2)
        coef_list.append(coef)
        p_list.append(p)
        delt_list.append(delt)

    # Make data frame
    df_out[geog] = labels
    df_out['p_delt'] = delt_list
    df_out['r2'] = r2_list
    df_out['coef'] = coef_list
    df_out['p_value'] = [round(elem, 4) for elem in p_list]

    return df_out

#### Load and make data

In [4]:
# file path
DATA_IN = "/home/cascade/projects/UrbanHeat/data/"  # Note: Need ?dl=1 to make sure this file gets read correctly
FIG_OUT = "/home/cascade/projects/UrbanHeat/figures/"

# Raw Heat
FN_raw = 'processed/All_data_Raw406_es_final_pdayadd.csv'
raw_pdays = pd.read_csv(DATA_IN+FN_raw)


In [6]:
raw_stats.head()

Unnamed: 0.1,Unnamed: 0,ID_HDC_G0,year,total_days,CTR_MN_NM,sub-region,region,intermediate-region,P,P1983,P2016,people_days,people_days_heat,people_days_pop
0,0,2784,1983,1,Germany,Western Europe,Europe,Western Europe,997636.1,997636.118717,1573652.0,0.000998,0.000998,0.0
1,1,2784,2013,1,Germany,Western Europe,Europe,Western Europe,1538112.0,997636.118717,1573652.0,0.001538,0.000998,0.0005404756
2,2,2833,1983,1,Germany,Western Europe,Europe,Western Europe,60140.93,60140.932532,64978.56,6e-05,6e-05,0.0
3,3,2833,1984,1,Germany,Western Europe,Europe,Western Europe,60331.27,60140.932532,64978.56,6e-05,6e-05,1.903325e-07
4,4,2833,1998,1,Germany,Western Europe,Europe,Western Europe,64114.84,60140.932532,64978.56,6.4e-05,6e-05,3.973904e-06


# City-level change

In [7]:
# Find Regions where the trend in people days due to heat is sig at 0.05
heat_stat = 'total_days'
pdays_stat = 'people_days'
geog = 'sub-region'
df_region = pop_stat(raw_stats, geog, heat_stat)
df_region = df_region[df_region['p_value'] < 0.05]
sig_regions = list(df_region[geog])
len(sig_regions)

8

In [25]:
# Make an empty df
df_out = pd.DataFrame(columns = ['ID_HDC_G0', 'p_delt', 'r2', 'coef_heat', 'p_value', 'coef_pday'])

In [27]:
# Get city-level data
for i, region in enumerate(sig_regions):
    
    df_region = raw_stats[raw_stats[geog] == region] # select region
    
    # Get people days dues to heat coef
    heat = pop_stat(df_region, 'ID_HDC_G0', heat_stat) # get stats 
    heat = heat[heat['p_value'] < 0.05] # subset by p value
    heat.rename(columns={"coef": "coef_heat"}, inplace = True)
    
    # Get people days total
    pdays_all = pop_stat(df_region, 'ID_HDC_G0', 'people_days') # get stats
    pdays_all.rename(columns={"coef": "coef_pday"}, inplace = True)
    
    # try to merge them
    heat = heat.merge(pdays_all[['ID_HDC_G0', 'coef_pday']], on = 'ID_HDC_G0', how = 'left')
    
    # add them on
    df_out = df_out.append(heat)
    print(len(df_out))

2
39
64
422
440
1366
1870
2094


#### Merge Back in Meta Data


In [45]:
FN_raw = 'processed/All_data_Raw406_es_final.csv'
raw_stats = pd.read_csv(DATA_IN+FN_raw)

In [46]:
meta = raw_stats[['ID_HDC_G0', 'CTR_MN_NM','region', 'sub-region', 'intermediate-region', 'GCPNT_LAT', 'GCPNT_LON']]

In [47]:
meta = meta.drop_duplicates('ID_HDC_G0')

In [48]:
df_final = df_out.merge(meta, on = 'ID_HDC_G0', how = 'left')

In [49]:
len(df_final)

2094

In [50]:
df_final.head()

Unnamed: 0,ID_HDC_G0,p_delt,r2,coef_heat,p_value,coef_pday,CTR_MN_NM,region,sub-region,intermediate-region,GCPNT_LAT,GCPNT_LON
0,13041,37268.205086,0.17463,0.025974,0.0084,3e-06,Australia,Oceania,Australia and New Zealand,Australia and New Zealand,-34.058769,150.820711
1,13042,36929.203311,0.166724,0.063866,0.0101,7e-06,Australia,Oceania,Australia and New Zealand,Australia and New Zealand,-33.755903,150.697096
2,6628,47413.886787,0.120281,0.020168,0.0262,1e-06,China,Asia,Eastern Asia,Eastern Asia,44.42271,84.683331
3,6764,220955.714565,0.171578,0.067532,0.009,2.4e-05,China,Asia,Eastern Asia,Eastern Asia,44.322059,86.041839
4,7010,30858.179137,0.320438,0.106952,0.0003,6e-06,China,Asia,Eastern Asia,Eastern Asia,44.159307,87.533838


In [51]:
fn_out = 'processed/All_data_Raw406_es_final_pdayadd_fig3.csv'
df_final.to_csv(DATA_IN+fn_out)