# Figure 2

- Notebook to make data for figure 2 (likely a map)
- by Cascade Tuholske on 2020.10.01

#### Dependencies

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as seabornInstance 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import statsmodels.api as sm
import seaborn as sns
import glob

#### Functions

In [2]:
def make_pdays(df_stats, df_pop):
    """Join yearly heat-day totals with population and derive people-days.

    Parameters
    ----------
    df_stats : pandas.DataFrame
        Must contain the columns 'ID_HDC_G0', 'year', 'total_days',
        'CTR_MN_NM', 'sub-region', 'region' and 'intermediate-region'.
        Only the first row of each ('year', 'ID_HDC_G0') pair is used.
    df_pop : pandas.DataFrame
        Wide population table with an 'ID_HDC_G0' column and one 'P<year>'
        column per year; 'P1983' must be present. A stray 'Unnamed: 0'
        column (left over from a CSV round trip) is dropped when present.

    Returns
    -------
    pandas.DataFrame
        One row per retained (ID_HDC_G0, year) pair with the geography
        columns, 'P' (population of that year), 'P1983', and:
        'people_days'      = total_days * P / 1e9,
        'people_days_83'   = total_days * P1983 / 1e9,
        'people_days_attr' = people_days - people_days_83,
        'test'             = total_days * (P - P1983), not rescaled.
    """
    # Population: wide (P1983, P1984, ...) -> long (one row per ID and year).
    # errors='ignore' keeps this working when the CSV was loaded without the
    # leftover 'Unnamed: 0' index column.
    pop_long = (
        pd.wide_to_long(df_pop, stubnames='P', i='ID_HDC_G0', j='year')
        .reset_index()
        .drop(columns='Unnamed: 0', errors='ignore')
    )

    # Keep a single record per city-year, then only the columns needed downstream
    keep_cols = ['ID_HDC_G0', 'year', 'total_days', 'CTR_MN_NM',
                 'sub-region', 'region', 'intermediate-region']
    unique_rows = df_stats.drop_duplicates(['year', 'ID_HDC_G0'], keep='first')
    pdays = unique_rows[keep_cols].copy()

    # Attach the population of the matching year ...
    pdays_merge = pdays.merge(pop_long, on=['ID_HDC_G0', 'year'], how='left')

    # ... and the 1983 population, used as the fixed-population baseline
    p83 = df_pop[['ID_HDC_G0', 'P1983']]
    pdays_merge = pdays_merge.merge(p83, on=['ID_HDC_G0'], how='left')

    # People-days with actual population, with 1983 population, and the difference
    days = pdays_merge['total_days']
    pdays_merge['people_days'] = days * pdays_merge['P'] / 10**9
    pdays_merge['people_days_83'] = days * pdays_merge['P1983'] / 10**9
    pdays_merge['people_days_attr'] = pdays_merge['people_days'] - pdays_merge['people_days_83']
    # Unscaled version of the difference; kept because callers may read this column
    pdays_merge['test'] = days * (pdays_merge['P'] - pdays_merge['P1983'])

    return pdays_merge

In [3]:
# Regressions

# Get Data

def lm_func(df, col):
    """Fit a straight line to the yearly totals of ``col`` and return its slope.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'year' column and the column named by ``col``.
    col : str
        Column whose values are summed within each year before fitting.

    Returns
    -------
    numpy.ndarray
        The ``coef_`` attribute of the fitted ``LinearRegression``. The
        target is passed as a single 2-D column, so this is an array rather
        than a scalar.
    """
    # One groupby provides both the year axis (its index) and the yearly totals,
    # so the frame does not need a numeric 'ID_HDC_G0' column just to list years.
    yearly_total = df.groupby('year')[col].sum()
    X_year = yearly_total.index.to_numpy().reshape((-1, 1))
    Y_stats = yearly_total.to_numpy().reshape((-1, 1))

    return LinearRegression().fit(X_year, Y_stats).coef_

# Load and Make Data

In [6]:
# Input locations (DATA_IN ends with '/', so file names are appended directly)
# NOTE(review): the earlier "?dl=1" remark looks like a leftover from a
# URL-based download; DATA_IN is a local directory here -- confirm and drop.
DATA_IN = "/home/cascade/projects/data_out_urbanheat/"
FN_stats = 'heatrange/All_data20200109_406C_es_final.csv'
FN_pop = 'GHS-UCDB-Interp.csv'
# FIG_OUT = '/home/cascade/projects/figures/'

# Read the stats table and the population table
df_stats = pd.read_csv(f"{DATA_IN}{FN_stats}")
df_pop = pd.read_csv(f"{DATA_IN}{FN_pop}")

In [7]:
# Combine the stats table with population to get people-days per city and year
df_data = make_pdays(df_stats=df_stats, df_pop=df_pop)

# Create Reg Stats

In [8]:
from numpy.polynomial.polynomial import polyfit

# make things easier for OLS
regressor = LinearRegression()

# Geography column to group by
geog = 'CTR_MN_NM'

# Statistic whose yearly mean is regressed on year
stats = 'severity'

# Per-group results, filled in the loop below
labels = []
r2_list = []
coef_list = []
p_list = []

# NOTE(review): the original loop iterated over `df`, which is not defined
# anywhere in the visible notebook (NameError on Restart & Run All).
# `df_stats` is used here because `df_data` has no 'severity' column --
# confirm this is the intended frame.
for label, df_c in df_stats.groupby(geog):

    # Yearly mean of the statistic; the index of this series is the year axis
    yearly_mean = df_c.groupby('year')[stats].mean()
    X_year = yearly_mean.index.to_numpy().reshape((-1, 1))
    Y_stats = yearly_mean.to_numpy().reshape((-1, 1))

    if len(yearly_mean) < 2:
        # A slope cannot be estimated from a single year
        coef = r2 = p = np.nan
    else:
        # sm.OLS does not add an intercept on its own; without one the fit is
        # forced through the origin (year 0) and the "trend" is meaningless.
        # Column 0 of the design matrix is the constant, column 1 is the year.
        design = sm.add_constant(X_year, has_constant='add')
        model = sm.OLS(Y_stats, design).fit()
        coef = model.params[1]
        r2 = model.rsquared_adj
        p = model.pvalues[1]

    # Collect results for this group
    labels.append(label)
    r2_list.append(r2)
    coef_list.append(coef)
    p_list.append(p)

# Assemble the summary table in one go
df_out = pd.DataFrame({
    geog: labels,
    'r2': r2_list,
    'coef': coef_list,
    'p_value': [round(elem, 4) for elem in p_list],
})

Unnamed: 0,ID_HDC_G0,year,total_days,CTR_MN_NM,sub-region,region,intermediate-region,P,P1983,people_days,people_days_83,people_days_attr,test
0,2784,1983,1,Germany,Western Europe,Europe,Western Europe,997636.1,997636.118717,0.000998,0.000998,0.0,0.0
1,2784,2013,1,Germany,Western Europe,Europe,Western Europe,1538112.0,997636.118717,0.001538,0.000998,0.0005404756,540475.589177
2,2833,1983,1,Germany,Western Europe,Europe,Western Europe,60140.93,60140.932532,6e-05,6e-05,0.0,0.0
3,2833,1984,1,Germany,Western Europe,Europe,Western Europe,60331.27,60140.932532,6e-05,6e-05,1.903325e-07,190.332532
4,2833,1998,1,Germany,Western Europe,Europe,Western Europe,64114.84,60140.932532,6.4e-05,6e-05,3.973904e-06,3973.903796
