# Building panel data for GDP and temperature
#### Christopher Callahan
#### Christopher.W.Callahan.GR@dartmouth.edu

#### Mechanics
Dependencies

In [1]:
import xarray as xr
import numpy as np
import sys
import os
import datetime
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap, cm
from matplotlib import rcParams
import matplotlib.gridspec as gridspec
import seaborn as sns

Data locations

In [2]:
# Input/output directories, all located under one data root that is
# relative to the directory the notebook is run from.
data_root = "../Data/"
loc_gdp = data_root+"GDP/"              # GDP and GDP-per-capita panels
loc_gdp_ppp = data_root+"GDP/"          # World Bank PPP GDP per capita table
loc_tmean = data_root+"CountryObs/"     # country-level monthly temperature
loc_precip = data_root+"CountryObs/"    # country-level monthly precipitation
loc_regions = data_root+"Regions/"      # region lookup table
loc_out = data_root+"Panel/"            # destination of the assembled panel

Years

In [3]:
# Inclusive first/last year of the economic panel that is written out
y1, y2 = 1960, 2014
# Inclusive year ranges of the monthly climate files; they appear in the
# input file names and label the annual means computed from those files
y1_temp, y2_temp = 1900, 2015
y1_precip, y2_precip = 1900, 2015

#### Analysis

GDP and GDP per capita

In [4]:
# Long-format country panels of GDP and GDP per capita. Later cells drop
# "CountryName", rename "CountryCode" to "ISO" and merge both on "Year".
gdp_panel, gpc_panel = [
    pd.read_csv(loc_gdp+fname)
    for fname in ("GDP_Country_Panel.csv","GDP_PerCap_Country_Panel.csv")
]

In [5]:
# Wide-format table: later cells read its "Country Code" column and the
# year column "1990".
# NOTE(review): judging by the file name this is World Bank GDP per capita
# in constant-2017 PPP dollars -- confirm against the data source.
gdp_ppp = pd.read_csv(loc_gdp_ppp+"worldbank_gdp_pc_ppp_constant2017.csv")

Country-average temperature and precipitation

In [6]:
import warnings
# Suppress FutureWarnings whose message starts with "'base' in .resample".
# NOTE(review): presumably raised by the resample(time="YS") calls in the
# cells below -- confirm this filter is still needed with current versions.
warnings.filterwarnings("ignore",category=FutureWarning,message="'base' in .resample")

In [7]:
def monthly_to_yearly_mean(x):
    """Annual mean of monthly data, weighted by the number of days per month.

    A plain ``x.resample(time="YS").mean(dim="time")`` treats every month
    equally, which is wrong because months differ in length. Here each month
    is weighted by its share of the days in its calendar year, and the
    weights are renormalized over the months that actually hold data, so
    missing months do not bias the mean toward zero.

    Parameters
    ----------
    x : xarray.DataArray
        Monthly data with a datetime-like ``time`` coordinate.

    Returns
    -------
    xarray.DataArray
        One value per calendar year along ``time`` (year-start labels).
        A year with no valid months evaluates to 0/0, i.e. NaN.
    """

    # fraction of its calendar year that each month covers
    month_lengths = x.time.dt.days_in_month
    lengths_by_year = month_lengths.groupby("time.year")
    wgts = lengths_by_year/lengths_by_year.sum()

    # 1 where x holds data, 0 where it is missing
    valid = xr.where(x.isnull(),0.0,1.0)

    # total weight of the valid months in each year (the normalizer) and
    # the weighted sum of the data itself
    weight_total = (valid*wgts).resample(time="YS").sum(dim="time")
    weighted_sum = (x*wgts).resample(time="YS").sum(dim="time")
    return weighted_sum/weight_total

In [8]:
# File names embed the inclusive year range of each monthly product.
# Note that the two prefixes differ in capitalization ("obsensemble" vs.
# "ObsEnsemble"), which matters on case-sensitive file systems.
temp_fname = "obsensemble_country_temp_monthly_"+str(y1_temp)+"-"+str(y2_temp)+".nc"
precip_fname = "ObsEnsemble_country_precip_monthly_"+str(y1_precip)+"-"+str(y2_precip)+".nc"

# Temperature is a Dataset (several variables are read from it below);
# precipitation is opened as a single DataArray.
temp_custom_ds = xr.open_dataset(loc_tmean+temp_fname)
precip_custom = xr.open_dataarray(loc_precip+precip_fname)

In [10]:
temp_custom = temp_custom_ds.temp_ensemble
iso_custom = temp_custom.coords["iso"].values

# Day-weighted annual means of the ensemble temperature and of the three
# individual products, each relabelled with integer calendar years so the
# results can later be merged on "Year".
annual_temps = []
for monthly in (temp_custom,
                temp_custom_ds.temp_udel,
                temp_custom_ds.temp_20cr,
                temp_custom_ds.temp_best):
    annual = monthly_to_yearly_mean(monthly)
    annual.coords["time"] = np.arange(y1_temp,y2_temp+1,1)
    annual_temps.append(annual)
temp_custom_yr, temp_udel_yr, temp_20cr_yr, temp_best_yr = annual_temps

# NOTE(review): precipitation uses an unweighted mean of the monthly values,
# unlike temperature above -- confirm this is intended for the precip units.
precip_custom_yr = precip_custom.resample(time="YS").mean(dim="time")
precip_custom_yr.coords["time"] = np.arange(y1_precip,y2_precip+1,1)

  return np.nanmean(a, axis=axis, dtype=dtype)


Create new dataframe

In [11]:
# One row per (country, year): every country gets the full run of panel
# years, and its ISO code is repeated once for each of those years.
years = np.arange(y1,y2+1,1)
nc = len(iso_custom)
iso_repeat = np.repeat(iso_custom,len(years))
years_repeat = np.tile(years,nc)

In [12]:
# Build the balanced country-year skeleton from a column mapping so that
# each column keeps its own dtype. Stacking the integer years and the ISO
# codes into a single array first forces one common dtype: with fixed-width
# string ISO codes the years silently become strings, which breaks the
# merges on the integer "Year" key and the year arithmetic further down.
gdp_temp_panel = pd.DataFrame({"Year":years_repeat,"ISO":iso_repeat},columns=["Year","ISO"])

Combine data

In [13]:
# Harmonize the identifier column of both economic panels with the skeleton
# ("CountryCode" -> "ISO") and discard the free-text country name.
gdp_df = gdp_panel.rename(columns={"CountryCode":"ISO"}).drop(columns=["CountryName"])
gpc_df = gpc_panel.rename(columns={"CountryCode":"ISO"}).drop(columns=["CountryName"])

# Left merges keep every skeleton row (in its original order); country-years
# without economic data are filled with NaN.
for econ_df in (gdp_df,gpc_df):
    gdp_temp_panel = gdp_temp_panel.merge(econ_df,on=["Year","ISO"],how="left")

In [14]:
# Each annual field is named after its output column, flattened to a long
# (ISO, Year, value) table and left-merged onto the panel, in this order.
climate_fields = [
    (temp_custom_yr,"Temp"),
    (temp_udel_yr,"Temp_UDel"),
    (temp_20cr_yr,"Temp_20cr"),
    (temp_best_yr,"Temp_BEST"),
    (precip_custom_yr,"Precip"),
]

climate_frames = []
for field, column in climate_fields:
    field.name = column  # the DataArray name becomes the value column's name
    frame = field.rename({"iso":"ISO","time":"Year"}).to_dataframe().reset_index()
    gdp_temp_panel = pd.merge(gdp_temp_panel,frame,on=["Year","ISO"],how="left")
    climate_frames.append(frame)

# Keep the individual long tables available under their usual names
temp_df, temp_udel_df, temp_20cr_df, temp_best_df, precip_df = climate_frames

PPP GDP per capita in the year 1990

In [15]:
# Pull the 1990 column out of the wide PPP table and attach it to every row
# of the matching country; it is merged on "ISO" only, so the value is
# constant across years within a country.
ppp_1990 = gdp_ppp[["Country Code","1990"]]
gdp_ppp_yr = ppp_1990.rename(columns={"Country Code":"ISO","1990":"GPC_PPP_1990"})
gdp_temp_panel = gdp_temp_panel.merge(gdp_ppp_yr,on=["ISO"],how="left")

Add country/year info and trends

In [16]:
# Row-aligned ISO codes and years of the panel, plus the sorted list of
# distinct countries that defines the country numbering.
countries = gdp_temp_panel.loc[:,"ISO"].values
countries_sorted = list(sorted(set(countries)))
years = gdp_temp_panel.loc[:,"Year"].values

# Country-specific time trends: for country i, "yi_linear_i" holds
# (Year - y1) on that country's rows and 0 elsewhere, and "yi_quadratic_i"
# holds its square. "countrynum" is a 1-based country identifier.
n_rows = len(years)
zrs_ctry = np.zeros(n_rows)
trend_names = []
trend_columns = {}
for i, iso in enumerate(countries_sorted):
    indices = countries == iso
    y_lin = years[indices] - y1

    zrs_lin = np.zeros(n_rows)
    zrs_quad = np.zeros(n_rows)
    zrs_lin[indices] = y_lin
    zrs_quad[indices] = y_lin**2

    # trend columns are numbered from 0, countrynum from 1
    for name, values in (("yi_linear_"+str(i),zrs_lin),
                         ("yi_quadratic_"+str(i),zrs_quad)):
        trend_names.append(name)
        trend_columns[name] = values
    zrs_ctry[indices] = i+1

# Attach all trend columns in a single concat: inserting them one at a time
# fragments the frame and triggers pandas' PerformanceWarning. Columns left
# over from an earlier run of this cell are dropped first, so re-running the
# cell does not produce duplicate columns.
trend_df = pd.DataFrame(trend_columns,index=gdp_temp_panel.index,columns=trend_names)
gdp_temp_panel = pd.concat(
    [gdp_temp_panel.drop(columns=trend_names,errors="ignore"),trend_df],
    axis=1)

gdp_temp_panel.loc[:,"countrynum"] = zrs_ctry

First difference of the natural log for growth

In [17]:
# Natural log of GDP per capita; its first difference (next cell) is growth.
log_gpc = np.log(gdp_temp_panel["GPC"])
gdp_temp_panel["lnGPC"] = log_gpc

In [18]:
# Growth = within-country first difference of lnGPC. The first row of each
# country has no predecessor and is NaN. Like the explicit per-country
# np.diff loop it replaces, this relies on each country's rows being in
# chronological order, which holds by construction of the panel skeleton
# (left merges preserve the skeleton's row order).
growth = gdp_temp_panel.groupby("ISO",sort=False)["lnGPC"].diff().values
gdp_temp_panel.loc[:,"growth"] = growth

Regions (i.e., continents)

In [19]:
# Region lookup table; the next cell reads its "ISO3" and "RegionCode" columns.
regions = pd.read_csv(loc_regions+"WPP2019_Regions_Processed.csv")

In [20]:
# Attach each country's region code; countries missing from the lookup
# table keep NaN. Iterate over the distinct ISO codes rather than over every
# panel row: a single assignment already fills all rows of a country, so
# looping per row only repeated the same work once for every year.
gdp_temp_panel["region"] = np.full(len(gdp_temp_panel),np.nan)
panel_iso = gdp_temp_panel.ISO.values
region_iso = regions.ISO3.values
for code in pd.unique(panel_iso):
    if code in region_iso:
        # first matching entry wins if the lookup table lists a code twice
        reg = regions.loc[region_iso==code,"RegionCode"].values[0]
        gdp_temp_panel.loc[panel_iso==code,"region"] = reg

Write out

In [21]:
# Write the assembled panel. to_csv's default index=True is in effect, so the
# row index is written as an unnamed first column of the CSV.
gdp_temp_panel.to_csv(loc_out+"Attribution_DamageFunction_Panel.csv")