In [17]:
%reset-f
%load_ext autoreload
%autoreload
%matplotlib inline

from pandas.io import wb
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm

import warnings
warnings.filterwarnings("always",category=UserWarning)
from res_ind_lib import *
import os, time
import itertools

from progress_reporter import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
# Directory that receives the generated outputs; created if it does not exist yet
out_folder = "scorecards/"
os.makedirs(name=out_folder, exist_ok=True)

# data

In [19]:
# Model inputs, one row per country
df_original=pd.read_csv("df_original.csv",index_col="country")

# Adds the "countries" of the sensitivity analysis to the inputs.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df_original=pd.concat([df_original,pd.read_csv("df_sensit_input.csv",index_col="country")])

df_with_results=pd.read_csv("df.csv",index_col="country")

# Rankings (rank 1 = largest value, ties share the smallest rank). They are computed
# before the sensitivity rows are appended, on rows without missing values, after
# dropping columns that are entirely empty.
ranks = df_with_results.dropna(how="all",axis=1).dropna().rank(method="min",ascending =False)
ranks.to_csv("rankings.csv")

# Appends the results of the sensitivity analysis
df_with_results=pd.concat([df_with_results,pd.read_csv("df_sensit_results.csv").set_index("country")])

#computes all derivatives

In [20]:
# Columns of df_original that are NOT differentiated against
excluded_inputs = [
    "pop","iso3","original_country","gdp_pc_pp","pov_head","avg_prod_k_ref","income_elast",
    "faref","avg_prod_k","peref","vref","share1_ref","bashs","ophe","fa_ref","v_ref",
]
# ... together with every column whose name starts with "fa_ratio"
fa_ratio_columns = [c for c in df_original.columns if c.startswith("fa_ratio")]

# np.setdiff1d returns the remaining column names sorted and without duplicates
deriv_set = np.setdiff1d(df_original.columns.values, excluded_inputs + fa_ratio_columns)

# Saves the list of inputs, one name per line (no header, no index)
pd.DataFrame(data=deriv_set).to_csv("deriv_set.csv",index=False,header=False)

def compute_derivative(df_original,outname,h=0.0001,variables=None):
    """Forward finite-difference derivative of one model output wrt each input.

    For every input column `var`, the column is shifted by `h` in a deep copy
    of `df_original`, the model is re-run with `compute_resiliences`, and the
    derivative is estimated as (f(x+h) - f(x)) / h.

    Parameters
    ----------
    df_original : pandas.DataFrame
        Model inputs; it is not modified (each perturbation works on a copy).
    outname : str
        Column of the `compute_resiliences` output to differentiate.
    h : float, optional
        Finite-difference step added to each input (default 0.0001).
    variables : iterable of str, optional
        Input columns to differentiate against. Defaults to the notebook-level
        `deriv_set`.

    Returns
    -------
    pandas.DataFrame
        One column per input for which the derivative could be computed.
        Rows follow the index of the `compute_resiliences` output
        (presumably one row per country -- confirm against res_ind_lib).

    Notes
    -----
    Inputs for which the computation raises TypeError are skipped and a
    message is printed instead.
    """
    if variables is None:
        variables = deriv_set

    der = pd.DataFrame()
    # baseline value of the output, computed once on the unperturbed inputs
    fx = compute_resiliences(df_original)[outname]
    for var in variables:
        try:
            df_=df_original.copy(deep=True)
            df_[var]=df_[var]+h
            fxh= compute_resiliences(df_)[outname]
            der[var] = (fxh-fx)/(h)
        except TypeError:
            print("no derivative for " +var)
    return der


In [21]:
# Every input in deriv_set must have an entry (row key) in inputs_info.csv;
# stops at the first one that is missing.
info = pd.read_csv("inputs_info.csv").set_index("key")
documented_inputs = info.index.values
for d in deriv_set:
    if d in documented_inputs:
        continue
    raise Exception(d+" is not documented in inputs_info.csv")


In [22]:
#todo: save a loop by taking all score_card_set from the derivative

# Outputs for which derivatives are computed
# (alternative set: ["resilience","resilience_no_shock","resilience_no_shock_no_uspcale","resilience_no_shock_no_SP"])
score_card_set = ["resilience","risk","v_shew", "dKpc"]

# Empty table: complete rows of df_original as index, (output, input) pairs as columns
headr = list(itertools.product(score_card_set,deriv_set))
derivatives=  pd.DataFrame(index=df_original.dropna().index.values, columns=pd.MultiIndex.from_tuples(headr))

# Fills the table one output at a time
for outname in score_card_set:
    progress_reporter(outname)
    raw_derivative = compute_derivative(df_original,outname)
    # Weights each derivative by info["weight_der"] (number of people affected, per the original note).
    # The weights are restricted to deriv_set so that the division does not create extra all-NaN columns.
    derivatives[outname]=raw_derivative/info["weight_der"][deriv_set]


dKpc


#output

In [23]:
# Derivatives of risk wrt each input, plus two derived columns
der_risk = derivatives["risk"].copy()

# Derivative of risk wrt resilience: row-wise ratio of the two derivative tables.
# Because of floating point operations, mode can return several close values, hence the mean.
risk_over_resilience = derivatives["risk"]/derivatives["resilience"]
der_risk["resilience"]=risk_over_resilience.mode(axis=1).mean(axis=1)

# Derivative of risk wrt v_shew, obtained from the derivatives of both wrt v
der_vshew = derivatives["v_shew"]
der_risk["v_shew"]=der_risk["v"]/der_vshew["v"]

derivatives["resilience"].to_csv("deriv.csv")
der_risk.to_csv("deriv_risk.csv")

Output signs in Excel

In [24]:
# Writes one sheet of derivatives per output, colouring cells by sign
# (very useful for understanding the model)
writer= pd.ExcelWriter("signs.xlsx", engine='xlsxwriter')
workbook=writer.book

# Light red fill with dark red text
red = workbook.add_format({'bg_color': '#FFC7CE', 'font_color': '#9C0006'})
# Light blue fill with dark blue text
blue = workbook.add_format({'bg_color': '#92c5de', 'font_color': '#000061'})

# (criteria, format) pairs, applied in this order: positive cells blue, negative cells red
sign_rules = [('>', blue), ('<', red)]

for outname in score_card_set:
    derivatives[outname].dropna().to_excel(writer,sheet_name=outname)
    sheet = writer.sheets[outname]
    for criteria, fmt in sign_rules:
        sheet.conditional_format('B2:BB600', {'type':'cell',
                                    'criteria': criteria,
                                    'value':    0,
                                    'format':   fmt})
    # keeps the header row and the first column visible while scrolling
    sheet.freeze_panes(1, 1)


In [25]:
# Writes the workbook to disk. ExcelWriter.save() was removed in pandas 2.0;
# close() saves the file and releases it.
try :
    writer.close()
except PermissionError:
    warnings.warn("Cannot write excel file. Check that it's not opened and try again")


In [26]:
# Sign of the resilience derivative wrt each input: -1 or +1 when all countries
# agree, NaN when the sign differs between countries (or is never defined).
# Exact zeros are treated as missing so that they do not count as a sign.
der =     np.sign(derivatives["resilience"]).replace(0,np.nan)
# dtype is given explicitly: an empty Series without dtype is object-typed on recent pandas
signs= pd.Series(index=der.columns, dtype=float)
for i in signs.index:
    if (der[i].min()==der[i].max()): #all non-NaN signs are equal
        signs[i]=der[i].min()
    else:
        print("ambigous sign for "+i)
        signs[i]=np.nan



ambigous sign for H
ambigous sign for T_rebuild_K
ambigous sign for pe
ambigous sign for protection
ambigous sign for pv
ambigous sign for v


#Write Excel scorecards

In [27]:
# Writes one Excel scorecard per country. Currently disabled by the `if False` guard.
# Label-based .loc replaces .ix, which was removed in pandas 1.0.
if False:
    abs_derivative = signs*derivatives["resilience"] #abs value
    # change in each input associated with a 0.01 change in resilience (infinite steps become NaN)
    step_for_one = (0.01/abs_derivative).replace([-np.inf,np.inf],[np.nan,np.nan])


    # inputs in rows, (country, field) pairs in columns
    headr = list(itertools.product(derivatives.dropna().index.values,['level', "ranking",'der','for_one']))
    scores=  pd.DataFrame(index=deriv_set, columns=pd.MultiIndex.from_tuples(headr))


    for c in derivatives.dropna().index:
        scores[(c,'level')]=df_original.loc[c]
        scores[(c,'der')]=abs_derivative.loc[c]
        scores[(c,'for_one')]=step_for_one.loc[c]
        # rankings are looked up under the country the row was derived from
        scores[(c,'ranking')]=ranks.loc[df_original.loc[c,"original_country"]]


    for c in derivatives.dropna().index:
    #for c in ["France"]:
        # file name: lower-case country name, spaces replaced by underscores, backslashes removed
        with pd.ExcelWriter(out_folder+c.lower().replace(" ","_").replace("\\","")+'.xlsx', engine='xlsxwriter') as writer:
            percent = writer.book.add_format()
            percent.set_num_format("0.0%")

            # model outputs at the top of the sheet, inputs table below them
            outs = df_with_results.loc[df_with_results.index==c,score_card_set]
            outs.transpose().to_excel(writer)
            scores[c].reset_index().rename(columns={"index":"input"}).to_excel(writer,startrow =2+len(score_card_set),index=False)


#Principal component analysis

In [28]:
# Inputs for the PCA: the deriv_set columns of the original data, complete rows only
df=pd.read_csv("df_original.csv",index_col="country")
df=df[deriv_set].dropna()

In [29]:
from sklearn.decomposition import PCA

In [30]:
# Fits a PCA with a single component on the input table
# (fit returns the estimator itself, which is then displayed)
pca = PCA(n_components=1).fit(df)
pca

PCA(copy=True, n_components=1, whiten=False)

In [31]:
# Loadings of the single fitted component, flattened to 1-D (one value per column of df).
# NOTE(review): in the recorded output one loading is about -1 and all others are near 0,
# which suggests one large-scale input dominates -- consider standardising df before fitting.
pca.components_.flatten()

array([  2.77598456e-17,   1.11022302e-16,   0.00000000e+00,
         0.00000000e+00,  -2.73848559e-03,  -3.18713039e-03,
        -3.12916269e-03,  -7.34183632e-05,  -2.85972244e-03,
        -4.32485261e-17,  -4.32485261e-17,  -2.94918295e-03,
        -2.46490729e-03,  -9.99934843e-01,   2.15032669e-04,
        -6.76220843e-03,  -8.08985528e-05,  -3.17416685e-03,
        -3.88388990e-03,  -2.05054954e-03,  -5.10451858e-04,
         1.50803517e-03,   1.49139866e-03])