In [32]:
import numpy as np
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import math
import glob,os
import sys
import scipy
from importlib import  reload
from time import process_time 
#from libraries.lib_gather_data import get_hhid_FIES

%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
from shock_libraries import *
from plotting_libraries import *
from response_libraries import get_response_sp
#
from income_shock_libraries_ps import *
#
from libraries.lib_country_dir import set_directories, load_survey_data, get_places_dict
from libraries.lib_get_hh_savings import get_hh_savings
from libraries.pandas_helper import broadcast_simple

In [34]:
# formatting & aesthetics
font = {'family':'sans serif', 'size':10}
plt.rc('font', **font)
mpl.rcParams['xtick.labelsize'] = 10
mpl.rcParams['ytick.labelsize'] = 10
mpl.rcParams['legend.facecolor'] = 'white'
sns.set_style("white")

sns_pal = sns.color_palette('Set1', n_colors=8, desat=.4)
greys_pal = sns.color_palette('Greys', n_colors=9)

In [35]:

def merge_rank_entre(rank_file='./temp/lfs_a09_pqkb_ranked_V2_entrpreneurial_20200423.csv', labor_file='./csv/ph_labor_force.csv',outfile='./csv/_labor_rank_merge.csv',merge_col='a09_pqkb'):
    """
    Description:
        - combines merged inputs from labor rankings with full household/individual dataset
    
    Assumes:
        - consistent file structure based on descriptive industry names
        - directory structure consistent with DIR: <covid_phl> when using default settings
        -left merge
    inputs:
        - rank_file = ranking file of job essential/work from home scoring // type: <STR>
        - labor_file = primary database to be merged to // type: <STR>
        - merge_col = column name for merging, must match in both files// type: <STR>
    outputs:
        - outfile = filename to print output, // type: <STR>
    returns:
        - labor_rank_merge = output dataframe with merged files // type: <pandas.df>
    
    """
    rank = pd.read_csv(rank_file) # load Kayenat file
    labor = pd.read_csv(labor_file) # load full survey

    # set index column to match types
    rank[merge_col] = [str(q) for q in rank[merge_col]] # enforce type = string
    labor[merge_col] = [str(q) for q in labor[merge_col]] # enforce type = string

    labor_rank_merge = pd.merge(labor,rank, on=merge_col, how='left') # merge on a09_pqkb
    labor_rank_merge.to_csv(outfile,index=False)
    return(labor_rank_merge) 


# create function incorporating kayenat data table into shock factor:

# goal: generate function: weight_sectors


In [36]:
#---------------------------------- Added: 20200423: <PS>

def rand_weighted_shock_subsector_entre():

    """    
        Updated 20200423
            - now compute just subsector level shocks and output table for 'a09_pqkb'
        Updated 20200422 
            - incorporate 3rd dimension for sector (public/private/gov) and impact on essentiality
                - now, only enforce that government jobs are maintained, across all sectors

        Updated 20200419 
            - incorporate 3rd dimension for sector (public/private/gov) and impact on essentiality 
        Updated 20200413:
            - incorporate 2nd dimension for social distancing potential
        module added to covid_phl: <income_shock_libraries_ps.py>

        primary development: <FCT> rand_weigthed_shock_distance() <FCT>

        - function to replace: 
            rand_weighted_shock_3dim_v2() --> rand_weighted_shock_3dim()
                --> rand_weighted_shock_distance() --> rand_weighted_shock_1() 
                    --> get_income_shock(): in <shock_libraries.py>

        - description:
            * matches existing df_shock dataframe (compatibiility)
            * uses Kayenat table of job descriptions demand value for 'a09_pqkb' by sector to create weighted probability of job disruption by sector, as input to 'fa' column of df_shock             
            dataframe -- representative FIES and LFS data 

           * for values 0.0, 0.5,1 : assigns each job description a random: 0-50%, 50-99%, 100% chance of disruption, weighting each by the prevalence of that role in each sector, to generate cumulative probability of disruption. 


            * now incorporates enforcement of social distancing measures, by enforcing social distance in non-essential jobs based on K.Kabirs' 0-4 'work-from-home' scoring.



    """
    # develop 3 factor code here:

    # make each factor modular

    mr = merge_rank_entre()
        # get subset: a09_pqkb
    
    #### **** CRITICAL CHANGE HERE 20200423 note change to LFS SECTOR / E SECTOR
    mr_subset = mr[['hhid_lfs','cc101_lno','a09_pqkb','c19_pclass','demand_scale', 'w_home','E_sector']]
    
    #ubsector_shock = subsector_shock.rename(columns={'a09_pqkb': 'subsector'})
    
    mr_subset = mr_subset.rename(columns={'E_sector':'LFS_sector'})

    
    indexNames = mr_subset[mr_subset['a09_pqkb'] == 'nan' ].index

        # Delete these row indexes from dataFrame
    mr_subset.drop(indexNames , inplace=True)
    mr_subset = mr_subset.reset_index(drop=True)

     # get subset: c19_pclass

    indexNames2 = mr_subset[mr_subset['c19_pclass'] == 'nan' ].index

        # Delete these row indexes from dataFrame
    mr_subset.drop(indexNames2 , inplace=True)
    mr_subset = mr_subset.reset_index(drop=True)

        # enforce string:
    mr_subset['a09_pqkb'] = [str(q) for q in mr_subset['a09_pqkb']] # enforce type = string
    mr_subset['LFS_sector'] = [str(q) for q in mr_subset['LFS_sector']] # enforce type = string
    mr_subset['c19_pclass'] = [str(q) for q in mr_subset['c19_pclass']] # enforce type = string


    # still need logic to build the logic for each job sector
    ## may need to restructure this whole section of code

    #####
        # drop duplicates (now that overall weighting established)
    mr_subset = mr_subset.drop_duplicates(subset='a09_pqkb')
    mr_subset = mr_subset.reset_index(drop=True)



        # generate probability and combine with relative weighting
    mr_subset['partial_prob'] = np.nan
    mr_subset['dummy'] = np.nan

        # incorporate Kayenat tables into 'di' &&
        # nested logic to incorporate 0-4 scale for social distancing measures
        ## where scores of 0 & 1 result in complete job lost, due to unable to distance
    i=0
    while i < len(mr_subset):

        # incorporate 3rd column GOVT modifiers here:
        if (mr_subset['c19_pclass'][i] == "Gov't/Gov't Corporation"):

            mr_subset.partial_prob[i] = 0  # essentially reverts the random uniform logic implemented above

        # remainder of logic tree
        else:

            if mr_subset.demand_scale[i] == 0:

                    # incorporate 0-4 scale logic:

                if mr_subset.w_home[i] == 0:
                    mr_subset.partial_prob[i] = 1

                elif mr_subset.w_home[i] == 1:
                    mr_subset.partial_prob[i] = 1

                else:
                    mr_subset.partial_prob[i] = (random.randint(0,50)/100)


            elif mr_subset.demand_scale[i] == 0.5: 

                    # incorporate 0-4 scale logic:
                if mr_subset.w_home[i] == 0:
                    mr_subset.partial_prob[i] = 1

                elif mr_subset.w_home[i] == 1:
                    mr_subset.partial_prob[i] = 1

                else: 
                    mr_subset.partial_prob[i] = (random.randint(50,100)/100)

            elif mr_subset.demand_scale[i] == 1.0:
                mr_subset.partial_prob[i] = 0

            else:
                mr_subset.dummy[i] = -99 # legacy testcase check



        i = i + 1

        # remove nans in summing fields, and dummy storage
    del mr_subset['dummy']

      # save to separate var for testing    
    #rand_weighted_shock = df_shock_cum


    subsector_shock = mr_subset[['LFS_sector','a09_pqkb','partial_prob']]
    subsector_shock = subsector_shock.rename(columns={'a09_pqkb': 'subsector'})
    subsector_shock = subsector_shock.set_index('subsector')
    subsector_shock = subsector_shock.rename(columns={'partial_prob': 'fa'})
    return(subsector_shock)

    #return(rand_weighted_shock)
    #rand_weighted_shock

In [37]:
aaa = rand_weighted_shock_subsector_entre()
aaa

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

Unnamed: 0_level_0,LFS_sector,fa
subsector,Unnamed: 1_level_1,Unnamed: 2_level_1
"Accounting, bookkeeping and auditing activities; tax consultancy",Entrep. Activities NEC,0.39
Defense activities,CSRP services,1.00
Women's and girls' and babies' garment manufacturing,Manufacturing,0.00
General public administration activities,CSRP services,1.00
Restaurants and mobile food service activities,CSRP services,0.00
...,...,...
Manufacture of wooden containers,Manufacturing,0.09
"Other business support service activities, n.e.c.",Entrep. Activities NEC,0.19
"Non-ferrous rolling, drawing and extrusion mills",Manufacturing,0.00
"Manufacture of steam generators, except central heating hot water",Manufacturing,0.00


In [38]:
#---------------------------------- Added: 20200415: <PS>
def generate_shock_100_subsector():  # initialize shock sector storage dataframe
   
    '''
    UPDATED: 20200423
        family tree:
            --> generate_shock_100
            
   current hard coding for sensitivity analysis, 20200413: requires cleaning for further implementation
   - addition of modularity
   - 
   - current functionality:
       - outputs 
       to location: './temp/sect_iter_100.csv
       - containing data frame with 101 simulations of <rand_weighted_shock_distance():
    - runtime: ~10minutes
   '''

    stor = rand_weighted_shock_subsector_entre()
    stor_orig = stor # save out full datatable for final merge
    del stor['LFS_sector']

    # set number of iterations
    p = 0
    n_iter = 98

    # model and store stochastic sector response
    while p < n_iter:
        new_val = rand_weighted_shock_subsector_entre()
        del new_val['LFS_sector']
        new_val = new_val.rename(columns={'fa': ('iter'+str(p))})

        # pd.merge(labor,rank, on=merge_col, how='left')
        stor = pd.merge(stor,new_val,on='subsector', how='left')
        p = p+ 1
        print(p)
    
    # merge to original table
    stor.to_csv('./temp/entre_subsector_a09_iter_100_20200423_pre.csv')
    
    full_stor = pd.merge(stor_orig, stor, on='subsector', how='left')
    full_stor.to_csv('./temp/entre_subsector_a09_iter_100_20200423.csv')


In [6]:
#---------------------------------- Added: 20200415: <PS>
def get_shock_stats_subsector():
    
    '''
    UPDATED: 20200423
        family tree:
            --> get_shock_stats()
    
    Notes: multiple locations where hard-coded and reading in files
    
    '''
    
    # generate shock table statistics
    #df['mean'] = df.mean(axis=1)

        # load csv to dataframe:
    #dfs = pd.read_csv('./temp/sect_iter_100.csv') # original
    dfs = pd.read_csv('./temp/entre_subsector_a09_iter_100_20200423_pre.csv') # modified 20200420
    # set index to LFS_sector
    #dfs.set_index('subsector')
    dfs.set_index('subsector')

    # compute statistics:
    dfs['mean'] = dfs.mean(axis=1)
    #print(dfs['mean'])
    dfs['std_dev'] = dfs.std(axis=1)
    #print(dfs['std_dev'])

    #round to 3 dec:
    dfs['mean'] = [(round(q, 3)) for q in dfs['mean']]
    dfs['std_dev'] = [(round(q, 3)) for q in dfs['std_dev']]

    # new datafame storing just info:
    df_stat = dfs[['subsector','mean','std_dev']].set_index('subsector')
    df_stat
    # df_stat.to_csv('./temp/phi_get_shock_input.csv') # original
    df_stat.to_csv('./temp/entre_subsector_a09_stats_20200423') # modified 20200420
    return(df_stat)


In [39]:
b = generate_shock_100_subsector()
b

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98


In [8]:
c = get_shock_stats_subsector()
c

Unnamed: 0_level_0,mean,std_dev
subsector,Unnamed: 1_level_1,Unnamed: 2_level_1
"Accounting, bookkeeping and auditing activities; tax consultancy",0.245,0.155
Defense activities,1.000,0.000
Women's and girls' and babies' garment manufacturing,0.000,0.000
General public administration activities,1.000,0.000
Restaurants and mobile food service activities,0.000,0.000
...,...,...
Manufacture of wooden containers,0.238,0.137
"Other business support service activities, n.e.c.",0.259,0.141
"Non-ferrous rolling, drawing and extrusion mills",0.000,0.000
"Manufacture of steam generators, except central heating hot water",0.000,0.000


In [15]:
# generate shock table statistics
#df['mean'] = df.mean(axis=1)

# load csv to dataframe:
#dfs = pd.read_csv('./temp/sect_iter_100.csv') # original
dfs = pd.read_csv('./temp/subsector_a09_iter_100_20200423_pre.csv') # modified 20200420
# set index to LFS_sector
#dfs.set_index('subsector')
dfs.set_index('subsector')


Unnamed: 0_level_0,fa,iter0,iter1,iter2,iter3,iter4,iter5,iter6,iter7,iter8,...,iter88,iter89,iter90,iter91,iter92,iter93,iter94,iter95,iter96,iter97
subsector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Accounting, bookkeeping and auditing activities; tax consultancy",0.34,0.44,0.33,0.46,0.13,0.05,0.07,0.15,0.36,0.47,...,0.08,0.48,0.49,0.45,0.40,0.04,0.44,0.41,0.14,0.18
Defense activities,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,...,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00
Women's and girls' and babies' garment manufacturing,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
General public administration activities,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,...,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00
Restaurants and mobile food service activities,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Manufacture of wooden containers,0.31,0.08,0.37,0.15,0.33,0.38,0.18,0.38,0.10,0.42,...,0.43,0.44,0.19,0.14,0.37,0.04,0.40,0.41,0.06,0.37
"Other business support service activities, n.e.c.",0.26,0.05,0.46,0.18,0.23,0.35,0.37,0.23,0.46,0.29,...,0.26,0.13,0.11,0.43,0.09,0.42,0.07,0.21,0.44,0.32
"Non-ferrous rolling, drawing and extrusion mills",0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
"Manufacture of steam generators, except central heating hot water",0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00


In [11]:
get_shock_stats_subsector()

KeyError: "None of ['subsector'] are in the columns"

In [19]:
ll = merge_rank()

In [22]:
ll = ll[['a09_pqkb','LFS_sector']]
ll = ll.drop_duplicates(subset='a09_pqkb')
ll = ll.rename(columns={'a09_pqkb': 'subsector'}).set_index('subsector')

ll

Unnamed: 0_level_0,LFS_sector
subsector,Unnamed: 1_level_1
"Accounting, bookkeeping and auditing activities; tax consultancy",professional_services
Defense activities,government
Women's and girls' and babies' garment manufacturing,manufacturing
General public administration activities,government
Restaurants and mobile food service activities,food_entertainment
...,...
Manufacture of wooden containers,manufacturing
"Other business support service activities, n.e.c.",professional_services
"Non-ferrous rolling, drawing and extrusion mills",manufacturing
"Manufacture of steam generators, except central heating hot water",manufacturing


In [24]:
a = pd.merge(ll,c, on='subsector',how='left')
a.to_csv('./temp/subsector_shocks_20200423_psv0.csv')
a

Unnamed: 0_level_0,LFS_sector,mean,std_dev
subsector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Accounting, bookkeeping and auditing activities; tax consultancy",professional_services,0.245,0.155
Defense activities,government,1.000,0.000
Women's and girls' and babies' garment manufacturing,manufacturing,0.000,0.000
General public administration activities,government,1.000,0.000
Restaurants and mobile food service activities,food_entertainment,0.000,0.000
...,...,...,...
Manufacture of wooden containers,manufacturing,0.238,0.137
"Other business support service activities, n.e.c.",professional_services,0.259,0.141
"Non-ferrous rolling, drawing and extrusion mills",manufacturing,0.000,0.000
"Manufacture of steam generators, except central heating hot water",manufacturing,0.000,0.000
