In [1]:
import numpy as np
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import math
import glob,os
import sys
import scipy
from importlib import  reload
from time import process_time 
#from libraries.lib_gather_data import get_hhid_FIES
from datetime import datetime
%load_ext autoreload

In [2]:
from shock_libraries import *
from plotting_libraries import *
from response_libraries import get_response_sp
#
from income_shock_libraries_ps import *
#
from libraries.lib_country_dir import set_directories, load_survey_data, get_places_dict
from libraries.lib_get_hh_savings import get_hh_savings
from libraries.pandas_helper import broadcast_simple

In [3]:
# formatting & aesthetics
font = {'family':'sans serif', 'size':10}
plt.rc('font', **font)
mpl.rcParams['xtick.labelsize'] = 10
mpl.rcParams['ytick.labelsize'] = 10
mpl.rcParams['legend.facecolor'] = 'white'
sns.set_style("white")

sns_pal = sns.color_palette('Set1', n_colors=8, desat=.4)
greys_pal = sns.color_palette('Greys', n_colors=9)

In [4]:
#### ORIGINAL FUNCTION
#---------------------------------- Added: 20200422: <PS>

def rand_weighted_shock_3dim_v2_edit():

    """    
    Updated 20200422 
        - incorporate 3rd dimension for sector (public/private/gov) and impact on essentiality
            - now, only enforce that government jobs are maintained, across all sectors
    
    Updated 20200419 
        - incorporate 3rd dimension for sector (public/private/gov) and impact on essentiality 
    Updated 20200413:
        - incorporate 2nd dimension for social distancing potential
    module added to covid_phl: <income_shock_libraries_ps.py>

    primary development: <FCT> rand_weigthed_shock_distance() <FCT>

    - function to replace: 
        rand_weighted_shock_3dim()--> rand_weighted_shock_distance() --> rand_weighted_shock_1() --> get_income_shock(): in <shock_libraries.py>

    - description:
        * matches existing df_shock dataframe (compatibiility)
        * uses Kayenat table of job descriptions demand value for 'a09_pqkb' by sector to create weighted probability of job disruption by sector, as input to 'fa' column of df_shock             
        dataframe -- representative FIES and LFS data 

       * for values 0.0, 0.5,1 : assigns each job description a random: 0-50%, 50-99%, 100% chance of disruption, weighting each by the prevalence of that role in each sector, to generate cumulative probability of disruption. 


        * now incorporates enforcement of social distancing measures, by enforcing social distance in non-essential jobs based on K.Kabirs' 0-4 'work-from-home' scoring.



    """
    # develop 3 factor code here:

    # make each factor modular

    mr = merge_rank()
    if not 'LFS_sector' in mr.columns:
        mr = mr.rename(columns={'LFS_sector_x': 'LFS_sector'})
            # get subset: a09_pqkb
    mr_subset = mr[['hhid_lfs','LFS_sector','cc101_lno','a09_pqkb','c19_pclass','demand_scale', 'w_home']]

    indexNames = mr_subset[mr_subset['a09_pqkb'] == 'nan' ].index

        # Delete these row indexes from dataFrame
    mr_subset.drop(indexNames , inplace=True)
    mr_subset = mr_subset.reset_index(drop=True)

     # get subset: c19_pclass

    indexNames2 = mr_subset[mr_subset['c19_pclass'] == 'nan' ].index

        # Delete these row indexes from dataFrame
    mr_subset.drop(indexNames2 , inplace=True)
    mr_subset = mr_subset.reset_index(drop=True)

    # make new column of combined string a09 && c19:
    mr_subset['a09c19'] = mr_subset['a09_pqkb'] +'-'+mr_subset['c19_pclass']

        # enforce string:
    mr_subset['a09_pqkb'] = [str(q) for q in mr_subset['a09_pqkb']] # enforce type = string
    mr_subset['LFS_sector'] = [str(q) for q in mr_subset['LFS_sector']] # enforce type = string
    mr_subset['c19_pclass'] = [str(q) for q in mr_subset['c19_pclass']] # enforce type = string
    mr_subset['a09c19'] = [str(q) for q in mr_subset['a09c19']] # enforce type = string


        # generate fraction by sector
    mr_subset['desc_count'] = mr_subset.groupby('a09_pqkb')['a09_pqkb'].transform('count')# count unique jobs and append to mr_subset
    mr_subset['sector_count'] = mr_subset.groupby('LFS_sector')['LFS_sector'].transform('count') #count total unique sectors and append to mr_subset
    mr_subset['sector_frac'] = mr_subset['desc_count'] / mr_subset['sector_count'] # get fraction of sector as weighting



    #####
    # here, need to insert a new column that merges a09 and c19 -- done
    # then, drop duplicates off of this column, so that we can minimize computation

    # still need logic to build the logic for each job sector
    ## may need to restructure this whole section of code

    #####
        # drop duplicates (now that overall weighting established)
    mr_subset = mr_subset.drop_duplicates(subset='a09_pqkb')
    mr_subset = mr_subset.reset_index(drop=True)



        # generate probability and combine with relative weighting
    mr_subset['partial_prob'] = np.nan
    mr_subset['third_col'] = np.nan
    mr_subset['dummy'] = np.nan

        # incorporate Kayenat tables into 'di' &&
        # nested logic to incorporate 0-4 scale for social distancing measures
        ## where scores of 0 & 1 result in complete job lost, due to unable to distance
    i=0
    while i < len(mr_subset):



        if mr_subset.demand_scale[i] == 0:

                # incorporate 0-4 scale logic:

            if mr_subset.w_home[i] == 0:
                mr_subset.partial_prob[i] = 1

            elif mr_subset.w_home[i] == 1:
                mr_subset.partial_prob[i] = 1

            else:
                mr_subset.partial_prob[i] = mr_subset.sector_frac[i] * (random.randint(0,50)/100)


        elif mr_subset.demand_scale[i] == 0.5: 

                # incorporate 0-4 scale logic:
            if mr_subset.w_home[i] == 0:
                mr_subset.partial_prob[i] = 1

            elif mr_subset.w_home[i] == 1:
                mr_subset.partial_prob[i] = 1

            else: 
                mr_subset.partial_prob[i] = mr_subset.sector_frac[i] * (random.randint(50,100)/100)

        elif mr_subset.demand_scale[i] == 1.0:
            mr_subset.partial_prob[i] = mr_subset.sector_frac[i]
        else:
            mr_subset.dummy[i] = -99

            
        # incorporate 3rd column modifiers here:
        if (mr_subset['c19_pclass'][i] == "Gov't/Gov't Corporation"):
            mr_subset.partial_prob[i] = 0  # essentially reverts the random uniform logic implemented above


        i = i + 1

        # remove nans in summing fields, and dummy storage
    del mr_subset['dummy']

        #define shock table:
    shock_null = { 'ag':           [  0,  0],
                     'mining':        [  0,  0],
                     'utilities':     [  0,  0],
                     'construction':  [0.0,1.0],
                     'manufacturing': [0.0,1.0],
                     'wholesale':     [0.0,1.0],
                     'retail':        [0.0,1.0],
                     'transportation':[0.0,1.0],
                     'information':   [0.0,1.0],
                     'finance':       [0.0,1.0],
                     'professional_services':[0.0,1.0],
                     'eduhealth':     [0.0,1.0],
                     'food_entertainment':[0.0,1.0],
                     'government':    [  0,  0],
                     'other':         [0.0,1.0]}
    df_shock_null = pd.DataFrame(data=shock_null).T
    df_shock_null.columns = ['fa','di']
    df_shock_null.index.name = 'LFS_sector'


    df_shock_cum = df_shock_null

        # get mean probability by sector:

    for seclist in df_shock_cum.index: # hard-coded to existing shock table

        pp = mr_subset[mr_subset.LFS_sector == seclist]
        p4 = 1 - sum(pp.partial_prob)

            # build shock table:
        df_shock_cum['fa'][seclist] = df_shock_cum['fa'][seclist] + p4

        # save to separate var for testing    
    rand_weighted_shock = df_shock_cum
    
    return(rand_weighted_shock)


In [5]:
a = rand_weighted_shock_3dim_v2_edit()
a

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guid

Unnamed: 0_level_0,fa,di
LFS_sector,Unnamed: 1_level_1,Unnamed: 2_level_1
ag,-3.954463,0.0
mining,0.8497955,0.0
utilities,-0.8034483,0.0
construction,0.9073478,1.0
manufacturing,-120.2407,1.0
wholesale,-15.32032,1.0
retail,-37.66934,1.0
transportation,-12.06748,1.0
information,1.110223e-16,1.0
finance,-0.8934774,1.0


In [6]:
#### TESTING AS SCRIPT
#---------------------------------- Added: 20200422: <PS>


mr = merge_rank('./temp/lfs_a09_pqkb_ranked_V2_entrpreneurial_20200423.csv')
if not 'LFS_sector' in mr.columns:
        mr = mr.rename(columns={'LFS_sector_x': 'LFS_sector'})


In [7]:
    # get subset: a09_pqkb
mr_subset = mr[['hhid_lfs','cc101_lno','LFS_sector','a09_pqkb','c19_pclass','demand_scale', 'w_home','E_sector']]
mr_subset

Unnamed: 0,hhid_lfs,cc101_lno,LFS_sector,a09_pqkb,c19_pclass,demand_scale,w_home,E_sector
0,101011011112,4,professional_services,"Accounting, bookkeeping and auditing activitie...",Private Establishment,0.0,4.0,Entrep. Activities NEC
1,101011011112,6,government,Defense activities,Gov't/Gov't Corporation,1.0,1.0,CSRP services
2,101011011112,8,manufacturing,Women's and girls' and babies' garment manufac...,Private Establishment,0.0,0.0,Manufacturing
3,101011013134,1,government,General public administration activities,Gov't/Gov't Corporation,1.0,1.0,CSRP services
4,101011013134,2,food_entertainment,Restaurants and mobile food service activities,Self Employed,0.5,1.0,CSRP services
...,...,...,...,...,...,...,...,...
75987,984350483,6,ag,Growing of paddy rice,Self Employed,1.0,2.0,Crop Farming and Gardening
75988,984350483,9,ag,Logging,Self Employed,0.0,2.0,Forestry and Hunting
75989,9843505110,2,retail,Retail sale in non-specialized stores with foo...,Self Employed,1.0,0.0,Wholesale and Retail
75990,9843505110,3,ag,Growing of paddy rice,Without Pay (Family owned Business),1.0,2.0,Crop Farming and Gardening


In [8]:
indexNames = mr_subset[mr_subset['a09_pqkb'] == 'nan' ].index

    # Delete these row indexes from dataFrame
mr_subset.drop(indexNames , inplace=True)
mr_subset = mr_subset.reset_index(drop=True)

 # get subset: c19_pclass

indexNames2 = mr_subset[mr_subset['c19_pclass'] == 'nan' ].index

    # Delete these row indexes from dataFrame
mr_subset.drop(indexNames2 , inplace=True)
mr_subset = mr_subset.reset_index(drop=True)
mr_subset

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,hhid_lfs,cc101_lno,LFS_sector,a09_pqkb,c19_pclass,demand_scale,w_home,E_sector
0,101011011112,4,professional_services,"Accounting, bookkeeping and auditing activitie...",Private Establishment,0.0,4.0,Entrep. Activities NEC
1,101011011112,6,government,Defense activities,Gov't/Gov't Corporation,1.0,1.0,CSRP services
2,101011011112,8,manufacturing,Women's and girls' and babies' garment manufac...,Private Establishment,0.0,0.0,Manufacturing
3,101011013134,1,government,General public administration activities,Gov't/Gov't Corporation,1.0,1.0,CSRP services
4,101011013134,2,food_entertainment,Restaurants and mobile food service activities,Self Employed,0.5,1.0,CSRP services
...,...,...,...,...,...,...,...,...
74272,984350483,6,ag,Growing of paddy rice,Self Employed,1.0,2.0,Crop Farming and Gardening
74273,984350483,9,ag,Logging,Self Employed,0.0,2.0,Forestry and Hunting
74274,9843505110,2,retail,Retail sale in non-specialized stores with foo...,Self Employed,1.0,0.0,Wholesale and Retail
74275,9843505110,3,ag,Growing of paddy rice,Without Pay (Family owned Business),1.0,2.0,Crop Farming and Gardening


In [9]:
# make new column of combined string a09 && c19:
mr_subset['a09c19'] = mr_subset['a09_pqkb'] +'-'+mr_subset['c19_pclass']

    # enforce string:
mr_subset['a09_pqkb'] = [str(q).strip() for q in mr_subset['a09_pqkb']] # enforce type = string
mr_subset['LFS_sector'] = [str(q).strip() for q in mr_subset['LFS_sector']] # enforce type = string
mr_subset['c19_pclass'] = [str(q).strip() for q in mr_subset['c19_pclass']] # enforce type = string
mr_subset['a09c19'] = [str(q).strip() for q in mr_subset['a09c19']] # enforce type = string
if 'E_sector' in mr_subset.columns:
    mr_subset['E_sector'] = [str(q).strip() for q in mr_subset['E_sector']] # enforce type = string

mr_subset


Unnamed: 0,hhid_lfs,cc101_lno,LFS_sector,a09_pqkb,c19_pclass,demand_scale,w_home,E_sector,a09c19
0,101011011112,4,professional_services,"Accounting, bookkeeping and auditing activitie...",Private Establishment,0.0,4.0,Entrep. Activities NEC,"Accounting, bookkeeping and auditing activitie..."
1,101011011112,6,government,Defense activities,Gov't/Gov't Corporation,1.0,1.0,CSRP services,Defense activities-Gov't/Gov't Corporation
2,101011011112,8,manufacturing,Women's and girls' and babies' garment manufac...,Private Establishment,0.0,0.0,Manufacturing,Women's and girls' and babies' garment manufac...
3,101011013134,1,government,General public administration activities,Gov't/Gov't Corporation,1.0,1.0,CSRP services,General public administration activities-Gov't...
4,101011013134,2,food_entertainment,Restaurants and mobile food service activities,Self Employed,0.5,1.0,CSRP services,Restaurants and mobile food service activities...
...,...,...,...,...,...,...,...,...,...
74272,984350483,6,ag,Growing of paddy rice,Self Employed,1.0,2.0,Crop Farming and Gardening,Growing of paddy rice-Self Employed
74273,984350483,9,ag,Logging,Self Employed,0.0,2.0,Forestry and Hunting,Logging-Self Employed
74274,9843505110,2,retail,Retail sale in non-specialized stores with foo...,Self Employed,1.0,0.0,Wholesale and Retail,Retail sale in non-specialized stores with foo...
74275,9843505110,3,ag,Growing of paddy rice,Without Pay (Family owned Business),1.0,2.0,Crop Farming and Gardening,Growing of paddy rice-Without Pay (Family owne...


In [10]:
x = np.unique(mr_subset.c19_pclass)
x

array(['Employer', "Gov't/Gov't Corporation", 'Private Establishment',
       'Private Household', 'Self Employed',
       'With pay (Family owned Business)',
       'Without Pay (Family owned Business)'], dtype=object)

In [11]:
### create entrepreneurial table:
#rslt_df = dataframe[dataframe['Percentage'] > 80] 
#subsetDataFrame = dfObj[dfObj['Product'].isin(['Mangos', 'Grapes']) ]


# or nonag_wage : private household, private establishment, govt corporation, with pay (family owned business)
df_nonag = mr_subset[~mr_subset['c19_pclass'].isin(['Self Employed', 'Employer','Without Pay (Family owned Business)'])]
df_nonag


# generate fraction by  ENTREPRENEURIAL sector
df_nonag['desc_count'] = df_nonag.groupby('a09_pqkb')['a09_pqkb'].transform('count')# count unique jobs and append to mr_subset
df_nonag['sector_count'] = df_nonag.groupby('E_sector')['E_sector'].transform('count') #count total unique sectors and append to mr_subset
df_nonag['sector_frac'] = df_nonag['desc_count'] / df_nonag['sector_count'] # get fraction of sector as weightin

#### for now we will leave non-ag here

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [12]:
# for entrepreneurial income: self employed, employer, withOUT pay (family owned business)
df_ent = mr_subset[mr_subset['c19_pclass'].isin(['Self Employed', 'Employer','Without Pay (Family owned Business)'])]
df_ent

# generate fraction by  ENTREPRENEURIAL sector
df_ent['desc_count'] = df_ent.groupby('a09_pqkb')['a09_pqkb'].transform('count')# count unique jobs and append to mr_subset
df_ent['sector_count'] = df_ent.groupby('E_sector')['E_sector'].transform('count') #count total unique sectors and append to mr_subset
df_ent['sector_frac'] = df_ent['desc_count'] / df_ent['sector_count'] # get fraction of sector as weightin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [13]:
    
    
    
#     # generate fraction by sector
# mr_subset['desc_count'] = mr_subset.groupby('a09_pqkb')['a09_pqkb'].transform('count')# count unique jobs and append to mr_subset
# mr_subset['sector_count'] = mr_subset.groupby('LFS_sector')['LFS_sector'].transform('count') #count total unique sectors and append to mr_subset
# mr_subset['sector_frac'] = mr_subset['desc_count'] / mr_subset['sector_count'] # get fraction of sector as weighting



#####
# here, need to insert a new column that merges a09 and c19 -- done
# then, drop duplicates off of this column, so that we can minimize computation

# still need logic to build the logic for each job sector
## may need to restructure this whole section of code

#####
    # drop duplicates (now that overall weighting established)
df_ent = df_ent.drop_duplicates(subset='a09_pqkb')
df_ent = df_ent.reset_index(drop=True)



    # generate probability and combine with relative weighting
df_ent['partial_prob'] = np.nan
df_ent['third_col'] = np.nan
df_ent['dummy'] = np.nan

    # incorporate Kayenat tables into 'di' &&
    # nested logic to incorporate 0-4 scale for social distancing measures
    ## where scores of 0 & 1 result in complete job lost, due to unable to distance
i=0
while i < len(df_ent):

    if df_ent.demand_scale[i] == 0:

        # incorporate 0-4 scale logic:

        if df_ent.w_home[i] == 0:
            df_ent.partial_prob[i] = 0

        elif df_ent.w_home[i] == 1:
            df_ent.partial_prob[i] = 0

        else:
            df_ent.partial_prob[i] = df_ent.sector_frac[i] * (random.randint(0,50)/100)


    elif df_ent.demand_scale[i] == 0.5: 

        # incorporate 0-4 scale logic:
        if df_ent.w_home[i] == 0:
            df_ent.partial_prob[i] = 0

        elif df_ent.w_home[i] == 1:
            df_ent.partial_prob[i] = 0

        else: 
            df_ent.partial_prob[i] = df_ent.sector_frac[i] * (random.randint(50,100)/100)

    elif df_ent.demand_scale[i] == 1.0:
        df_ent.partial_prob[i] = df_ent.sector_frac[i]
    else:
        df_ent.dummy[i] = -99
    i = i + 1

# incorporate 3rd column modifiers here:
# if (df_ent['c19_pclass'][i] == "Gov't/Gov't Corporation"):
#     df_ent.partial_prob[i] = 0  # essentially reverts the random uniform logic implemented above


i = i + 1

    # remove nans in summing fields, and dummy storage
del df_ent['dummy']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

In [14]:
df_ent['c19_pclass'] == "Gov't/Gov't Corporation"

0      False
1      False
2      False
3      False
4      False
       ...  
313    False
314    False
315    False
316    False
317    False
Name: c19_pclass, Length: 318, dtype: bool

In [15]:
# get mean probability by sector:


#storage['fa'] = np.nan
storage = pd.DataFrame(columns=['fa', 'di'], index=[np.unique(df_ent.E_sector)])



for seclist in np.unique(df_ent.E_sector): # hard-coded to existing shock table

    pp = df_ent[df_ent.E_sector == seclist]
    p4 = 1 - sum(pp.partial_prob)

    # build shock table:
    storage['fa'][seclist] = storage['fa'][seclist] + p4
    print(seclist)
    
    
    
# save to separate var for testing    
rand_weighted_shock = storage

rand_weighted_shock
#return(rand_weighted_shock)

CSRP services
Construction
Crop Farming and Gardening
Entrep. Activities NEC
Fishing
Forestry and Hunting
Livestock and Poultry Raising
Manufacturing
Mining and Quarrying
Transportation, Storage and Comm. Services
Wholesale and Retail


Unnamed: 0,fa,di
CSRP services,,
Construction,,
Crop Farming and Gardening,,
Entrep. Activities NEC,,
Fishing,,
Forestry and Hunting,,
Livestock and Poultry Raising,,
Manufacturing,,
Mining and Quarrying,,
"Transportation, Storage and Comm. Services",,


In [16]:
storage = pd.DataFrame(columns = ['sector','fa'])
storage

Unnamed: 0,sector,fa


In [17]:
storage = pd.DataFrame(columns=['fa', 'di'], index=[np.unique(df_ent.E_sector)])
storage

Unnamed: 0,fa,di
CSRP services,,
Construction,,
Crop Farming and Gardening,,
Entrep. Activities NEC,,
Fishing,,
Forestry and Hunting,,
Livestock and Poultry Raising,,
Manufacturing,,
Mining and Quarrying,,
"Transportation, Storage and Comm. Services",,


In [18]:
pp = df_ent[df_ent.E_sector == 'Construction']
pp

Unnamed: 0,hhid_lfs,cc101_lno,LFS_sector,a09_pqkb,c19_pclass,demand_scale,w_home,E_sector,a09c19,desc_count,sector_count,sector_frac,partial_prob,third_col
43,1022310168002,1,construction,Construction of buildings,Self Employed,0.0,2.0,Construction,Construction of buildings-Self Employed,138,197,0.700508,0.0,
109,13111228114,1,construction,Electrical installation,Self Employed,0.0,2.0,Construction,Electrical installation-Self Employed,13,197,0.06599,0.015178,
111,1314625147,1,construction,Building completion and finishing,Self Employed,0.0,2.0,Construction,Building completion and finishing-Self Employed,24,197,0.121827,0.002437,
113,131530111,1,construction,Construction of roads and railways,Self Employed,0.0,2.0,Construction,Construction of roads and railways-Self Employed,13,197,0.06599,0.023096,
138,14912010387,1,construction,Other specialized construction activities,Employer,0.0,2.0,Construction,Other specialized construction activities-Empl...,2,197,0.010152,0.001421,
166,195120347,1,construction,"Cutting, shaping and finishing of stone",Self Employed,0.0,2.0,Construction,"Cutting, shaping and finishing of stone-Self E...",3,197,0.015228,0.004873,
260,3917107238,2,construction,Other construction installation,Self Employed,0.0,2.0,Construction,Other construction installation-Self Employed,1,197,0.005076,0.001777,
272,453011212240,1,construction,Construction of utility projects,Employer,0.0,2.0,Construction,Construction of utility projects-Employer,3,197,0.015228,0.004112,


In [19]:
df_ent[df_ent.E_sector == 'Wholesale and Retail'].partial_prob.sum()

0.7483346541378921

In [20]:
df_ent[df_ent.E_sector == 'Fishing'].partial_prob.sum()

0.9995994277539342

In [21]:
np.unique(df_ent.E_sector) #### the spacing is fucking up the table!

array(['CSRP services', 'Construction', 'Crop Farming and Gardening',
       'Entrep. Activities NEC', 'Fishing', 'Forestry and Hunting',
       'Livestock and Poultry Raising', 'Manufacturing',
       'Mining and Quarrying',
       'Transportation, Storage and Comm. Services',
       'Wholesale and Retail'], dtype=object)

In [22]:
storage = pd.DataFrame(columns=['fa', 'di'], index=[np.unique(df_ent.E_sector)])

for seclist in np.unique(df_ent.E_sector):
    pillow = 1 - (df_ent[df_ent.E_sector == seclist].partial_prob.sum())
#     if pillow > 1:
#         pillow = 1
    print(pillow)
    #storage[seclist]['fa'] = df_ent[df_ent.E_sector == seclist].partial_prob.sum()
    #df.loc[0:15,'A'] = 16
    storage.loc[seclist,'fa'] = pillow


0.9039544103072349
0.9471065989847716
0.006349871884463121
0.29460377358490564
0.0004005722460658223
0.6041322314049586
0.0008658008658009031
0.6947873303167421
0.8171428571428572
0.9443998553868402
0.25166534586210787


In [23]:
storage

Unnamed: 0,fa,di
CSRP services,0.903954,
Construction,0.947107,
Crop Farming and Gardening,0.00634987,
Entrep. Activities NEC,0.294604,
Fishing,0.000400572,
Forestry and Hunting,0.604132,
Livestock and Poultry Raising,0.000865801,
Manufacturing,0.694787,
Mining and Quarrying,0.817143,
"Transportation, Storage and Comm. Services",0.9444,


In [24]:
tstamp = (datetime.now().strftime("%Y%m%d_%H%M"))
storage.to_csv('./temp/entrep_table_problem_' +tstamp+'.csv')

In [None]:
def generate_shock_100_entre():  # initialize shock sector storage dataframe
   
    '''
   current hard coding for sensitivity analysis, 20200413: requires cleaning for further implementation
   - addition of modularity
   - 
   - current functionality:
       - outputs csv to location: './temp/sect_iter_100.csv
       - containing data frame with 101 simulations of <rand_weighted_shock_distance():
    - runtime: ~10minutes
   '''

    stor = rand_weighted_shock_3dim_v2()
    del stor['di']

    # set number of iterations
    p = 0
    n_iter = 99

    # model and store stochastic sector response
    while p < n_iter:
        new_val = rand_weighted_shock_3dim_v2()
        del new_val['di']
        new_val = new_val.rename(columns={'fa': ('iter'+str(p))})

        # pd.merge(labor,rank, on=merge_col, how='left')
        stor = pd.merge(stor,new_val,on='LFS_sector', how='left')
        p = p+ 1
        print(p)
    stor.to_csv('./temp/sect_iter_100_3dv2_20200422.csv')
