In [1]:
import numpy as np
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import math
import glob,os
import sys
import scipy
from importlib import  reload
from time import process_time 
#from libraries.lib_gather_data import get_hhid_FIES
from datetime import datetime
%load_ext autoreload

In [2]:
from shock_libraries import *
from plotting_libraries import *
from response_libraries import get_response_sp
#
from income_shock_libraries_ps import *
#
from libraries.lib_country_dir import set_directories, load_survey_data, get_places_dict
from libraries.lib_get_hh_savings import get_hh_savings
from libraries.pandas_helper import broadcast_simple

In [3]:
# formatting & aesthetics
font = {'family':'sans serif', 'size':10}
plt.rc('font', **font)
mpl.rcParams['xtick.labelsize'] = 10
mpl.rcParams['ytick.labelsize'] = 10
mpl.rcParams['legend.facecolor'] = 'white'
sns.set_style("white")

sns_pal = sns.color_palette('Set1', n_colors=8, desat=.4)
greys_pal = sns.color_palette('Greys', n_colors=9)

In [4]:
#### ORIGINAL FUNCTION
#---------------------------------- Added: 20200422: <PS>

def rand_weighted_shock_3dim_v2_edit():

    """    
    Updated 20200422 
        - incorporate 3rd dimension for sector (public/private/gov) and impact on essentiality
            - now, only enforce that government jobs are maintained, across all sectors
    
    Updated 20200419 
        - incorporate 3rd dimension for sector (public/private/gov) and impact on essentiality 
    Updated 20200413:
        - incorporate 2nd dimension for social distancing potential
    module added to covid_phl: <income_shock_libraries_ps.py>

    primary development: <FCT> rand_weigthed_shock_distance() <FCT>

    - function to replace: 
        rand_weighted_shock_3dim()--> rand_weighted_shock_distance() --> rand_weighted_shock_1() --> get_income_shock(): in <shock_libraries.py>

    - description:
        * matches existing df_shock dataframe (compatibiility)
        * uses Kayenat table of job descriptions demand value for 'a09_pqkb' by sector to create weighted probability of job disruption by sector, as input to 'fa' column of df_shock             
        dataframe -- representative FIES and LFS data 

       * for values 0.0, 0.5,1 : assigns each job description a random: 0-50%, 50-99%, 100% chance of disruption, weighting each by the prevalence of that role in each sector, to generate cumulative probability of disruption. 


        * now incorporates enforcement of social distancing measures, by enforcing social distance in non-essential jobs based on K.Kabirs' 0-4 'work-from-home' scoring.



    """
    # develop 3 factor code here:

    # make each factor modular

    mr = merge_rank()
    if not 'LFS_sector' in mr.columns:
        mr = mr.rename(columns={'LFS_sector_x': 'LFS_sector'})
            # get subset: a09_pqkb
    mr_subset = mr[['hhid_lfs','LFS_sector','cc101_lno','a09_pqkb','c19_pclass','demand_scale', 'w_home']]

    indexNames = mr_subset[mr_subset['a09_pqkb'] == 'nan' ].index

        # Delete these row indexes from dataFrame
    mr_subset.drop(indexNames , inplace=True)
    mr_subset = mr_subset.reset_index(drop=True)

     # get subset: c19_pclass

    indexNames2 = mr_subset[mr_subset['c19_pclass'] == 'nan' ].index

        # Delete these row indexes from dataFrame
    mr_subset.drop(indexNames2 , inplace=True)
    mr_subset = mr_subset.reset_index(drop=True)

    # make new column of combined string a09 && c19:
    mr_subset['a09c19'] = mr_subset['a09_pqkb'] +'-'+mr_subset['c19_pclass']

        # enforce string:
    mr_subset['a09_pqkb'] = [str(q) for q in mr_subset['a09_pqkb']] # enforce type = string
    mr_subset['LFS_sector'] = [str(q) for q in mr_subset['LFS_sector']] # enforce type = string
    mr_subset['c19_pclass'] = [str(q) for q in mr_subset['c19_pclass']] # enforce type = string
    mr_subset['a09c19'] = [str(q) for q in mr_subset['a09c19']] # enforce type = string


        # generate fraction by sector
    mr_subset['desc_count'] = mr_subset.groupby('a09_pqkb')['a09_pqkb'].transform('count')# count unique jobs and append to mr_subset
    mr_subset['sector_count'] = mr_subset.groupby('LFS_sector')['LFS_sector'].transform('count') #count total unique sectors and append to mr_subset
    mr_subset['sector_frac'] = mr_subset['desc_count'] / mr_subset['sector_count'] # get fraction of sector as weighting



    #####
    # here, need to insert a new column that merges a09 and c19 -- done
    # then, drop duplicates off of this column, so that we can minimize computation

    # still need logic to build the logic for each job sector
    ## may need to restructure this whole section of code

    #####
        # drop duplicates (now that overall weighting established)
    mr_subset = mr_subset.drop_duplicates(subset='a09_pqkb')
    mr_subset = mr_subset.reset_index(drop=True)



        # generate probability and combine with relative weighting
    mr_subset['partial_prob'] = np.nan
    mr_subset['third_col'] = np.nan
    mr_subset['dummy'] = np.nan

        # incorporate Kayenat tables into 'di' &&
        # nested logic to incorporate 0-4 scale for social distancing measures
        ## where scores of 0 & 1 result in complete job lost, due to unable to distance
    i=0
    while i < len(mr_subset):



        if mr_subset.demand_scale[i] == 0:

                # incorporate 0-4 scale logic:

            if mr_subset.w_home[i] == 0:
                mr_subset.partial_prob[i] = 1

            elif mr_subset.w_home[i] == 1:
                mr_subset.partial_prob[i] = 1

            else:
                mr_subset.partial_prob[i] = mr_subset.sector_frac[i] * (random.randint(0,50)/100)


        elif mr_subset.demand_scale[i] == 0.5: 

                # incorporate 0-4 scale logic:
            if mr_subset.w_home[i] == 0:
                mr_subset.partial_prob[i] = 1

            elif mr_subset.w_home[i] == 1:
                mr_subset.partial_prob[i] = 1

            else: 
                mr_subset.partial_prob[i] = mr_subset.sector_frac[i] * (random.randint(50,100)/100)

        elif mr_subset.demand_scale[i] == 1.0:
            mr_subset.partial_prob[i] = mr_subset.sector_frac[i]
        else:
            mr_subset.dummy[i] = -99

            
        # incorporate 3rd column modifiers here:
        if (mr_subset['c19_pclass'][i] == "Gov't/Gov't Corporation"):
            mr_subset.partial_prob[i] = 0  # essentially reverts the random uniform logic implemented above


        i = i + 1

        # remove nans in summing fields, and dummy storage
    del mr_subset['dummy']

        #define shock table:
    shock_null = { 'ag':           [  0,  0],
                     'mining':        [  0,  0],
                     'utilities':     [  0,  0],
                     'construction':  [0.0,1.0],
                     'manufacturing': [0.0,1.0],
                     'wholesale':     [0.0,1.0],
                     'retail':        [0.0,1.0],
                     'transportation':[0.0,1.0],
                     'information':   [0.0,1.0],
                     'finance':       [0.0,1.0],
                     'professional_services':[0.0,1.0],
                     'eduhealth':     [0.0,1.0],
                     'food_entertainment':[0.0,1.0],
                     'government':    [  0,  0],
                     'other':         [0.0,1.0]}
    df_shock_null = pd.DataFrame(data=shock_null).T
    df_shock_null.columns = ['fa','di']
    df_shock_null.index.name = 'LFS_sector'


    df_shock_cum = df_shock_null

        # get mean probability by sector:

    for seclist in df_shock_cum.index: # hard-coded to existing shock table

        pp = mr_subset[mr_subset.LFS_sector == seclist]
        p4 = 1 - sum(pp.partial_prob)

            # build shock table:
        df_shock_cum['fa'][seclist] = df_shock_cum['fa'][seclist] + p4

        # save to separate var for testing    
    rand_weighted_shock = df_shock_cum
    
    return(rand_weighted_shock)


In [5]:
a = rand_weighted_shock_3dim_v2_edit()
a

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guid

Unnamed: 0_level_0,fa,di
LFS_sector,Unnamed: 1_level_1,Unnamed: 2_level_1
ag,-3.950896,0.0
mining,0.7773048,0.0
utilities,-0.8034483,0.0
construction,0.5856794,1.0
manufacturing,-120.2286,1.0
wholesale,-15.32032,1.0
retail,-37.66749,1.0
transportation,-12.07363,1.0
information,1.110223e-16,1.0
finance,-0.9076749,1.0


In [6]:
#### TESTING AS SCRIPT
#---------------------------------- Added: 20200422: <PS>


mr = merge_rank('./temp/lfs_a09_pqkb_ranked_V2_entrpreneurial_20200423.csv')
if not 'LFS_sector' in mr.columns:
        mr = mr.rename(columns={'LFS_sector_x': 'LFS_sector'})


In [7]:
    # get subset: a09_pqkb
mr_subset = mr[['hhid_lfs','cc101_lno','LFS_sector','a09_pqkb','c19_pclass','demand_scale', 'w_home','E_sector']]
mr_subset

Unnamed: 0,hhid_lfs,cc101_lno,LFS_sector,a09_pqkb,c19_pclass,demand_scale,w_home,E_sector
0,101011011112,4,professional_services,"Accounting, bookkeeping and auditing activitie...",Private Establishment,0.0,4.0,Entrep. Activities NEC
1,101011011112,6,government,Defense activities,Gov't/Gov't Corporation,1.0,1.0,CSRP services
2,101011011112,8,manufacturing,Women's and girls' and babies' garment manufac...,Private Establishment,0.0,0.0,Manufacturing
3,101011013134,1,government,General public administration activities,Gov't/Gov't Corporation,1.0,1.0,CSRP services
4,101011013134,2,food_entertainment,Restaurants and mobile food service activities,Self Employed,0.5,1.0,CSRP services
...,...,...,...,...,...,...,...,...
75987,984350483,6,ag,Growing of paddy rice,Self Employed,1.0,2.0,Crop Farming and Gardening
75988,984350483,9,ag,Logging,Self Employed,0.0,2.0,Forestry and Hunting
75989,9843505110,2,retail,Retail sale in non-specialized stores with foo...,Self Employed,1.0,0.0,Wholesale and Retail
75990,9843505110,3,ag,Growing of paddy rice,Without Pay (Family owned Business),1.0,2.0,Crop Farming and Gardening


In [8]:
indexNames = mr_subset[mr_subset['a09_pqkb'] == 'nan' ].index

    # Delete these row indexes from dataFrame
mr_subset.drop(indexNames , inplace=True)
mr_subset = mr_subset.reset_index(drop=True)

 # get subset: c19_pclass

indexNames2 = mr_subset[mr_subset['c19_pclass'] == 'nan' ].index

    # Delete these row indexes from dataFrame
mr_subset.drop(indexNames2 , inplace=True)
mr_subset = mr_subset.reset_index(drop=True)
mr_subset

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,hhid_lfs,cc101_lno,LFS_sector,a09_pqkb,c19_pclass,demand_scale,w_home,E_sector
0,101011011112,4,professional_services,"Accounting, bookkeeping and auditing activitie...",Private Establishment,0.0,4.0,Entrep. Activities NEC
1,101011011112,6,government,Defense activities,Gov't/Gov't Corporation,1.0,1.0,CSRP services
2,101011011112,8,manufacturing,Women's and girls' and babies' garment manufac...,Private Establishment,0.0,0.0,Manufacturing
3,101011013134,1,government,General public administration activities,Gov't/Gov't Corporation,1.0,1.0,CSRP services
4,101011013134,2,food_entertainment,Restaurants and mobile food service activities,Self Employed,0.5,1.0,CSRP services
...,...,...,...,...,...,...,...,...
74272,984350483,6,ag,Growing of paddy rice,Self Employed,1.0,2.0,Crop Farming and Gardening
74273,984350483,9,ag,Logging,Self Employed,0.0,2.0,Forestry and Hunting
74274,9843505110,2,retail,Retail sale in non-specialized stores with foo...,Self Employed,1.0,0.0,Wholesale and Retail
74275,9843505110,3,ag,Growing of paddy rice,Without Pay (Family owned Business),1.0,2.0,Crop Farming and Gardening


In [27]:
# make new column of combined string a09 && c19:
mr_subset['a09c19'] = mr_subset['a09_pqkb'] +'-'+mr_subset['c19_pclass']

    # enforce string:
mr_subset['a09_pqkb'] = [str(q).strip() for q in mr_subset['a09_pqkb']] # enforce type = string
mr_subset['LFS_sector'] = [str(q).strip() for q in mr_subset['LFS_sector']] # enforce type = string
mr_subset['c19_pclass'] = [str(q).strip() for q in mr_subset['c19_pclass']] # enforce type = string
mr_subset['a09c19'] = [str(q).strip() for q in mr_subset['a09c19']] # enforce type = string
if 'E_sector' in mr_subset.columns:
    mr_subset['E_sector'] = [str(q).strip() for q in mr_subset['E_sector']] # enforce type = string

mr_subset


array(['CSRP services', 'Construction', 'Crop Farming and Gardening',
       'Entrep. Activities NEC', 'Fishing', 'Forestry and Hunting',
       'Livestock and Poultry Raising', 'Manufacturing',
       'Mining and Quarrying',
       'Transportation, Storage and Comm. Services',
       'Wholesale and Retail'], dtype=object)

In [10]:
x = np.unique(mr_subset.c19_pclass)
x

array(['Employer', "Gov't/Gov't Corporation", 'Private Establishment',
       'Private Household', 'Self Employed',
       'With pay (Family owned Business)',
       'Without Pay (Family owned Business)'], dtype=object)

In [11]:
### create entrepreneurial table:
#rslt_df = dataframe[dataframe['Percentage'] > 80] 
#subsetDataFrame = dfObj[dfObj['Product'].isin(['Mangos', 'Grapes']) ]


# or nonag_wage : private household, private establishment, govt corporation, with pay (family owned business)
df_nonag = mr_subset[~mr_subset['c19_pclass'].isin(['Self Employed', 'Employer','Without Pay (Family owned Business)'])]
df_nonag


# generate fraction by  ENTREPRENEURIAL sector
df_nonag['desc_count'] = df_nonag.groupby('a09_pqkb')['a09_pqkb'].transform('count')# count unique jobs and append to mr_subset
df_nonag['sector_count'] = df_nonag.groupby('E_sector')['E_sector'].transform('count') #count total unique sectors and append to mr_subset
df_nonag['sector_frac'] = df_nonag['desc_count'] / df_nonag['sector_count'] # get fraction of sector as weightin

#### for now we will leave non-ag here

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [12]:
# for entrepreneurial income: self employed, employer, withOUT pay (family owned business)
df_ent = mr_subset[mr_subset['c19_pclass'].isin(['Self Employed', 'Employer','Without Pay (Family owned Business)'])]
df_ent

# generate fraction by  ENTREPRENEURIAL sector
df_ent['desc_count'] = df_ent.groupby('a09_pqkb')['a09_pqkb'].transform('count')# count unique jobs and append to mr_subset
df_ent['sector_count'] = df_ent.groupby('E_sector')['E_sector'].transform('count') #count total unique sectors and append to mr_subset
df_ent['sector_frac'] = df_ent['desc_count'] / df_ent['sector_count'] # get fraction of sector as weightin


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [13]:
    
    
    
#     # generate fraction by sector
# mr_subset['desc_count'] = mr_subset.groupby('a09_pqkb')['a09_pqkb'].transform('count')# count unique jobs and append to mr_subset
# mr_subset['sector_count'] = mr_subset.groupby('LFS_sector')['LFS_sector'].transform('count') #count total unique sectors and append to mr_subset
# mr_subset['sector_frac'] = mr_subset['desc_count'] / mr_subset['sector_count'] # get fraction of sector as weighting



#####
# here, need to insert a new column that merges a09 and c19 -- done
# then, drop duplicates off of this column, so that we can minimize computation

# still need logic to build the logic for each job sector
## may need to restructure this whole section of code

#####
    # drop duplicates (now that overall weighting established)
df_ent = df_ent.drop_duplicates(subset='a09_pqkb')
df_ent = df_ent.reset_index(drop=True)



    # generate probability and combine with relative weighting
df_ent['partial_prob'] = np.nan
df_ent['third_col'] = np.nan
df_ent['dummy'] = np.nan

    # incorporate Kayenat tables into 'di' &&
    # nested logic to incorporate 0-4 scale for social distancing measures
    ## where scores of 0 & 1 result in complete job lost, due to unable to distance
i=0
while i < len(df_ent):



    if df_ent.demand_scale[i] == 0:

            # incorporate 0-4 scale logic:

        if df_ent.w_home[i] == 0:
            df_ent.partial_prob[i] = 1

        elif df_ent.w_home[i] == 1:
            df_ent.partial_prob[i] = 1

        else:
            df_ent.partial_prob[i] = df_ent.sector_frac[i] * (random.randint(0,50)/100)


    elif df_ent.demand_scale[i] == 0.5: 

            # incorporate 0-4 scale logic:
        if df_ent.w_home[i] == 0:
            df_ent.partial_prob[i] = 1

        elif df_ent.w_home[i] == 1:
            df_ent.partial_prob[i] = 1

        else: 
            df_ent.partial_prob[i] = df_ent.sector_frac[i] * (random.randint(50,100)/100)

    elif df_ent.demand_scale[i] == 1.0:
        df_ent.partial_prob[i] = df_ent.sector_frac[i]
    else:
        df_ent.dummy[i] = -99


    # incorporate 3rd column modifiers here:
    if (df_ent['c19_pclass'][i] == "Gov't/Gov't Corporation"):
        df_ent.partial_prob[i] = 0  # essentially reverts the random uniform logic implemented above


    i = i + 1

    # remove nans in summing fields, and dummy storage
del df_ent['dummy']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

In [14]:
df_ent

Unnamed: 0,hhid_lfs,cc101_lno,LFS_sector,a09_pqkb,c19_pclass,demand_scale,w_home,E_sector,a09c19,desc_count,sector_count,sector_frac,partial_prob,third_col
0,101011013134,2,food_entertainment,Restaurants and mobile food service activities,Self Employed,0.5,1.0,CSRP services,Restaurants and mobile food service activities...,1000,2018,0.495540,1.000000,
1,101011015158,1,ag,"Growing of corn, except young corn (vegetable)",Self Employed,1.0,2.0,Crop Farming and Gardening,"Growing of corn, except young corn (vegetable)...",3129,12879,0.242954,0.242954,
2,101011016169,1,ag,Growing of leafy and fruit bearing vegetables,Self Employed,1.0,2.0,Crop Farming and Gardening,Growing of leafy and fruit bearing vegetables-...,1250,12879,0.097057,0.097057,
3,101011019203,1,ag,Hog farming,Self Employed,1.0,0.0,Livestock and Poultry Raising,Hog farming-Self Employed,776,1155,0.671861,0.671861,
4,1010110327,1,ag,Growing of sugarcane including muscovado sugar...,Employer,1.0,2.0,Crop Farming and Gardening,Growing of sugarcane including muscovado sugar...,135,12879,0.010482,0.010482,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
313,764824178010,1,manufacturing,Manufacture of refractory products,Employer,0.0,0.0,Manufacturing,Manufacture of refractory products-Employer,2,1105,0.001810,1.000000,
314,7649378010,2,eduhealth,Other social work activities without accommoda...,Self Employed,0.5,0.0,CSRP services,Other social work activities without accommoda...,1,2018,0.000496,1.000000,
315,81210013300,1,professional_services,Combined office administrative service activities,Self Employed,0.0,3.0,Entrep. Activities NEC,Combined office administrative service activit...,3,81,0.037037,0.018148,
316,8253416581,2,finance,Other activities auxilary to insurance and pen...,Self Employed,1.0,3.0,Entrep. Activities NEC,Other activities auxilary to insurance and pen...,1,184,0.005435,0.005435,


In [15]:
# get mean probability by sector:


#storage['fa'] = np.nan
storage = pd.DataFrame(columns=['fa', 'di'], index=[np.unique(df_ent.E_sector)])



for seclist in np.unique(df_ent.E_sector): # hard-coded to existing shock table

    pp = df_ent[df_ent.E_sector == seclist]
    p4 = 1 - sum(pp.partial_prob)

    # build shock table:
    storage['fa'][seclist] = storage['fa'][seclist] + p4
    print(seclist)
    
    
    
# save to separate var for testing    
rand_weighted_shock = storage

rand_weighted_shock
#return(rand_weighted_shock)

CSRP services
Construction 
Crop Farming and Gardening 
Entrep. Activities NEC
Entrep. Activities NEC 
Fishing
Forestry and Hunting 
Livestock and Poultry Raising   
Manufacturing   
Mining and Quarrying   
Transportation, Storage and Comm. Services
Transportation, Storage and Comm. Services 
Wholesale and Retail


Unnamed: 0,fa,di
CSRP services,,
Construction,,
Crop Farming and Gardening,,
Entrep. Activities NEC,,
Entrep. Activities NEC,,
Fishing,,
Forestry and Hunting,,
Livestock and Poultry Raising,,
Manufacturing,,
Mining and Quarrying,,


In [16]:
storage = pd.DataFrame(columns = ['sector','fa'])
storage

Unnamed: 0,sector,fa


In [17]:
storage = pd.DataFrame(columns=['fa', 'di'], index=[np.unique(df_ent.E_sector)])
storage

Unnamed: 0,fa,di
CSRP services,,
Construction,,
Crop Farming and Gardening,,
Entrep. Activities NEC,,
Entrep. Activities NEC,,
Fishing,,
Forestry and Hunting,,
Livestock and Poultry Raising,,
Manufacturing,,
Mining and Quarrying,,


In [18]:
pp = df_ent[df_ent.E_sector == 'Construction']
pp

Unnamed: 0,hhid_lfs,cc101_lno,LFS_sector,a09_pqkb,c19_pclass,demand_scale,w_home,E_sector,a09c19,desc_count,sector_count,sector_frac,partial_prob,third_col


In [19]:
df_ent[df_ent.E_sector == 'Wholesale and Retail'].partial_prob.sum()

48.74905241707234

In [20]:
df_ent[df_ent.E_sector == 'Fishing'].partial_prob.sum()

0.9997329518359561

In [21]:
np.unique(df_ent.E_sector) #### the spacing is fucking up the table!

array(['CSRP services', 'Construction ', 'Crop Farming and Gardening ',
       'Entrep. Activities NEC', 'Entrep. Activities NEC ', 'Fishing',
       'Forestry and Hunting ', 'Livestock and Poultry Raising   ',
       'Manufacturing   ', 'Mining and Quarrying   ',
       'Transportation, Storage and Comm. Services',
       'Transportation, Storage and Comm. Services ',
       'Wholesale and Retail'], dtype=object)

In [22]:
storage = pd.DataFrame(columns=['fa', 'di'], index=[np.unique(df_ent.E_sector)])

for seclist in np.unique(df_ent.E_sector):
    pillow = df_ent[df_ent.E_sector == seclist].partial_prob.sum()
    if pillow > 1:
        pillow = 1
    print(pillow)
    #storage[seclist]['fa'] = df_ent[df_ent.E_sector == seclist].partial_prob.sum()
    #df.loc[0:15,'A'] = 16
    storage.loc[seclist,'fa'] = pillow


1
0.2747715736040609
0.9943388461837099
1
1
0.9997329518359561
0.27148760330578514
1
1
0.2991836734693878
1.0
1
1


In [23]:
storage

Unnamed: 0,fa,di
CSRP services,1.0,
Construction,0.274772,
Crop Farming and Gardening,0.994339,
Entrep. Activities NEC,1.0,
Entrep. Activities NEC,1.0,
Fishing,0.999733,
Forestry and Hunting,0.271488,
Livestock and Poultry Raising,1.0,
Manufacturing,1.0,
Mining and Quarrying,0.299184,


In [24]:
tstamp = (datetime.now().strftime("%Y%m%d_%H%M"))
storage.to_csv('./temp/entrep_table_problem_' +tstamp+'.csv')