In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')

%matplotlib inline

In [4]:
# Load the April-July 2020 employed-adults extract; the CSV's first column becomes the row index.
census_df = pd.read_csv(
    '../../src/csv/employed_adults_apr2020_jul2020.csv',
    index_col=0,
)

In [5]:
# Summarize the loaded frame: number of rows and columns, dtypes, and memory usage.
census_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11072 entries, 9 to 72340
Columns: 777 entries, HRHHID_x to IND_ID_FINAL_y
dtypes: int64(774), object(3)
memory usage: 65.7+ MB


In [6]:
# List the column labels of the loaded frame.
census_df.columns

Index(['HRHHID_x', 'HRMONTH_x', 'HRYEAR4_x', 'HURESPLI_x', 'HUFINAL_x',
       'HETENURE_x', 'HEHOUSUT_x', 'HETELHHD_x', 'HETELAVL_x', 'HEPHONEO_x',
       ...
       'PTNMEMP1_y', 'PEPDEMP2_y', 'PTNMEMP2_y', 'PECERT1_y', 'PECERT2_y',
       'PECERT3_y', 'PXCERT1_y', 'PXCERT2_y', 'PXCERT3_y', 'IND_ID_FINAL_y'],
      dtype='object', length=777)

# Columns suffixed `_x` come from our April 2020 data set; columns suffixed `_y` come from our July 2020 data set

## Create our target variable from PREMPNOT_y

#### From the data dict:
- `1`: EMPLOYED
- `2`: UNEMPLOYED
- `3`: NOT IN LABOR FORCE (NILF) - discouraged
- `4`: NOT IN LABOR FORCE (NILF) - other

So we want to bin the variable into two classes: 1 means the person is no longer employed (i.e. they DID lose their job), and 0 means they are still employed.


In [12]:
def job_loss_categorization(n):
    """Binarize a PREMPNOT employment-status code into a job-loss flag.

    Parameters
    ----------
    n : number
        A PREMPNOT code (1 = employed, 2 = unemployed,
        3 / 4 = not in labor force).

    Returns
    -------
    int
        1 when ``n`` is greater than 1, otherwise 0.

    Notes
    -----
    Every code that is not greater than 1 maps to 0. That covers code 1
    (employed) but also the negative code -1, so rows carrying -1 are
    counted as "no job loss" unless they are removed beforehand.
    """
    no_longer_employed = n > 1
    return 1 if no_longer_employed else 0

The negative values in PREMPNOT_y are presumably individuals in the military, since children were already filtered out. PREMPNOT is derived from PEMLR values 1-7, which in turn is only defined for PRPPERTYP 2 (adult civilians).

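We filtered the initial data set to PRPPERTYP == 2, and PREMPNOT_x contains only the value 1. Under this interpretation, the conclusion is that about 24% of our cohort (2,659 of 11,072) joined the military between April and July. This is an inference from the variable definitions rather than something the data states directly, so it should be verified against the data dictionary before the -1 rows are used.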

In [20]:
# Raw counts of the July (PREMPNOT_y) codes; the job_loss_categorization binarization is not applied here.
census_df['PREMPNOT_y'].value_counts()

 1    6399
-1    2659
 4    1590
 2     409
 3      15
Name: PREMPNOT_y, dtype: int64

In [18]:
# Raw counts of the April (PREMPNOT_x) codes.
census_df['PREMPNOT_x'].value_counts()

1    11072
Name: PREMPNOT_x, dtype: int64

In [21]:
# Share of rows whose July code (PREMPNOT_y) is -1, computed from the frame
# itself so the figure cannot go stale the way hand-copied counts would.
(census_df['PREMPNOT_y'] == -1).mean()

0.24015534682080925

In [25]:
# Keep only the first row for each HH_ID value. Reassigning the result
# (instead of inplace=True) follows pandas guidance; the name census_df is
# kept so the cells that follow still see the de-duplicated frame.
# NOTE(review): this reduces the frame to one row per HH_ID, so any counts
# displayed earlier no longer describe census_df after this cell runs.
census_df = census_df.drop_duplicates(subset='HH_ID', keep='first')

In [26]:
# April (PREMPNOT_x) code counts again, now on the frame de-duplicated by HH_ID.
census_df['PREMPNOT_x'].value_counts()

1    4432
Name: PREMPNOT_x, dtype: int64

In [27]:
# July (PREMPNOT_y) code counts again, now on the frame de-duplicated by HH_ID.
census_df['PREMPNOT_y'].value_counts()

 1    3695
-1     449
 4     158
 2     127
 3       3
Name: PREMPNOT_y, dtype: int64

In [29]:
# Inspect the rows whose July code (PREMPNOT_y) is -1.
census_df.loc[census_df['PREMPNOT_y'] == -1]

Unnamed: 0,HRHHID_x,HRMONTH_x,HRYEAR4_x,HURESPLI_x,HUFINAL_x,HETENURE_x,HEHOUSUT_x,HETELHHD_x,HETELAVL_x,HEPHONEO_x,...,PTNMEMP1_y,PEPDEMP2_y,PTNMEMP2_y,PECERT1_y,PECERT2_y,PECERT3_y,PXCERT1_y,PXCERT2_y,PXCERT3_y,IND_ID_FINAL_y
1075,129050010040777,4,2020,2,201,-1,1,1,-1,1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,12905001004077711111-1-1-1-1
1160,310959120604208,4,2020,1,201,-1,1,1,-1,0,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,31095912060420810011-1-1-1-1
1932,602904401580005,4,2020,1,201,-1,1,1,2,1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,60290440158000510111-1-1-1-1
2058,20601151209317,4,2020,2,201,-1,1,1,-1,1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,2060115120931710011-1-1-1-1
2367,231225003600071,4,2020,1,201,-1,1,1,-1,1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,23122500360007110111-1-1-1-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71643,510008465382816,4,2020,1,201,-1,1,1,-1,1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,51000846538281610011-1-1-1-1
71743,622770406691205,4,2020,2,201,-1,1,1,-1,1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,62277040669120510011-1-1-1-1
71756,641170592691205,4,2020,1,201,-1,1,1,-1,1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,64117059269120510012-1-1-1-1
71761,648510487141105,4,2020,1,201,-1,1,1,-1,1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,64851048714110511011-1-1-1-1
