### Using IPUMS records, calculate life sciences occupations 

In [1]:
import pandas as pd
import numpy as np
import math

In [2]:
# allow max rows and colums to be displayed
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#### Stat functions for using replicate weights

In [3]:
# functions to calculate standard error, moe, and coefficient of variation
def get_se(per_wt,rep_weights):
    result = math.sqrt((sum(map(lambda x: (x-per_wt)**2,rep_weights))/20))
    return result

def get_moe(se):
    return se*1.645 #90% confidence interval

def agg_moe(m):
    result = math.sqrt(sum(map(lambda x: x**2, m)))
    return result

def get_cv(est,m):
    if est == 0:
        return 0
    else:
        return (np.absolute(m/1.645/est))*100

In [4]:
# create a list of replicate weights
repwt = 'REPWTP'
repwts = [repwt+str(i) for i in range(1, 81)]

#### Geography look up files & cleanup

In [5]:
# pull in geography reference files
df = pd.read_csv('../data/occ_pums.csv')
respuma_00 = pd.read_csv('../data/respuma_xwalk_00.csv')
respuma_10 = pd.read_csv('../data/respuma_xwalk_10.csv')

In [6]:
respuma_10.head()

Unnamed: 0,StateFIPS,PUMA,GEOID10,NAME,Subregion,Subregion2,Region,County,CountyFIPS
0,9,100,900100,"Danbury, Ridgefield, Bethel, Brookfield, New F...",CT,CT,Region,Fairfield,9001
1,9,101,900101,"Fairfield, New Canaan, Wilton, Weston & Easton...",CT,CT,Region,Fairfield,9001
2,9,102,900102,Stamford & Greenwich Towns PUMA,CT,CT,Region,Fairfield,9001
3,9,103,900103,"Norwalk, Westport & Darien Towns PUMA",CT,CT,Region,Fairfield,9001
4,9,104,900104,Bridgeport Town PUMA,CT,CT,Region,Fairfield,9001


In [7]:
respuma_00.GISMATCH=respuma_00.GISMATCH.apply(str)
respuma_10.GEOID10=respuma_10.GEOID10.apply(str)

In [8]:
# create a new id columns for merge with geo lookup tables
df['stpuma_id'] = df['STATEFIP'].apply(str) + \
                    df['PUMA'].apply(str).apply(lambda x: '{0:0>5}'.format(x))

In [9]:
df.dtypes

YEAR           int64
MULTYEAR       int64
SAMPLE         int64
SERIAL         int64
CBSERIAL       int64
HHWT         float64
CLUSTER        int64
STATEFIP       int64
COUNTYFIP      int64
METRO          int64
CITY           int64
PUMA           int64
STRATA         int64
GQ             int64
PERNUM         int64
PERWT        float64
REPWTP         int64
AGE            int64
EDUC           int64
EDUCD          int64
CLASSWKR       int64
CLASSWKRD      int64
OCC            int64
OCCSOC        object
IND            int64
INDNAICS      object
INCWAGE        int64
PWSTATE2       int64
PWCOUNTY       int64
PWPUMA00       int64
TRANWORK       int64
REPWTP1        int64
REPWTP2        int64
REPWTP3        int64
REPWTP4        int64
REPWTP5        int64
REPWTP6        int64
REPWTP7        int64
REPWTP8        int64
REPWTP9        int64
REPWTP10       int64
REPWTP11       int64
REPWTP12       int64
REPWTP13       int64
REPWTP14       int64
REPWTP15       int64
REPWTP16       int64
REPWTP17     

In [10]:
#separate by year
df_10 = df[df.YEAR==2010]
df_19 = df[df.YEAR==2019]

In [11]:
reg_10 = pd.merge(df_10,respuma_00,how='left',left_on='stpuma_id',right_on='GISMATCH')
reg_19 = pd.merge(df_19,respuma_10,how='left',left_on='stpuma_id',right_on='GEOID10')

In [12]:
reg_10 = reg_10.dropna(subset=['Subregion'])
reg_10.head()

Unnamed: 0,YEAR,MULTYEAR,SAMPLE,SERIAL,CBSERIAL,HHWT,CLUSTER,STATEFIP_x,COUNTYFIP,METRO,CITY,PUMA_x,STRATA,GQ,PERNUM,PERWT,REPWTP,AGE,EDUC,EDUCD,CLASSWKR,CLASSWKRD,OCC,OCCSOC,IND,INDNAICS,INCWAGE,PWSTATE2,PWCOUNTY,PWPUMA00,TRANWORK,REPWTP1,REPWTP2,REPWTP3,REPWTP4,REPWTP5,REPWTP6,REPWTP7,REPWTP8,REPWTP9,REPWTP10,REPWTP11,REPWTP12,REPWTP13,REPWTP14,REPWTP15,REPWTP16,REPWTP17,REPWTP18,REPWTP19,REPWTP20,REPWTP21,REPWTP22,REPWTP23,REPWTP24,REPWTP25,REPWTP26,REPWTP27,REPWTP28,REPWTP29,REPWTP30,REPWTP31,REPWTP32,REPWTP33,REPWTP34,REPWTP35,REPWTP36,REPWTP37,REPWTP38,REPWTP39,REPWTP40,REPWTP41,REPWTP42,REPWTP43,REPWTP44,REPWTP45,REPWTP46,REPWTP47,REPWTP48,REPWTP49,REPWTP50,REPWTP51,REPWTP52,REPWTP53,REPWTP54,REPWTP55,REPWTP56,REPWTP57,REPWTP58,REPWTP59,REPWTP60,REPWTP61,REPWTP62,REPWTP63,REPWTP64,REPWTP65,REPWTP66,REPWTP67,REPWTP68,REPWTP69,REPWTP70,REPWTP71,REPWTP72,REPWTP73,REPWTP74,REPWTP75,REPWTP76,REPWTP77,REPWTP78,REPWTP79,REPWTP80,stpuma_id,StateName,STATEFIP_y,PUMA_y,GISJOIN,GISMATCH,CountyCode,CountyName,Subregion
5,2010,2006,201005,1085609,2006000000586,40.0,2010010856095,9,5,0,0,100,10009,3,1,40.0,1,94,2,24,0,0,0,0,0,0,0,0,0,0,0,12,12,40,12,70,40,39,40,68,39,12,40,72,71,39,42,66,12,43,42,68,66,38,70,12,39,39,41,13,39,68,41,12,12,41,42,12,70,41,41,66,69,39,70,13,40,39,39,12,39,69,41,12,12,39,41,12,71,41,41,11,12,38,12,70,39,38,40,69,40,12,41,70,72,41,42,69,12,41,41,900100,Connecticut,9.0,100.0,G09000100,900100,9005.0,Litchfield,CT
6,2010,2006,201005,1085610,2006000000750,35.0,2010010856105,9,9,2,7250,1900,190009,1,1,35.0,1,50,6,62,0,0,0,0,0,0,0,0,0,0,0,14,32,18,15,37,12,53,39,33,38,64,39,12,42,51,48,30,30,65,33,12,35,14,12,35,13,50,44,36,38,53,36,15,39,79,60,28,28,60,35,54,34,65,58,36,44,11,39,31,40,12,36,60,45,10,9,31,34,16,31,58,32,59,59,32,59,12,37,41,36,12,33,75,36,18,12,31,39,10,29,901900,Connecticut,9.0,1900.0,G09001900,901900,9009.0,New Haven,CT
7,2010,2006,201005,1085610,2006000000750,35.0,2010010856105,9,9,2,7250,1900,190009,1,2,35.0,1,75,6,62,0,0,0,0,0,0,0,0,0,0,0,12,32,17,15,37,12,54,37,33,37,64,38,11,43,49,47,29,29,65,29,10,35,14,10,33,13,49,40,36,35,52,36,14,41,79,58,29,28,56,33,55,33,62,54,34,46,13,36,28,37,13,39,60,41,12,11,29,33,16,30,54,30,58,59,31,56,12,38,40,33,13,33,73,38,16,13,31,36,11,32,901900,Connecticut,9.0,1900.0,G09001900,901900,9009.0,New Haven,CT
8,2010,2006,201005,1085611,2006000001050,21.0,2010010856115,9,9,2,7250,1900,190009,1,1,21.0,1,28,10,101,2,22,5240,434051,2190,3254,50836,9,1,2100,10,8,34,25,23,19,6,37,21,17,8,24,8,8,28,43,22,15,27,19,33,5,34,25,24,26,7,32,23,21,8,21,10,9,27,38,25,21,23,17,36,7,26,19,22,23,9,31,26,23,9,19,8,7,37,39,20,23,27,22,40,8,31,21,23,19,6,33,22,20,8,22,7,8,32,31,20,18,22,26,32,901900,Connecticut,9.0,1900.0,G09001900,901900,9009.0,New Haven,CT
9,2010,2006,201005,1085611,2006000001050,21.0,2010010856115,9,9,2,7250,1900,190009,1,2,21.0,1,30,10,101,2,22,1960,1940XX,7290,5413,47592,9,9,1500,10,8,36,23,22,19,7,36,21,19,7,21,8,8,28,43,22,13,25,20,33,5,34,24,25,27,8,32,21,19,8,22,8,7,27,38,27,22,25,15,39,7,26,18,23,25,9,30,27,22,10,20,7,6,36,39,20,21,29,20,41,11,27,20,22,18,6,31,21,20,8,28,7,8,33,32,19,15,24,25,34,901900,Connecticut,9.0,1900.0,G09001900,901900,9009.0,New Haven,CT


In [13]:
reg_19 = reg_19.dropna(subset=['Region'])
reg_19.head()

Unnamed: 0,YEAR,MULTYEAR,SAMPLE,SERIAL,CBSERIAL,HHWT,CLUSTER,STATEFIP,COUNTYFIP,METRO,CITY,PUMA_x,STRATA,GQ,PERNUM,PERWT,REPWTP,AGE,EDUC,EDUCD,CLASSWKR,CLASSWKRD,OCC,OCCSOC,IND,INDNAICS,INCWAGE,PWSTATE2,PWCOUNTY,PWPUMA00,TRANWORK,REPWTP1,REPWTP2,REPWTP3,REPWTP4,REPWTP5,REPWTP6,REPWTP7,REPWTP8,REPWTP9,REPWTP10,REPWTP11,REPWTP12,REPWTP13,REPWTP14,REPWTP15,REPWTP16,REPWTP17,REPWTP18,REPWTP19,REPWTP20,REPWTP21,REPWTP22,REPWTP23,REPWTP24,REPWTP25,REPWTP26,REPWTP27,REPWTP28,REPWTP29,REPWTP30,REPWTP31,REPWTP32,REPWTP33,REPWTP34,REPWTP35,REPWTP36,REPWTP37,REPWTP38,REPWTP39,REPWTP40,REPWTP41,REPWTP42,REPWTP43,REPWTP44,REPWTP45,REPWTP46,REPWTP47,REPWTP48,REPWTP49,REPWTP50,REPWTP51,REPWTP52,REPWTP53,REPWTP54,REPWTP55,REPWTP56,REPWTP57,REPWTP58,REPWTP59,REPWTP60,REPWTP61,REPWTP62,REPWTP63,REPWTP64,REPWTP65,REPWTP66,REPWTP67,REPWTP68,REPWTP69,REPWTP70,REPWTP71,REPWTP72,REPWTP73,REPWTP74,REPWTP75,REPWTP76,REPWTP77,REPWTP78,REPWTP79,REPWTP80,stpuma_id,StateFIPS,PUMA_y,GEOID10,NAME,Subregion,Subregion2,Region,County,CountyFIPS
1,2019,2015,201903,1211600,2015000000641,12.0,2019012116003,9,5,1,0,500,50009,1,1,12.0,1,83,7,71,0,0,0,0,0,0,0,0,0,0,0,10,10,24,3,20,10,3,4,3,12,18,4,14,12,13,11,19,22,11,12,11,12,20,3,21,12,4,4,3,12,17,4,11,13,11,13,20,17,14,12,12,12,22,4,20,12,4,4,3,13,19,3,12,10,12,12,20,21,12,10,11,12,18,4,20,13,3,4,3,12,22,4,11,10,11,11,21,25,11,11,900500,9.0,500.0,900500,Litchfield County PUMA,CT,CT,Region,Litchfield,9005.0
2,2019,2015,201903,1211601,2015000000837,12.0,2019012116013,9,1,4,0,100,10009,1,1,13.0,1,66,10,101,2,28,3255,291141,7860,6111,37769,9,1,100,10,13,22,13,12,22,13,23,4,4,13,12,13,20,11,4,13,4,4,22,13,11,20,13,13,21,12,21,3,3,13,13,13,22,12,3,14,4,3,22,12,13,21,14,14,24,11,20,4,4,13,13,12,22,12,4,13,4,4,20,13,12,22,13,12,21,13,21,4,3,12,14,13,25,13,3,14,4,4,22,13,900100,9.0,100.0,900100,"Danbury, Ridgefield, Bethel, Brookfield, New F...",CT,CT,Region,Fairfield,9001.0
3,2019,2015,201903,1211601,2015000000837,12.0,2019012116013,9,1,4,0,100,10009,1,2,14.0,1,68,11,115,1,13,3010,291020,7980,6212,710052,9,1,100,10,13,21,14,13,24,13,22,4,3,14,14,14,21,14,5,14,4,4,23,13,13,21,14,13,23,13,23,4,3,14,14,13,24,14,3,14,4,4,23,14,13,22,14,15,26,11,21,3,4,14,14,14,21,13,4,14,4,4,24,13,14,23,14,13,20,15,23,4,3,12,15,13,26,14,4,15,4,4,24,14,900100,9.0,100.0,900100,"Danbury, Ridgefield, Bethel, Brookfield, New F...",CT,CT,Region,Fairfield,9001.0
9,2019,2015,201903,1211603,2015000001026,13.0,2019012116033,9,1,4,0,105,10509,1,1,13.0,1,35,11,114,2,22,1105,151244,7280,5412,91724,9,1,100,10,4,23,13,23,12,12,19,12,22,4,4,13,12,14,22,14,3,12,4,13,4,24,12,23,12,11,23,13,20,4,3,12,15,13,21,11,4,14,3,12,4,21,13,22,12,12,19,12,22,4,3,12,14,12,21,14,4,13,4,12,4,21,13,24,13,14,24,14,22,4,3,14,11,12,23,11,4,16,4,13,900105,9.0,105.0,900105,"Stratford, Shelton, Trumbull, Newtown & Monroe...",CT,CT,Region,Fairfield,9001.0
10,2019,2015,201903,1211603,2015000001026,13.0,2019012116033,9,1,4,0,105,10509,1,2,12.0,1,31,11,114,2,22,120,113031,6480,5111Z,70142,9,1,100,10,4,22,12,20,13,11,19,11,20,3,4,11,12,13,22,12,3,11,3,12,3,24,12,18,12,12,20,11,19,3,3,11,13,14,21,12,4,13,4,13,4,18,13,18,10,11,19,11,21,4,4,11,14,12,21,13,3,12,4,12,3,21,11,25,13,13,22,12,20,4,3,10,11,12,22,11,4,15,3,11,900105,9.0,105.0,900105,"Stratford, Shelton, Trumbull, Newtown & Monroe...",CT,CT,Region,Fairfield,9001.0


In [14]:
occ = ['119121','191020','191040','1910XX','192030','194021','194031',\
      '194031','1940XX','172131','172141','172031','1720XX','172041','292010']

In [15]:
# REDUCE DATA TABLE TO LIFE SCI OCCUPATION CODES
reg_10_occ=reg_10[reg_10['OCCSOC'].isin(occ)]
reg_19_occ=reg_19[reg_19['OCCSOC'].isin(occ)]

In [16]:
reg_10_occ.shape

(4551, 120)

In [18]:
# reduce table to just counties and subregions of residence 1 year ago
cols10 = ['CountyName','Subregion','PERWT'] + repwts
cols19 = ['County','Subregion','PERWT'] + repwts
reg_10_occ = reg_10_occ[cols10]
reg_19_occ = reg_19_occ[cols19]

# rename for clarity
#df_in = df_in.rename(columns={'County':'out_co_name','CountyFIP':'out_stco',\
##                              'Subregion5':'out_subreg_5','Subregion7':'out_subreg_7',\
#                              'PERWT':'in_pop'})
    
#df_in.head()

#### Subregion life sci worker totals

In [23]:
reg_10_occ_2 = reg_10_occ.groupby('Subregion').sum().reset_index()
reg_10_occ_2

Unnamed: 0,Subregion,PERWT,REPWTP1,REPWTP2,REPWTP3,REPWTP4,REPWTP5,REPWTP6,REPWTP7,REPWTP8,REPWTP9,REPWTP10,REPWTP11,REPWTP12,REPWTP13,REPWTP14,REPWTP15,REPWTP16,REPWTP17,REPWTP18,REPWTP19,REPWTP20,REPWTP21,REPWTP22,REPWTP23,REPWTP24,REPWTP25,REPWTP26,REPWTP27,REPWTP28,REPWTP29,REPWTP30,REPWTP31,REPWTP32,REPWTP33,REPWTP34,REPWTP35,REPWTP36,REPWTP37,REPWTP38,REPWTP39,REPWTP40,REPWTP41,REPWTP42,REPWTP43,REPWTP44,REPWTP45,REPWTP46,REPWTP47,REPWTP48,REPWTP49,REPWTP50,REPWTP51,REPWTP52,REPWTP53,REPWTP54,REPWTP55,REPWTP56,REPWTP57,REPWTP58,REPWTP59,REPWTP60,REPWTP61,REPWTP62,REPWTP63,REPWTP64,REPWTP65,REPWTP66,REPWTP67,REPWTP68,REPWTP69,REPWTP70,REPWTP71,REPWTP72,REPWTP73,REPWTP74,REPWTP75,REPWTP76,REPWTP77,REPWTP78,REPWTP79,REPWTP80
0,CT,10421.0,10877,10270,10266,10358,10341,10884,10812,10339,10481,10090,10860,10602,10544,10073,9910,10283,10446,10196,10638,10682,10226,10284,11204,10115,10225,10658,10774,10492,10161,10151,10413,10283,10303,10433,10386,10553,10672,10696,10219,10367,10326,10822,10619,10472,10655,10455,10327,10683,10003,10580,10336,10307,10374,10386,10616,10733,10442,10711,10196,9949,10366,10299,10521,10524,10191,10225,10347,10217,10247,10934,10206,10317,10568,10253,10857,10498,10731,11196,10005,10425
1,LI,9823.0,9962,9899,10057,9835,10058,9838,9612,9889,9452,10228,10117,9848,9715,10183,9777,9066,9752,10123,9708,9537,9690,10294,9789,9317,9808,9573,10093,10094,9445,10260,9591,9665,10112,10036,9978,9474,9573,9495,9714,9330,10011,9760,9762,9961,10247,10071,9918,10303,9839,9841,9424,9716,9782,9772,9472,9519,9545,9581,10136,9374,9891,9946,9692,9501,10275,9544,10122,9597,9808,9802,9581,9978,9903,9929,9807,9460,10041,9565,10182,9662
2,LowHV,6027.0,5895,5977,6144,5877,6241,6145,6276,5999,6022,5876,6264,6159,6068,5719,6101,5851,5955,6276,5943,6209,6070,5933,6228,5972,5817,6051,6100,6185,6247,5853,6000,5814,6196,5907,5672,6271,6016,5745,6274,5854,5804,6297,5518,5946,6331,6267,5763,5875,5899,5837,6056,6078,5755,6362,6339,6173,6007,5950,6074,6082,6299,6017,5799,6213,5994,6466,5955,5602,5583,6170,5986,6076,6229,5937,6156,6290,6262,6308,6068,6071
3,MidHV,3172.0,3051,3245,3279,2865,3005,3444,3329,3246,2890,3316,2975,3077,3218,3201,3193,3152,3222,3299,3086,3154,3256,3239,3101,3122,3227,3176,3277,3200,3170,3211,3114,2998,3159,3194,3236,3241,3103,3128,3100,3205,3315,3411,3177,3124,3243,2977,3196,3231,3368,3133,3039,3212,3029,3185,3204,2888,3197,3223,3248,2973,3392,3195,3099,3046,3260,3097,3125,3137,3323,3401,3140,3127,3053,2989,3563,3173,2926,3051,3282,3191
4,NJIn,30781.0,31064,30105,29888,31501,31404,31509,30554,31968,30799,31031,30606,30852,30968,31411,29901,31209,31348,31613,31568,30571,30673,30953,30720,30597,30116,29920,30782,31087,31016,30825,31436,30644,32026,30460,30429,30997,30696,30711,30436,31174,30933,30587,31311,31666,30975,30984,30230,30883,31121,30977,30705,30355,31062,30719,30401,31667,30458,30509,30618,31524,30941,30173,30844,31051,32205,30761,31183,30790,30396,31416,29767,30534,31171,31129,30802,31444,30536,30402,30935,30249
5,NJOut,9929.0,10114,9554,10098,10128,9927,10004,9987,10124,10462,9541,9859,9583,9685,9900,9746,9792,9577,10024,10347,9951,9783,9507,9923,9692,10033,10260,10193,9934,10034,9908,9924,9929,10054,10134,10104,10070,10153,9836,9968,9724,9905,9990,10159,10230,9957,9688,10058,9419,10186,10104,9852,10143,10163,10348,9803,10034,10257,9880,9969,9561,10049,10014,9520,9795,9849,9985,9803,9707,9710,9846,10056,9971,10077,9967,10099,9883,9952,10483,9728,10077
6,NYC,25381.0,24906,25204,25327,25262,24551,26112,25713,26010,24792,24715,25462,26646,25628,25811,26075,26244,25152,26180,25349,25858,25654,25303,25350,25357,25130,25179,25585,26432,25192,25276,24886,25106,24778,25192,25010,24765,25457,25981,25486,25476,24615,25828,25096,26024,25944,25927,25294,25396,26015,24995,24472,25858,25982,25947,25245,25886,26138,25921,24631,24971,25207,25212,25673,25010,24815,25108,25114,25545,24817,25187,25360,25720,25281,25253,25753,24819,25630,25806,25880,24808


In [24]:
# calculate standard error, margin of error, cv
# drop replicate weight columns
reg_10_occ_2['worker_se'] = reg_10_occ_2.apply(lambda x: (get_se(x['PERWT'],x[repwts])),axis=1)
reg_10_occ_2['worker_moe'] = reg_10_occ_2.apply(lambda x: (get_moe(x['worker_se'])),axis=1)
reg_10_occ_2['worker_cv'] = reg_10_occ_2.apply(lambda x: (get_cv(x['PERWT'],x['worker_se'])),axis=1)

reg_10_occ_2 = reg_10_occ_2.drop(columns=repwts) 
                            
reg_10_occ_2

Unnamed: 0,Subregion,PERWT,worker_se,worker_moe,worker_cv
0,CT,10421.0,530.476201,872.633351,3.094501
1,LI,9823.0,535.353014,880.655709,3.313067
2,LowHV,6027.0,400.391683,658.644319,4.03848
3,MidHV,3172.0,256.711414,422.290276,4.919785
4,NJIn,30781.0,1012.946741,1666.297389,2.000497
5,NJOut,9929.0,438.874526,721.948596,2.687008
6,NYC,25381.0,959.156687,1577.81275,2.297285


### 2019 life sci regional workers

In [25]:
reg_19_occ_2 = reg_19_occ.groupby('Subregion').sum().reset_index()
reg_19_occ_2

Unnamed: 0,Subregion,PERWT,REPWTP1,REPWTP2,REPWTP3,REPWTP4,REPWTP5,REPWTP6,REPWTP7,REPWTP8,REPWTP9,REPWTP10,REPWTP11,REPWTP12,REPWTP13,REPWTP14,REPWTP15,REPWTP16,REPWTP17,REPWTP18,REPWTP19,REPWTP20,REPWTP21,REPWTP22,REPWTP23,REPWTP24,REPWTP25,REPWTP26,REPWTP27,REPWTP28,REPWTP29,REPWTP30,REPWTP31,REPWTP32,REPWTP33,REPWTP34,REPWTP35,REPWTP36,REPWTP37,REPWTP38,REPWTP39,REPWTP40,REPWTP41,REPWTP42,REPWTP43,REPWTP44,REPWTP45,REPWTP46,REPWTP47,REPWTP48,REPWTP49,REPWTP50,REPWTP51,REPWTP52,REPWTP53,REPWTP54,REPWTP55,REPWTP56,REPWTP57,REPWTP58,REPWTP59,REPWTP60,REPWTP61,REPWTP62,REPWTP63,REPWTP64,REPWTP65,REPWTP66,REPWTP67,REPWTP68,REPWTP69,REPWTP70,REPWTP71,REPWTP72,REPWTP73,REPWTP74,REPWTP75,REPWTP76,REPWTP77,REPWTP78,REPWTP79,REPWTP80
0,CT,8715.0,8855,9111,8886,9015,9359,8628,8676,9165,8441,8784,8142,8816,8461,8345,8954,8464,8706,8844,8762,9375,8654,8808,8599,8596,8492,9078,8684,8496,8735,8491,8962,8714,8600,8271,8815,8713,8564,8957,8743,8929,8452,8836,8737,8909,8956,8635,8736,8693,8439,8440,8691,8813,8929,8761,8442,8615,9238,8911,8908,8964,8570,8743,8108,8339,8934,9316,9165,8430,8935,8627,8471,8317,8580,8771,8670,9062,8733,8563,8664,8219
1,LI,8915.0,8978,8985,9265,8884,9036,8641,8690,9186,8733,8395,9101,8500,8694,8992,8543,9417,9009,8696,9294,8629,8907,9055,8612,8927,9223,8215,8794,8910,8804,8813,9103,9051,8887,9533,9371,8885,9464,8781,8622,8776,9304,9141,8353,8986,8747,8631,8669,9244,8756,9165,8690,9285,8982,8517,9000,9437,9179,9083,9116,8563,8829,9161,8908,8915,9080,8540,9122,8765,8771,8712,8944,8566,8902,8843,9085,9227,8824,8822,8946,8843
2,Low Hud,4698.0,4861,4610,4711,4791,4680,4661,4755,4543,4397,4444,5022,4618,4743,4795,4589,4880,4942,4716,4785,4793,4947,4680,4657,4698,4401,4751,4482,4347,4729,4632,4924,4818,4537,4794,4694,4673,4644,4746,4672,4688,4704,4808,4962,4658,4569,4823,4632,4790,4668,4851,4746,4544,4638,4570,4809,4961,4662,4666,4391,5072,4642,4740,4364,4485,4651,4619,4528,4633,4835,4936,4431,4516,4719,4424,4454,5025,4830,4963,5060,4969
3,Mid Hud,2973.0,2630,3116,2969,2774,2919,3008,3067,3029,2720,3049,3166,3238,2984,2971,2907,3099,3193,2980,2926,2909,3137,3017,2956,3164,2799,2928,2915,2942,3160,3276,2916,2986,3055,2959,2745,2823,2996,2801,2842,3111,3138,3223,2823,2653,2949,3024,2965,2977,2736,2871,2872,2860,3113,2896,3101,2821,2828,2780,2653,3024,2771,3013,3217,3204,3333,3106,2983,3049,3012,3233,2832,2934,2976,3090,3090,2928,2975,2929,2848,3139
4,NJ In,28360.0,28545,28413,28987,28535,28318,28314,28090,28641,28408,28237,28640,28611,28190,27917,28141,27504,27763,28761,28483,27715,28678,28156,29252,27688,28721,28627,27263,28772,27802,28560,28518,28262,28578,28682,28567,28154,28190,28580,29196,28068,29243,28532,28463,29185,28451,28565,27310,27694,27865,28312,28102,28552,28648,27642,28297,29012,28485,28463,27774,28301,27834,28797,28406,28292,28673,28561,27930,27858,29332,29260,27999,27790,28302,27869,28594,28396,27947,29087,28317,27950
5,NJ Out,6923.0,7094,6850,7050,6489,6542,6877,6932,7207,6888,6706,7076,6552,6683,7027,6925,6790,7285,7011,6886,7083,7044,7103,6874,6749,6696,6762,6932,7284,6874,6939,6845,7094,7051,7011,6946,7017,6944,6981,7262,7551,7126,7328,6617,7045,7276,6684,6809,6638,6892,7497,7126,7065,6742,6756,7102,7073,6864,6676,6905,6487,7029,6857,6706,7044,6510,6916,7145,7222,7099,6569,7228,6910,6776,6710,6887,6956,6960,6995,6887,7241
6,NYC,24207.0,22895,23663,24105,24485,24473,24457,23364,23826,25070,23773,24786,25077,23526,24145,24901,24385,24379,24507,23832,24386,24768,24063,24553,24274,25094,24337,24051,24310,24372,23834,24235,24981,24208,24449,24248,24343,24275,24504,24415,24142,24160,24497,23976,24762,24391,24217,24124,24447,24930,24580,24593,24266,23684,24173,24665,25299,24527,23755,23409,24212,24672,24272,24175,24885,24093,24330,23253,24114,23791,24085,24557,24126,24639,24136,23681,24515,24582,23533,24310,24060


In [26]:
# calculate standard error, margin of error, cv
# drop replicate weight columns
reg_19_occ_2['worker_se'] = reg_19_occ_2.apply(lambda x: (get_se(x['PERWT'],x[repwts])),axis=1)
reg_19_occ_2['worker_moe'] = reg_19_occ_2.apply(lambda x: (get_moe(x['worker_se'])),axis=1)
reg_19_occ_2['worker_cv'] = reg_19_occ_2.apply(lambda x: (get_cv(x['PERWT'],x['worker_se'])),axis=1)

reg_19_occ_2 = reg_19_occ_2.drop(columns=repwts) 
                            
reg_19_occ_2

Unnamed: 0,Subregion,PERWT,worker_se,worker_moe,worker_cv
0,CT,8715.0,531.701044,874.648217,3.708807
1,LI,8915.0,538.938958,886.554585,3.674958
2,Low Hud,4698.0,342.274743,563.041953,4.428901
3,Mid Hud,2973.0,302.950739,498.353965,6.194571
4,NJ In,28360.0,907.074832,1492.138098,1.944335
5,NJ Out,6923.0,445.337905,732.580854,3.910474
6,NYC,24207.0,888.603483,1461.75273,2.231522


### Merge in & out to create net flow columns

fix up the crosswalk files to make sure this can be done

In [27]:
reg_19_occ_2.to_excel('../output/lifesci_2019.xlsx')
reg_10_occ_2.to_excel('../output/lifesci_2010.xlsx')