In [1]:
import pandas as pd
import numpy as np
import sklearn as skl
from sklearn import preprocessing
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)

### Indicator 1 - Renters who pay more than 50% of their income on housing

In [2]:
rent = pd.read_csv('../data/rent-as-percentage-income/ACSDT5Y2018.B25070_data_with_overlays_2020-11-19T201947.csv',
                   low_memory=False, skiprows=[1],
                   usecols=['NAME','B25070_010E','B25070_001E','B25070_011E'])
rent.rename(columns={'B25070_010E': 'paying_50p_more', 'B25070_001E': 'Total',
                    'B25070_011E': 'Total_not_comp'}, inplace = True)

rent.head()

Unnamed: 0,NAME,Total,paying_50p_more,Total_not_comp
0,"Census Tract 37, New York County, New York",789,127,61
1,"Census Tract 86.01, New York County, New York",1001,204,49
2,"Census Tract 44, New York County, New York",8145,1439,224
3,"Census Tract 86.03, New York County, New York",736,204,16
4,"Census Tract 86.02, New York County, New York",0,0,0


In [3]:
rent['pay over 50'] = (rent['paying_50p_more'] / (rent['Total'] - rent['Total_not_comp'])).round(decimals=2)
rent

Unnamed: 0,NAME,Total,paying_50p_more,Total_not_comp,pay over 50
0,"Census Tract 37, New York County, New York",789,127,61,0.17
1,"Census Tract 86.01, New York County, New York",1001,204,49,0.21
2,"Census Tract 44, New York County, New York",8145,1439,224,0.18
3,"Census Tract 86.03, New York County, New York",736,204,16,0.28
4,"Census Tract 86.02, New York County, New York",0,0,0,
5,"Census Tract 165, New York County, New York",1999,252,55,0.13
6,"Census Tract 279, New York County, New York",3679,1038,172,0.30
7,"Census Tract 112.03, New York County, New York",371,113,18,0.32
8,"Census Tract 184, New York County, New York",2901,731,76,0.26
9,"Census Tract 36.01, New York County, New York",1303,248,40,0.20


In [4]:
# # rows with NaN - because has 0's for the columns being used for calculations
# rent[rent.isna().any(axis=1)]

# # rent table without  NaN 

# rent.dropna()

### Indicator 2 - Amount of renters who have too little left ($12K) after paying housing cost

In [5]:
income = pd.read_csv('../data/low-income/ACSDT5Y2018.B25074_data_with_overlays_2020-12-10T140704.csv',
                     low_memory=False, skiprows=[1],
                     usecols=['NAME','B25074_001E','B25074_002E','B25074_013E','B25074_014E',
                             'B25074_015E','B25074_016E','B25074_017E','B25074_018E',
                             'B25074_027E'],
                     dtype={'B25074_001E': np.float64, 'B25074_002E': np.float64,
                           'B25074_013E': np.float64,'B25074_014E': np.float64,
                           'B25074_015E': np.float64,'B25074_016E': np.float64,
                           'B25074_017E': np.float64,'B25074_018E': np.float64,
                           'B25074_027E': np.float64})

income.head(3)

Unnamed: 0,NAME,B25074_001E,B25074_002E,B25074_013E,B25074_014E,B25074_015E,B25074_016E,B25074_017E,B25074_018E,B25074_027E
0,"Census Tract 429.02, Bronx County, New York",1442.0,178.0,0.0,0.0,14.0,0.0,0.0,322.0,158.0
1,"Census Tract 330, Bronx County, New York",2024.0,215.0,19.0,34.0,19.0,57.0,80.0,287.0,133.0
2,"Census Tract 358, Bronx County, New York",834.0,0.0,0.0,0.0,0.0,0.0,0.0,99.0,98.0


In [6]:
#dropping first row and renaming columns
income.columns = ['census-tract','total-renters','income-less-10k','income-15k-B25074_013E','income-15k-B25074_014E',
                             'income-15k-B25074_015E','income-15k-B25074_016E','income-15k-B25074_017E','income-15k-B25074_018E',
                             'income-27.5k-B25074_027E']
income = income.drop(index = 0, axis = 0)
income.head(3)

Unnamed: 0,census-tract,total-renters,income-less-10k,income-15k-B25074_013E,income-15k-B25074_014E,income-15k-B25074_015E,income-15k-B25074_016E,income-15k-B25074_017E,income-15k-B25074_018E,income-27.5k-B25074_027E
1,"Census Tract 330, Bronx County, New York",2024.0,215.0,19.0,34.0,19.0,57.0,80.0,287.0,133.0
2,"Census Tract 358, Bronx County, New York",834.0,0.0,0.0,0.0,0.0,0.0,0.0,99.0,98.0
3,"Census Tract 371, Bronx County, New York",1558.0,235.0,37.0,69.0,120.0,58.0,70.0,37.0,155.0


In [7]:
# total amount of renters with less than 12k after paying rent

income['total-renters-less12k']= income[['income-less-10k','income-15k-B25074_013E','income-15k-B25074_014E',
                             'income-15k-B25074_015E','income-15k-B25074_016E','income-15k-B25074_017E','income-15k-B25074_018E',
                             'income-27.5k-B25074_027E']].sum(axis =1)
income[['census-tract','total-renters','total-renters-less12k']].head()

Unnamed: 0,census-tract,total-renters,total-renters-less12k
1,"Census Tract 330, Bronx County, New York",2024.0,844.0
2,"Census Tract 358, Bronx County, New York",834.0,197.0
3,"Census Tract 371, Bronx County, New York",1558.0,781.0
4,"Census Tract 385, Bronx County, New York",1648.0,947.0
5,"Census Tract 421, Bronx County, New York",2056.0,760.0


In [8]:
#as a portion of total renters

income['percentage-renters-less12k'] = income['total-renters-less12k'].astype(float) / income['total-renters'].astype(float)

income[['census-tract','total-renters','total-renters-less12k','percentage-renters-less12k' ]].head()

Unnamed: 0,census-tract,total-renters,total-renters-less12k,percentage-renters-less12k
1,"Census Tract 330, Bronx County, New York",2024.0,844.0,0.416996
2,"Census Tract 358, Bronx County, New York",834.0,197.0,0.236211
3,"Census Tract 371, Bronx County, New York",1558.0,781.0,0.501284
4,"Census Tract 385, Bronx County, New York",1648.0,947.0,0.574636
5,"Census Tract 421, Bronx County, New York",2056.0,760.0,0.36965


### Indicator 3 - UI coverage rate

#### Method
\begin{align*}
&\text{UI coverage rate} = \frac{\text{private-sector workers in the UI program}}{\text{number of workers in the private for-profit and nonprofit sectors}}\\
\\
&\text{non-UI coverage rate} = 1-\text{UI coverage rate}
\end{align*}

where the private-sector workers in the UI program comes from **Total number of jobs (C000)** in Private Jobs in  Longitudinal Employer–Household Dynamics (LEHD) for 2013‒17 (the five most recent years available), and the estimated labor force comes from **Private wage and salary workers (DP03_0047E)** in the corresponding 2013‒17 ACS. 

Data collection and cleanning details can be found in **UCLA_UI.ipnb**

In [9]:
acs = pd.read_csv('../data/UI_coverage/ACS_DP03_cleaned.csv', low_memory=False)
rac_avg = pd.read_csv('../data/UI_coverage/ny_rac_cleaned.csv', low_memory=False)

In [10]:
ui_coverage = acs.merge(rac_avg, left_on='GEO_ID', right_on='trct', how='left')
ui_coverage['ui_rate'] = ui_coverage['total_ui_jobs'] / ui_coverage['total_jobs']
ui_coverage = ui_coverage[['GEO_ID', 'NAME', 'total_ui_jobs', 'total_jobs', 'ui_rate']]

print('# ui rate>1:', sum(ui_coverage['ui_rate']>1))
print(ui_coverage[~np.isinf(ui_coverage.ui_rate)].ui_rate.describe())

# ui rate>1: 1016
count    2117.000000
mean        1.076616
std         0.913469
min         0.444891
25%         0.872669
50%         0.982034
75%         1.108605
max        31.350000
Name: ui_rate, dtype: float64


In [11]:
ui_coverage.loc[ui_coverage['ui_rate']>1, 'ui_rate'] = 1
ui_coverage.head()

Unnamed: 0,GEO_ID,NAME,total_ui_jobs,total_jobs,ui_rate
0,36061000100,"Census Tract 1, New York County, New York",175.2,0.0,1.0
1,36061000201,"Census Tract 2.01, New York County, New York",1087.8,892.0,1.0
2,36061000202,"Census Tract 2.02, New York County, New York",2427.25,2365.0,1.0
3,36061000500,"Census Tract 5, New York County, New York",167.35,0.0,1.0
4,36061000600,"Census Tract 6, New York County, New York",3531.35,2481.0,1.0


### Indicator 3 - Unemployment rate

In [12]:
employment = pd.read_csv('../data/employment_ACSDP03_2018.csv',
                         low_memory=False, skiprows=[1],
                         usecols=['GEO_ID','NAME','DP03_0003E','DP03_0005E','DP03_0037E'])

In [13]:
employment.head()

Unnamed: 0,GEO_ID,NAME,DP03_0003E,DP03_0005E,DP03_0037E
0,1400000US36005000100,"Census Tract 1, Bronx County, New York",0,0,0
1,1400000US36005000200,"Census Tract 2, Bronx County, New York",1873,295,57
2,1400000US36005000400,"Census Tract 4, Bronx County, New York",3052,244,207
3,1400000US36005001600,"Census Tract 16, Bronx County, New York",2477,164,306
4,1400000US36005001900,"Census Tract 19, Bronx County, New York",1451,192,103


In [14]:
employment['DP03_0003E']=employment['DP03_0003E'].astype(float)
employment['DP03_0005E']=employment['DP03_0005E'].astype(float)

In [15]:
employment['unemployment rate'] = (100 * employment['DP03_0005E'] / employment['DP03_0003E']).round(decimals=2)
employment.head()

Unnamed: 0,GEO_ID,NAME,DP03_0003E,DP03_0005E,DP03_0037E,unemployment rate
0,1400000US36005000100,"Census Tract 1, Bronx County, New York",0.0,0.0,0,
1,1400000US36005000200,"Census Tract 2, Bronx County, New York",1873.0,295.0,57,15.75
2,1400000US36005000400,"Census Tract 4, Bronx County, New York",3052.0,244.0,207,7.99
3,1400000US36005001600,"Census Tract 16, Bronx County, New York",2477.0,164.0,306,6.62
4,1400000US36005001900,"Census Tract 19, Bronx County, New York",1451.0,192.0,103,13.23


### Indicator 4 - employment in impacted sectors (retail sales, hospitality service, personal care)

In [16]:
"""
from Paul Ong:

ACS18_5yr:C24010: Sex by Occupation for the Civilian Employed Population 16 Years And Over
 
Service Occupations:
Personal Care And Service Occupations (be sure to add both Male and Female counts)

------
ACS18_5yr:C24050: Industry by Occupation for the Civilian Employed Population 16 Years And Over

Service Occupations:
Arts, Entertainment, And Recreation, And Accommodation And Food Services

Sales and Office Occupations:
Retail Trade
 
% of Workers At Risk of Job Displacement =
 
(Personal Care And Service Occupations + Arts, Entertainment, and Recreation, and 
Accommodation and Food Services + Retail Trade) / (Total Civilian Employed Population 16 Years And Over)
"""


'\nfrom Paul Ong:\n\nACS18_5yr:C24010: Sex by Occupation for the Civilian Employed Population 16 Years And Over\n \nService Occupations:\nPersonal Care And Service Occupations (be sure to add both Male and Female counts)\n\n------\nACS18_5yr:C24050: Industry by Occupation for the Civilian Employed Population 16 Years And Over\n\nService Occupations:\nArts, Entertainment, And Recreation, And Accommodation And Food Services\n\nSales and Office Occupations:\nRetail Trade\n \n% of Workers At Risk of Job Displacement =\n \n(Personal Care And Service Occupations + Arts, Entertainment, and Recreation, and \nAccommodation and Food Services + Retail Trade) / (Total Civilian Employed Population 16 Years And Over)\n'

In [17]:
sexByOccop = pd.read_csv('../data/sexByOccupation/ACSDT5Y2018.C24010_data_with_overlays_2020-12-10T155813.csv',
                   low_memory=False, usecols = ['NAME', 'C24010_026E', 'C24010_062E'])
sexByOccop.head()

Unnamed: 0,NAME,C24010_026E,C24010_062E
0,Geographic Area Name,Estimate!!Total!!Male!!Service occupations!!Pe...,Estimate!!Total!!Female!!Service occupations!!...
1,"Census Tract 37, New York County, New York",11,23
2,"Census Tract 86.01, New York County, New York",0,12
3,"Census Tract 44, New York County, New York",173,334
4,"Census Tract 86.03, New York County, New York",9,17


In [18]:
indusByOccup = pd.read_csv('../data/industryByOccupation/ACSDT5Y2018.C24050_data_with_overlays_2020-12-10T155554.csv',
                          low_memory=False, usecols=['NAME','C24050_040E','C24050_048E', 'C24050_001E'])

indusByOccup.head()

Unnamed: 0,NAME,C24050_001E,C24050_040E,C24050_048E
0,Geographic Area Name,Estimate!!Total,"Estimate!!Total!!Service occupations!!Arts, en...",Estimate!!Total!!Sales and office occupations!...
1,"Census Tract 37, New York County, New York",1662,11,85
2,"Census Tract 86.01, New York County, New York",2198,0,27
3,"Census Tract 44, New York County, New York",10562,425,665
4,"Census Tract 86.03, New York County, New York",2557,9,65


In [19]:
indusByOccup[1:].head()

Unnamed: 0,NAME,C24050_001E,C24050_040E,C24050_048E
1,"Census Tract 37, New York County, New York",1662,11,85
2,"Census Tract 86.01, New York County, New York",2198,0,27
3,"Census Tract 44, New York County, New York",10562,425,665
4,"Census Tract 86.03, New York County, New York",2557,9,65
5,"Census Tract 86.02, New York County, New York",0,0,0


In [20]:
indic3= (sexByOccop['C24010_026E'][1:].astype(float) + sexByOccop['C24010_062E'][1:].astype(float)+ 
          indusByOccup['C24050_040E'][1:].astype(float) + 
          indusByOccup['C24050_048E'][1:].astype(float))/ indusByOccup['C24050_001E'][1:].astype(float)

indic3 = pd.DataFrame(indic3)
indic3['census-tract'] = indusByOccup['NAME']
indic3.head()

# column 0 is % of Workers At Risk of Job Displacement 

Unnamed: 0,0,census-tract
1,0.078219,"Census Tract 37, New York County, New York"
2,0.017743,"Census Tract 86.01, New York County, New York"
3,0.151202,"Census Tract 44, New York County, New York"
4,0.039108,"Census Tract 86.03, New York County, New York"
5,,"Census Tract 86.02, New York County, New York"


### Indicator6  - CARES Act Exclusion

In [21]:
# Import and clean the information about total population from ACS
dp05 = pd.read_csv("../data/CARES-exclusion/ACSDP5Y2018.DP05_data_with_overlays_2020-11-20T095617.csv",
                   usecols = ["GEO_ID","NAME","DP05_0001E"], skiprows=[1])
dp05.rename(columns = {"DP05_0001E" : "total_population"}, inplace = True)
dp05.head()

Unnamed: 0,GEO_ID,NAME,total_population
0,1400000US36061000100,"Census Tract 1, New York County, New York",0
1,1400000US36061000201,"Census Tract 2.01, New York County, New York",2835
2,1400000US36061000202,"Census Tract 2.02, New York County, New York",7764
3,1400000US36061000500,"Census Tract 5, New York County, New York",0
4,1400000US36061000600,"Census Tract 6, New York County, New York",9731


In [22]:
# Import data about population that's in IRS' system
irs = pd.read_csv('../data/CARES-exclusion/18zp33ny.csv', skiprows=[0, 1, 2, 4, 5], 
                       usecols = ["ZIP\ncode [1]", "Size of adjusted gross income", 
                                  "Number of returns", "Number of single returns", 
                                  "Number of joint returns", "Number of head of household returns", 
                                  "Number of \nindividuals [2]", "Number of dependents"], 
                  dtype = np.str)

irs.rename(columns={"ZIP\ncode [1]": "ZIP", "Number of \nindividuals [2]": "Number of individuals"}, inplace = True)

irs.head(15)

Unnamed: 0,ZIP,Size of adjusted gross income,Number of returns,Number of single returns,Number of joint returns,Number of head of household returns,Number of individuals,Number of dependents
0,0.0,Total,9632130.0,5003730.0,2947270.0,1460570.0,17260550.0,5256960.0
1,0.0,"$1 under $25,000",3161790.0,2230890.0,384320.0,496390.0,4289760.0,1269570.0
2,0.0,"$25,000 under $50,000",2223030.0,1239350.0,433380.0,499300.0,3878510.0,1246870.0
3,0.0,"$50,000 under $75,000",1365250.0,709530.0,379140.0,236500.0,2417530.0,680400.0
4,0.0,"$75,000 under $100,000",871610.0,353000.0,378620.0,110550.0,1726310.0,476400.0
5,0.0,"$100,000 under $200,000",1376380.0,362150.0,879400.0,102230.0,3245060.0,993140.0
6,0.0,"$200,000 or more",634070.0,108810.0,492410.0,15600.0,1703380.0,590580.0
7,,,,,,,,
8,10001.0,,16150.0,11740.0,2800.0,1160.0,21760.0,3350.0
9,10001.0,"$1 under $25,000",3680.0,2900.0,300.0,370.0,4390.0,800.0


In [23]:
irs_zip = irs[irs["ZIP"].notna() & irs["Size of adjusted gross income"].isna() & irs["Number of returns"].notna()].copy()

In [24]:
for col in irs_zip.columns[2:]:
    irs_zip[col] = irs_zip[col].str.replace(",", "").astype(int) 
irs_zip.dtypes

ZIP                                    object
Size of adjusted gross income          object
Number of returns                       int32
Number of single returns                int32
Number of joint returns                 int32
Number of head of household returns     int32
Number of individuals                   int32
Number of dependents                    int32
dtype: object

In [25]:
# Import the crosstalk file linking zip code to census track
zip_to_track = pd.read_csv("../data/CARES-exclusion/ZIP_TRACT_122018.csv", dtype = {"zip":str, "tract":str})
zip_to_track.head()

Unnamed: 0,zip,tract,res_ratio,bus_ratio,oth_ratio,tot_ratio
0,601,72001956700,0.671739,0.411079,0.530973,0.654116
1,603,72005401302,0.082247,0.036098,0.072491,0.079523
2,623,72023830400,0.331718,0.551763,0.47973,0.344927
3,638,72039955700,0.436658,0.819444,0.777778,0.455135
4,646,72051540100,0.142668,0.342226,0.148515,0.156662


In [26]:
irs_zt = zip_to_track.join(irs_zip[["ZIP", "Number of individuals"]].set_index("ZIP"), on = "zip", how = "left")
irs_zt = irs_zt[irs_zt["Number of individuals"].notna()]
irs_zt.head()

Unnamed: 0,zip,tract,res_ratio,bus_ratio,oth_ratio,tot_ratio,Number of individuals
7891,10001,36061009500,0.08999,0.230449,0.226739,0.149923,21760.0
7892,10001,36061009100,0.099933,0.056752,0.07544,0.082844,21760.0
7893,10002,36061002201,0.085143,0.035253,0.063872,0.080472,67080.0
7894,10001,36061010300,0.125852,0.030322,0.04736,0.086195,21760.0
7895,10013,36061003300,0.233373,0.208787,0.218345,0.225716,24330.0


In [27]:
irs_zt["N_corrected"] = irs_zt["tot_ratio"] * irs_zt["Number of individuals"]
irs_by_tract = irs_zt[["tract", "N_corrected"]].groupby("tract").sum()
irs_by_tract.head()

Unnamed: 0_level_0,N_corrected
tract,Unnamed: 1_level_1
36001000100,1468.305674
36001000200,2951.837908
36001000300,4900.253672
36001000401,2350.77277
36001000403,3067.276198


In [28]:
dp05["tract"] = [i[9:] for i in dp05["GEO_ID"]]
dp05_irs = dp05.join(irs_by_tract, on = "tract")
dp05_irs["CARES_exclusion"] = 1 - dp05_irs["N_corrected"] / dp05_irs["total_population"]
dp05_irs.head()
# ? What do we want to do about the tracts where the total_population is zero or there is no data from the irs?

Unnamed: 0,GEO_ID,NAME,total_population,tract,N_corrected,CARES_exclusion
0,1400000US36061000100,"Census Tract 1, New York County, New York",0,36061000100,3.679345,-inf
1,1400000US36061000201,"Census Tract 2.01, New York County, New York",2835,36061000201,1352.964425,0.522764
2,1400000US36061000202,"Census Tract 2.02, New York County, New York",7764,36061000202,6075.198137,0.217517
3,1400000US36061000500,"Census Tract 5, New York County, New York",0,36061000500,,
4,1400000US36061000600,"Census Tract 6, New York County, New York",9731,36061000600,8625.930297,0.113562


In [29]:
# the proportion of population not covered readily by the CARES Act in New York State
print("Number of people not readily covered by the CARES Act in New York State:")
print(dp05_irs.sum()["total_population"] - dp05_irs.sum()["N_corrected"])
print("Number of undocumented immigrants based on a report from Migration Policy Institute")
# https://www.migrationpolicy.org/data/unauthorized-immigrant-population/state/NY
print(866000)
# These are on the same magnitude
print("Proportion of population")
print(100 - 100* dp05_irs.sum()["N_corrected"] / dp05_irs.sum()["total_population"], "%")
# Similar to results from the LA data

Number of people not readily covered by the CARES Act in New York State:
1192894.275184908
Number of undocumented immigrants based on a report from Migration Policy Institute
866000
Proportion of population
14.127603285247943 %


In [30]:
dp05_irs["CARES_exclusion_c"] = [i if i > 0 else 0 for i in dp05_irs["CARES_exclusion"]]
dp05_irs.replace([-np.inf], 0, inplace = True)
# note that this makes all tracks with zero population have a CARES_exclusion of zero

In [31]:
dp05_irs.to_csv("../data/CARES_exclusion_cleaned.csv")

## Combine all indicators

In [32]:
risk_jobs = indic3.rename(columns={0:"Risk of Job Displacement", "census-tract": "NAME"})

In [33]:
data_merged = dp05_irs[["GEO_ID", "NAME", "tract","total_population", "CARES_exclusion_c"]]\
              .merge(rent[["NAME", "pay over 50"]], on = "NAME", how= "left")\
              .merge(income[['census-tract','percentage-renters-less12k']], 
                     left_on = "NAME", right_on = "census-tract", how = "left")\
              .merge(ui_coverage[["NAME", "ui_rate"]], on = "NAME", how = "left")\
              .merge(employment[["NAME", "unemployment rate"]], on = "NAME", how = "left")\
              .merge(risk_jobs, on = "NAME", how = "left")\
              .drop("census-tract", axis = 1)
data_merged.head(5)

Unnamed: 0,GEO_ID,NAME,tract,total_population,CARES_exclusion_c,pay over 50,percentage-renters-less12k,ui_rate,unemployment rate,Risk of Job Displacement
0,1400000US36061000100,"Census Tract 1, New York County, New York",36061000100,0,0.0,,,1.0,,
1,1400000US36061000201,"Census Tract 2.01, New York County, New York",36061000201,2835,0.522764,0.23,0.399627,1.0,4.05,0.295775
2,1400000US36061000202,"Census Tract 2.02, New York County, New York",36061000202,7764,0.217517,0.14,0.389513,1.0,13.81,0.239418
3,1400000US36061000500,"Census Tract 5, New York County, New York",36061000500,0,0.0,,,1.0,,
4,1400000US36061000600,"Census Tract 6, New York County, New York",36061000600,9731,0.113562,0.21,0.568819,1.0,8.27,0.271813


In [34]:
list_binned_cols = []
for i in data_merged.columns[4:10]:
    new_col = i +" bin"
    list_binned_cols.append(new_col)
    data_merged[new_col] = pd.cut(data_merged[i], 5, labels=[1,2,3,4,5])
data_merged.head(3)

Unnamed: 0,GEO_ID,NAME,tract,total_population,CARES_exclusion_c,pay over 50,percentage-renters-less12k,ui_rate,unemployment rate,Risk of Job Displacement,CARES_exclusion_c bin,pay over 50 bin,percentage-renters-less12k bin,ui_rate bin,unemployment rate bin,Risk of Job Displacement bin
0,1400000US36061000100,"Census Tract 1, New York County, New York",36061000100,0,0.0,,,1.0,,,1,,,5,,
1,1400000US36061000201,"Census Tract 2.01, New York County, New York",36061000201,2835,0.522764,0.23,0.399627,1.0,4.05,0.295775,3,2.0,2.0,5,1.0,3.0
2,1400000US36061000202,"Census Tract 2.02, New York County, New York",36061000202,7764,0.217517,0.14,0.389513,1.0,13.81,0.239418,2,1.0,2.0,5,3.0,2.0


In [35]:
data_merged["sum_rankings"] = data_merged[list_binned_cols].sum(axis = 1)
data_merged["RVI"] =  pd.cut(data_merged["sum_rankings"], 5, labels=[1,2,3,4,5])

In [36]:
data_merged.head()

Unnamed: 0,GEO_ID,NAME,tract,total_population,CARES_exclusion_c,pay over 50,percentage-renters-less12k,ui_rate,unemployment rate,Risk of Job Displacement,CARES_exclusion_c bin,pay over 50 bin,percentage-renters-less12k bin,ui_rate bin,unemployment rate bin,Risk of Job Displacement bin,sum_rankings,RVI
0,1400000US36061000100,"Census Tract 1, New York County, New York",36061000100,0,0.0,,,1.0,,,1,,,5,,,6.0,1
1,1400000US36061000201,"Census Tract 2.01, New York County, New York",36061000201,2835,0.522764,0.23,0.399627,1.0,4.05,0.295775,3,2.0,2.0,5,1.0,3.0,16.0,4
2,1400000US36061000202,"Census Tract 2.02, New York County, New York",36061000202,7764,0.217517,0.14,0.389513,1.0,13.81,0.239418,2,1.0,2.0,5,3.0,2.0,15.0,3
3,1400000US36061000500,"Census Tract 5, New York County, New York",36061000500,0,0.0,,,1.0,,,1,,,5,,,6.0,1
4,1400000US36061000600,"Census Tract 6, New York County, New York",36061000600,9731,0.113562,0.21,0.568819,1.0,8.27,0.271813,1,2.0,3.0,5,2.0,3.0,16.0,4


In [37]:
data_merged.to_csv("../data/RVI.csv")