#### Identifying potential eligibility-enrollment gap localities using the Social Vulnerability Index data

#### Import the required libraries

In [89]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [90]:
# Load the SVI extract into a DataFrame; the path is relative to the
# notebook's working directory.
# NOTE(review): the describe() output further down reports minima of -999 in
# several estimate columns — presumably a missing-data sentinel that is read
# here as an ordinary number; confirm against the SVI data dictionary.
df = pd.read_csv("svi_analysis.csv")

In [91]:
# Preview the first rows of the loaded DataFrame
df.head()

Unnamed: 0,State Code,State Name,County Code,COUNTY,FIPS Code,LOCATION,Percentage of persons below 150% poverty estimate,Percentage of persons below 150% poverty estimate MOE,Unemployment Rate estimate,Unemployment Rate estimate MOE,Percentile percentage of persons below 150% poverty estimate,Percentile percentage of civilian (age 16+) unemployed estimate,Sum of series for Socioeconomic Status theme,Percentile ranking for Socioeconomic Status theme summary
0,1,Alabama,1001,Autauga,1001020100,"Census Tract 201, Autauga County, Alabama",18.1,6.1,2.1,2.1,0.4727,0.1731,2.0969,0.3711
1,1,Alabama,1001,Autauga,1001020200,"Census Tract 202, Autauga County, Alabama",25.4,11.0,4.0,3.5,0.6491,0.421,2.1789,0.3964
2,1,Alabama,1001,Autauga,1001020300,"Census Tract 203, Autauga County, Alabama",22.8,7.8,2.7,2.3,0.5917,0.2487,2.0595,0.3593
3,1,Alabama,1001,Autauga,1001020400,"Census Tract 204, Autauga County, Alabama",14.2,5.7,2.4,2.0,0.3598,0.2096,1.6314,0.2292
4,1,Alabama,1001,Autauga,1001020501,"Census Tract 205.01, Autauga County, Alabama",21.0,11.1,1.0,1.3,0.5495,0.0641,1.8133,0.2825


In [92]:
# Preview the last rows of the loaded DataFrame
df.tail()

Unnamed: 0,State Code,State Name,County Code,COUNTY,FIPS Code,LOCATION,Percentage of persons below 150% poverty estimate,Percentage of persons below 150% poverty estimate MOE,Unemployment Rate estimate,Unemployment Rate estimate MOE,Percentile percentage of persons below 150% poverty estimate,Percentile percentage of civilian (age 16+) unemployed estimate,Sum of series for Socioeconomic Status theme,Percentile ranking for Socioeconomic Status theme summary
84117,56,Wyoming,56043,Washakie,56043000200,"Census Tract 2, Washakie County, Wyoming",16.7,5.6,3.4,2.3,0.4342,0.3415,1.8301,0.2876
84118,56,Wyoming,56043,Washakie,56043000301,"Census Tract 3.01, Washakie County, Wyoming",12.2,6.0,2.5,2.2,0.3,0.2223,2.101,0.3723
84119,56,Wyoming,56043,Washakie,56043000302,"Census Tract 3.02, Washakie County, Wyoming",28.2,11.3,7.0,5.4,0.7029,0.7176,2.9927,0.6624
84120,56,Wyoming,56045,Weston,56045951100,"Census Tract 9511, Weston County, Wyoming",25.8,9.0,3.2,2.8,0.6577,0.3147,2.5434,0.5167
84121,56,Wyoming,56045,Weston,56045951300,"Census Tract 9513, Weston County, Wyoming",15.5,7.9,4.5,3.5,0.3982,0.4829,2.1245,0.3795


In [93]:
# List the column labels of the DataFrame
df.columns

Index(['State Code', 'State Name', 'County Code', 'COUNTY', 'FIPS Code',
       'LOCATION', 'Percentage of persons below 150% poverty estimate',
       'Percentage of persons below 150% poverty estimate MOE',
       'Unemployment Rate estimate', 'Unemployment Rate estimate MOE',
       'Percentile percentage of persons below 150% poverty estimate',
       'Percentile percentage of civilian (age 16+) unemployed estimate',
       'Sum of series for Socioeconomic Status theme',
       'Percentile ranking for Socioeconomic Status theme summary'],
      dtype='object')

In [94]:
# Count the distinct values in each column (per-column cardinality, not a
# total of rows/columns — df.shape reports the dimensions)
df.nunique()

State Code                                                            51
State Name                                                            51
County Code                                                         3143
COUNTY                                                              1852
FIPS Code                                                          84122
LOCATION                                                           84122
Percentage of persons below 150% poverty estimate                    901
Percentage of persons below 150% poverty estimate MOE                508
Unemployment Rate estimate                                           425
Unemployment Rate estimate MOE                                       413
Percentile percentage of persons below 150% poverty estimate         776
Percentile percentage of civilian (age 16+) unemployed estimate      319
Sum of series for Socioeconomic Status theme                       34980
Percentile ranking for Socioeconomic Status theme s

In [95]:
# Dimensions of the DataFrame as (rows, columns)
df.shape

(84122, 14)

In [96]:
# Per-column non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84122 entries, 0 to 84121
Data columns (total 14 columns):
 #   Column                                                           Non-Null Count  Dtype  
---  ------                                                           --------------  -----  
 0   State Code                                                       84122 non-null  int64  
 1   State Name                                                       84122 non-null  object 
 2   County Code                                                      84122 non-null  int64  
 3   COUNTY                                                           84122 non-null  object 
 4   FIPS Code                                                        84122 non-null  int64  
 5   LOCATION                                                         84122 non-null  object 
 6   Percentage of persons below 150% poverty estimate                84122 non-null  float64
 7   Percentage of persons below 150% poverty

In [97]:
# Data type of each column
df.dtypes

State Code                                                           int64
State Name                                                          object
County Code                                                          int64
COUNTY                                                              object
FIPS Code                                                            int64
LOCATION                                                            object
Percentage of persons below 150% poverty estimate                  float64
Percentage of persons below 150% poverty estimate MOE              float64
Unemployment Rate estimate                                         float64
Unemployment Rate estimate MOE                                     float64
Percentile percentage of persons below 150% poverty estimate       float64
Percentile percentage of civilian (age 16+) unemployed estimate    float64
Sum of series for Socioeconomic Status theme                       float64
Percentile ranking for So

In [98]:
# Count NaN entries per column.
# NOTE(review): this only detects NaN. The -999 minima reported by
# describe() look like sentinel-coded missing values and are not counted
# here — confirm before treating the data as complete.
df.isnull().sum()

State Code                                                         0
State Name                                                         0
County Code                                                        0
COUNTY                                                             0
FIPS Code                                                          0
LOCATION                                                           0
Percentage of persons below 150% poverty estimate                  0
Percentage of persons below 150% poverty estimate MOE              0
Unemployment Rate estimate                                         0
Unemployment Rate estimate MOE                                     0
Percentile percentage of persons below 150% poverty estimate       0
Percentile percentage of civilian (age 16+) unemployed estimate    0
Sum of series for Socioeconomic Status theme                       0
Percentile ranking for Socioeconomic Status theme summary          0
dtype: int64

In [99]:
# Summary statistics for the numeric columns.
# NOTE(review): several columns have a minimum of -999, which pulls their
# mean negative and inflates their std; presumably a missing-data sentinel —
# confirm, and exclude those values before interpreting mean/std.
df.describe()

Unnamed: 0,State Code,County Code,FIPS Code,Percentage of persons below 150% poverty estimate,Percentage of persons below 150% poverty estimate MOE,Unemployment Rate estimate,Unemployment Rate estimate MOE,Percentile percentage of persons below 150% poverty estimate,Percentile percentage of civilian (age 16+) unemployed estimate,Sum of series for Socioeconomic Status theme,Percentile ranking for Socioeconomic Status theme summary
count,84122.0,84122.0,84122.0,84122.0,84122.0,84122.0,84122.0,84122.0,84122.0,84122.0,84122.0
mean,27.780272,27868.041761,27868290000.0,22.260361,-0.409977,-2.242474,-3.647747,-5.727023,-7.429575,-5.938282,-7.912153
std,15.911845,15937.596481,15937610000.0,15.451197,93.11346,89.23464,89.086828,78.639694,88.647235,91.496094,91.309095
min,1.0,1001.0,1001020000.0,0.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
25%,12.0,12119.0,12119910000.0,10.4,4.8,2.6,2.3,0.2437,0.2352,1.682,0.2436
50%,27.0,27145.0,27145010000.0,18.9,7.3,4.6,3.4,0.4942,0.4939,2.48045,0.49575
75%,41.0,41067.0,41067030000.0,30.8,10.3,7.4,5.2,0.7474,0.7441,3.280975,0.7479
max,56.0,56045.0,56045950000.0,100.0,400.0,100.0,100.0,0.9994,1.0,4.8725,1.0


In [100]:
# Select the categorical (object-dtype) columns and preview the first rows.
# df_var keeps the full sub-frame, the same way df_num does for the numeric
# columns; only the displayed preview is limited by head() (previously
# head() was applied twice and df_var itself held just five rows).
df_var = df.select_dtypes("object")
df_var.head()

Unnamed: 0,State Name,COUNTY,LOCATION
0,Alabama,Autauga,"Census Tract 201, Autauga County, Alabama"
1,Alabama,Autauga,"Census Tract 202, Autauga County, Alabama"
2,Alabama,Autauga,"Census Tract 203, Autauga County, Alabama"
3,Alabama,Autauga,"Census Tract 204, Autauga County, Alabama"
4,Alabama,Autauga,"Census Tract 205.01, Autauga County, Alabama"


In [101]:
# Keep only the numeric columns in df_num, then preview its first rows
df_num = df.select_dtypes(include="number")
df_num.head()

Unnamed: 0,State Code,County Code,FIPS Code,Percentage of persons below 150% poverty estimate,Percentage of persons below 150% poverty estimate MOE,Unemployment Rate estimate,Unemployment Rate estimate MOE,Percentile percentage of persons below 150% poverty estimate,Percentile percentage of civilian (age 16+) unemployed estimate,Sum of series for Socioeconomic Status theme,Percentile ranking for Socioeconomic Status theme summary
0,1,1001,1001020100,18.1,6.1,2.1,2.1,0.4727,0.1731,2.0969,0.3711
1,1,1001,1001020200,25.4,11.0,4.0,3.5,0.6491,0.421,2.1789,0.3964
2,1,1001,1001020300,22.8,7.8,2.7,2.3,0.5917,0.2487,2.0595,0.3593
3,1,1001,1001020400,14.2,5.7,2.4,2.0,0.3598,0.2096,1.6314,0.2292
4,1,1001,1001020501,21.0,11.1,1.0,1.3,0.5495,0.0641,1.8133,0.2825


In [104]:
# Top 100 census tracts ranked by the percentage of persons below 150% of the
# poverty line.
#
# Every FIPS Code identifies exactly one row (nunique() equals the row count),
# so the earlier ten-key groupby(...).sum() aggregated nothing and only served
# to carry the extra columns along. Rank the rows directly instead.
#
# NOTE(review): rows whose unemployment columns hold -999 (apparently a
# missing-data sentinel) are still ranked, because the ranking uses only the
# poverty estimate — confirm this is intended.
TOP_N = 100
poverty_col = "Percentage of persons below 150% poverty estimate"
context_cols = [
    "State Code",
    "County Code",
    "FIPS Code",
    "State Name",
    "LOCATION",
    "Unemployment Rate estimate",
    "Unemployment Rate estimate MOE",
    "Percentile percentage of civilian (age 16+) unemployed estimate",
    "Sum of series for Socioeconomic Status theme",
    "Percentage of persons below 150% poverty estimate MOE",
]

# Guard the one-row-per-tract assumption that makes an aggregation unnecessary.
assert df["FIPS Code"].is_unique, "expected one row per census tract (FIPS Code)"

df1 = (
    # Sorting by the leading keys reproduces the order the sorted groupby gave,
    # so ties at the cut-off resolve to the same tracts as before.
    df.sort_values(["State Code", "County Code", "FIPS Code"])
    .nlargest(TOP_N, poverty_col)
    .loc[:, context_cols + [poverty_col]]
    .reset_index(drop=True)
)

In [105]:
# Display the ranked table (the rendered output elides the middle rows)
df1

Unnamed: 0,State Code,County Code,FIPS Code,State Name,LOCATION,Unemployment Rate estimate,Unemployment Rate estimate MOE,Percentile percentage of civilian (age 16+) unemployed estimate,Sum of series for Socioeconomic Status theme,Percentage of persons below 150% poverty estimate MOE,Percentage of persons below 150% poverty estimate
0,4,4013,4013116733,Arizona,"Census Tract 1167.33, Maricopa County, Arizona",-999.0,-999.0,-999.0000,-999.0000,0.0,100.0
1,6,6025,6025010101,California,"Census Tract 101.01, Imperial County, California",100.0,100.0,1.0000,2.9968,0.0,100.0
2,6,6037,6037980033,California,"Census Tract 9800.33, Los Angeles County, California",-999.0,-999.0,-999.0000,-999.0000,0.0,100.0
3,6,6059,6059021813,California,"Census Tract 218.13, Orange County, California",0.0,100.0,0.0000,1.0042,0.0,100.0
4,6,6065,6065046501,California,"Census Tract 465.01, Riverside County, California",24.8,11.8,0.9927,2.3462,0.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...
95,48,48215,48215024122,Texas,"Census Tract 241.22, Hidalgo County, Texas",23.8,11.6,0.9913,4.5859,12.2,88.5
96,12,12001,12001000202,Florida,"Census Tract 2.02, Alachua County, Florida",6.1,4.2,0.6467,3.8889,8.9,88.4
97,6,6037,6037206302,California,"Census Tract 2063.02, Los Angeles County, California",39.1,23.8,0.9992,4.2273,25.9,88.3
98,17,17019,17019005901,Illinois,"Census Tract 59.01, Champaign County, Illinois",5.2,4.9,0.5605,1.8974,9.2,88.3


In [106]:
# Write the ranked table to top_100.csv in the working directory, omitting the
# positional row index. index=False is the documented boolean form of the
# argument (the previous index=None is not a documented value).
df1.to_csv("top_100.csv", encoding="utf-8", index=False)

#### Conclusion: These are the top 100 high-potential eligibility locations, i.e. the census tracts with the highest estimated percentage of persons living below 150% of the federal poverty line.