In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

In [2]:
# Sample DataFrame
database = pd.read_csv('EPA_SmartLocationDatabase_V3_Jan_2021_Final.csv')

database.head()

Unnamed: 0,OBJECTID,GEOID10,GEOID20,STATEFP,COUNTYFP,TRACTCE,BLKGRPCE,CSA,CSA_Name,CBSA,...,D5DRI,D5DE,D5DEI,D2A_Ranked,D2B_Ranked,D3B_Ranked,D4A_Ranked,NatWalkInd,Shape_Length,Shape_Area
0,1,481130000000.0,481130000000.0,48,113,7825,4,206.0,"Dallas-Fort Worth, TX-OK",19100.0,...,0.184697,0.000476,0.137707,6,14,15,17,14.0,3110.36082,297836.0831
1,2,481130000000.0,481130000000.0,48,113,7825,2,206.0,"Dallas-Fort Worth, TX-OK",19100.0,...,0.323221,0.000801,0.231868,3,10,12,14,10.833333,3519.46911,484945.1466
2,3,481130000000.0,481130000000.0,48,113,7825,3,206.0,"Dallas-Fort Worth, TX-OK",19100.0,...,0.314628,0.000736,0.213146,1,1,7,17,8.333333,1697.091802,106705.9281
3,4,481130000000.0,481130000000.0,48,113,7824,1,206.0,"Dallas-Fort Worth, TX-OK",19100.0,...,0.229821,0.000708,0.205018,16,10,17,17,15.666667,2922.609204,481828.4303
4,5,481130000000.0,481130000000.0,48,113,7824,2,206.0,"Dallas-Fort Worth, TX-OK",19100.0,...,0.164863,0.000433,0.125296,4,7,11,14,10.166667,3731.971773,687684.7752


In [3]:
# Create a list with existing columns on database
column_list = database.columns.tolist()

print(column_list)

['OBJECTID', 'GEOID10', 'GEOID20', 'STATEFP', 'COUNTYFP', 'TRACTCE', 'BLKGRPCE', 'CSA', 'CSA_Name', 'CBSA', 'CBSA_Name', 'CBSA_POP', 'CBSA_EMP', 'CBSA_WRK', 'Ac_Total', 'Ac_Water', 'Ac_Land', 'Ac_Unpr', 'TotPop', 'CountHU', 'HH', 'P_WrkAge', 'AutoOwn0', 'Pct_AO0', 'AutoOwn1', 'Pct_AO1', 'AutoOwn2p', 'Pct_AO2p', 'Workers', 'R_LowWageWk', 'R_MedWageWk', 'R_HiWageWk', 'R_PCTLOWWAGE', 'TotEmp', 'E5_Ret', 'E5_Off', 'E5_Ind', 'E5_Svc', 'E5_Ent', 'E8_Ret', 'E8_off', 'E8_Ind', 'E8_Svc', 'E8_Ent', 'E8_Ed', 'E8_Hlth', 'E8_Pub', 'E_LowWageWk', 'E_MedWageWk', 'E_HiWageWk', 'E_PctLowWage', 'D1A', 'D1B', 'D1C', 'D1C5_RET', 'D1C5_OFF', 'D1C5_IND', 'D1C5_SVC', 'D1C5_ENT', 'D1C8_RET', 'D1C8_OFF', 'D1C8_IND', 'D1C8_SVC', 'D1C8_ENT', 'D1C8_ED', 'D1C8_HLTH', 'D1C8_PUB', 'D1D', 'D1_FLAG', 'D2A_JPHH', 'D2B_E5MIX', 'D2B_E5MIXA', 'D2B_E8MIX', 'D2B_E8MIXA', 'D2A_EPHHM', 'D2C_TRPMX1', 'D2C_TRPMX2', 'D2C_TRIPEQ', 'D2R_JOBPOP', 'D2R_WRKEMP', 'D2A_WRKEMP', 'D2C_WREMLX', 'D3A', 'D3AAO', 'D3AMM', 'D3APO', 'D3B', 'D3

In [4]:
# Dropping columns
columns_to_drop = ['COUNTYFP','STATEFP', 'CSA', 'CSA_Name', 'OBJECTID', 'GEOID10', 'GEOID20', 'TRACTCE',
                   'BLKGRPCE', 'Ac_Water', 'Ac_Land'
                   , 'TotPop', 'CountHU', 'HH', 'Pct_AO0','Pct_AO1','Pct_AO2p','R_PCTLOWWAGE'
                   ,'E5_Ret', 'E5_Off', 'E5_Ind', 'E5_Svc', 'E5_Ent', 'E8_Ret', 'E8_off'
                   , 'E8_Ind', 'E8_Svc', 'E8_Ent', 'E8_Ed', 'E8_Hlth', 'E8_Pub', 'E_LowWageWk'
                   , 'E_MedWageWk', 'E_HiWageWk', 'E_PctLowWage', 'D1A', 'D1C5_RET', 'D1C5_OFF'
                   , 'D1C5_IND', 'D1C5_SVC', 'D1C5_ENT', 'D1C8_RET', 'D1C8_OFF', 'D1C8_IND', 'D1C8_SVC'
                   , 'D1C8_ENT', 'D1C8_ED', 'D1C8_HLTH', 'D1C8_PUB', 'D1D', 'D1_FLAG', 'D2A_JPHH'
                   , 'D2B_E5MIX', 'D2B_E5MIXA', 'D2B_E8MIX', 'D2B_E8MIXA', 'D2A_EPHHM', 'D2C_TRPMX1'
                   , 'D2C_TRPMX2', 'D2C_TRIPEQ', 'D2R_JOBPOP', 'D2R_WRKEMP', 'D2A_WRKEMP', 'D2C_WREMLX'
                   , 'D3A', 'D3AAO', 'D3AMM', 'D3APO', 'D3B', 'D3BAO', 'D3BMM3', 'D3BMM4', 'D3BPO3', 'D3BPO4'
                   , 'D4B025', 'D4B050', 'D4C', 'D4D', 'D4E', 'D5AE', 'D5BR', 'D5BE', 'D5CR', 'D5CRI'
                   , 'D5CE', 'D5CEI', 'D5DR', 'D5DRI', 'D5DE', 'D5DEI', 'D2A_Ranked', 'D2B_Ranked'
                   , 'D3B_Ranked', 'D4A_Ranked', 'Shape_Length', 'Shape_Area']

database = database.drop(columns=columns_to_drop)

database.head()

Unnamed: 0,CBSA,CBSA_Name,CBSA_POP,CBSA_EMP,CBSA_WRK,Ac_Total,Ac_Unpr,P_WrkAge,AutoOwn0,AutoOwn1,...,Workers,R_LowWageWk,R_MedWageWk,R_HiWageWk,TotEmp,D1B,D1C,D4A,D5AR,NatWalkInd
0,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,73.595028,73.595028,0.549,69,39,...,412,99,122,191,66,16.332625,0.8968,362.1,433601,14.0
1,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,119.829909,119.2142,0.466,0,168,...,395,76,107,212,25,5.955666,0.209707,718.84,386504,10.833333
2,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,26.367053,26.36705,0.811,19,143,...,463,136,189,138,0,27.951553,0.0,398.31,404573,8.333333
3,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,119.060687,119.060687,0.638,0,43,...,431,60,69,302,253,7.592767,2.124967,386.24,423099,15.666667
4,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,169.927211,148.74292,0.506,5,67,...,579,91,84,404,32,6.373413,0.215136,638.37,335700,10.166667


In [5]:
new_column_list = database.columns.tolist()

print(new_column_list)

['CBSA', 'CBSA_Name', 'CBSA_POP', 'CBSA_EMP', 'CBSA_WRK', 'Ac_Total', 'Ac_Unpr', 'P_WrkAge', 'AutoOwn0', 'AutoOwn1', 'AutoOwn2p', 'Workers', 'R_LowWageWk', 'R_MedWageWk', 'R_HiWageWk', 'TotEmp', 'D1B', 'D1C', 'D4A', 'D5AR', 'NatWalkInd']


In [6]:
# Specify the columns on which you want to check for missing values
columns_to_check = ['CBSA', 'CBSA_Name']

# Drop rows with missing values in the specified columns
database_cleaned = database.dropna(subset=columns_to_check)

database_cleaned

Unnamed: 0,CBSA,CBSA_Name,CBSA_POP,CBSA_EMP,CBSA_WRK,Ac_Total,Ac_Unpr,P_WrkAge,AutoOwn0,AutoOwn1,...,Workers,R_LowWageWk,R_MedWageWk,R_HiWageWk,TotEmp,D1B,D1C,D4A,D5AR,NatWalkInd
0,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,73.595028,73.595028,0.549,69,39,...,412,99,122,191,66,16.332625,0.896800,362.10,433601,14.000000
1,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,119.829909,119.214200,0.466,0,168,...,395,76,107,212,25,5.955666,0.209707,718.84,386504,10.833333
2,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,26.367053,26.367050,0.811,19,143,...,463,136,189,138,0,27.951553,0.000000,398.31,404573,8.333333
3,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,119.060687,119.060687,0.638,0,43,...,431,60,69,302,253,7.592767,2.124967,386.24,423099,15.666667
4,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,169.927211,148.742920,0.506,5,67,...,579,91,84,404,32,6.373413,0.215136,638.37,335700,10.166667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220646,41980.0,"San Juan-Bayamón-Caguas, PR",2142392,0,0,34.442281,34.442281,0.548,39,158,...,0,0,0,0,0,21.891698,0.000000,-99999.00,0,6.666667
220647,41980.0,"San Juan-Bayamón-Caguas, PR",2142392,0,0,27.042871,27.042871,0.524,44,90,...,0,0,0,0,0,31.801357,0.000000,-99999.00,0,6.333333
220648,41980.0,"San Juan-Bayamón-Caguas, PR",2142392,0,0,49.273470,49.273470,0.653,37,131,...,0,0,0,0,0,15.748840,0.000000,-99999.00,0,6.000000
220649,41980.0,"San Juan-Bayamón-Caguas, PR",2142392,0,0,123.296228,111.259740,0.586,0,137,...,0,0,0,0,0,3.235672,0.000000,-99999.00,0,5.333333


In [7]:
# Drop columns with value = 0
columns_check_zero = ['CBSA_EMP','CBSA_WRK']

database_cleaned = database_cleaned[(database_cleaned[columns_to_check] != 0).all(axis=1)]

database_cleaned

Unnamed: 0,CBSA,CBSA_Name,CBSA_POP,CBSA_EMP,CBSA_WRK,Ac_Total,Ac_Unpr,P_WrkAge,AutoOwn0,AutoOwn1,...,Workers,R_LowWageWk,R_MedWageWk,R_HiWageWk,TotEmp,D1B,D1C,D4A,D5AR,NatWalkInd
0,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,73.595028,73.595028,0.549,69,39,...,412,99,122,191,66,16.332625,0.896800,362.10,433601,14.000000
1,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,119.829909,119.214200,0.466,0,168,...,395,76,107,212,25,5.955666,0.209707,718.84,386504,10.833333
2,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,26.367053,26.367050,0.811,19,143,...,463,136,189,138,0,27.951553,0.000000,398.31,404573,8.333333
3,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,119.060687,119.060687,0.638,0,43,...,431,60,69,302,253,7.592767,2.124967,386.24,423099,15.666667
4,19100.0,"Dallas-Fort Worth-Arlington, TX",7189384,3545715,3364458,169.927211,148.742920,0.506,5,67,...,579,91,84,404,32,6.373413,0.215136,638.37,335700,10.166667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220646,41980.0,"San Juan-Bayamón-Caguas, PR",2142392,0,0,34.442281,34.442281,0.548,39,158,...,0,0,0,0,0,21.891698,0.000000,-99999.00,0,6.666667
220647,41980.0,"San Juan-Bayamón-Caguas, PR",2142392,0,0,27.042871,27.042871,0.524,44,90,...,0,0,0,0,0,31.801357,0.000000,-99999.00,0,6.333333
220648,41980.0,"San Juan-Bayamón-Caguas, PR",2142392,0,0,49.273470,49.273470,0.653,37,131,...,0,0,0,0,0,15.748840,0.000000,-99999.00,0,6.000000
220649,41980.0,"San Juan-Bayamón-Caguas, PR",2142392,0,0,123.296228,111.259740,0.586,0,137,...,0,0,0,0,0,3.235672,0.000000,-99999.00,0,5.333333


In [8]:
# Filter database to only show results from NY state (NY)
# Specify the column and the value you want to filter
column_to_filter = 'CBSA_Name'
value_to_match = ', NY'

# Use boolean indexing to filter the DataFrame
filtered_database = database_cleaned[database_cleaned[column_to_filter].str.contains(value_to_match, na=False)]

filtered_database = filtered_database.reset_index(drop=True)
filtered_database

Unnamed: 0,CBSA,CBSA_Name,CBSA_POP,CBSA_EMP,CBSA_WRK,Ac_Total,Ac_Unpr,P_WrkAge,AutoOwn0,AutoOwn1,...,Workers,R_LowWageWk,R_MedWageWk,R_HiWageWk,TotEmp,D1B,D1C,D4A,D5AR,NatWalkInd
0,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,51.444490,50.313740,0.705,26,94,...,385,84,120,181,11,26.791886,0.218628,962.92,661739,10.500000
1,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,1025.230008,960.425575,0.560,12,120,...,932,151,128,653,282,1.632610,0.293620,944.15,140212,9.500000
2,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,1423.857411,1252.374073,0.557,13,171,...,855,165,217,473,4012,1.484381,3.203516,839.54,158876,13.000000
3,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,485.497632,433.369860,0.630,0,202,...,1563,280,375,908,210,8.948476,0.484575,-99999.00,153467,5.833333
4,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,148.564674,147.595085,0.626,0,50,...,498,92,91,315,194,4.674952,1.314407,968.29,278458,14.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19821,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,7356.423984,6764.490215,0.591,0,194,...,434,105,140,189,190,0.134970,0.028088,-99999.00,4940,7.000000
19822,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,7941.309931,7254.276913,0.677,0,102,...,325,71,112,142,32,0.177275,0.004411,-99999.00,249,3.833333
19823,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,5142.746758,4844.003689,0.573,47,154,...,782,190,270,322,62,0.315029,0.012799,-99999.00,690,5.666667
19824,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,2440.629186,2323.255450,0.629,16,174,...,805,178,296,331,45,0.796296,0.019369,-99999.00,992,3.833333


In [9]:
# Assuming 'your_dataset' is your DataFrame
filtered_database['Perc_Local_wrk'] = (filtered_database['CBSA_WRK'] / filtered_database['CBSA_POP'] * 100).round(2)

# Find the index of the 'CBSA_WRK' column
wrk_column_index = filtered_database.columns.get_loc('CBSA_WRK')

# Insert the 'Perc_Local_wrk' column after the 'CBSA_WRK' column
filtered_database.insert(wrk_column_index + 1, 'Perc_Local_wrk', filtered_database.pop('Perc_Local_wrk'))

filtered_database

Unnamed: 0,CBSA,CBSA_Name,CBSA_POP,CBSA_EMP,CBSA_WRK,Perc_Local_wrk,Ac_Total,Ac_Unpr,P_WrkAge,AutoOwn0,...,Workers,R_LowWageWk,R_MedWageWk,R_HiWageWk,TotEmp,D1B,D1C,D4A,D5AR,NatWalkInd
0,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,51.444490,50.313740,0.705,26,...,385,84,120,181,11,26.791886,0.218628,962.92,661739,10.500000
1,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,1025.230008,960.425575,0.560,12,...,932,151,128,653,282,1.632610,0.293620,944.15,140212,9.500000
2,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,1423.857411,1252.374073,0.557,13,...,855,165,217,473,4012,1.484381,3.203516,839.54,158876,13.000000
3,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,485.497632,433.369860,0.630,0,...,1563,280,375,908,210,8.948476,0.484575,-99999.00,153467,5.833333
4,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,148.564674,147.595085,0.626,0,...,498,92,91,315,194,4.674952,1.314407,968.29,278458,14.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19821,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,7356.423984,6764.490215,0.591,0,...,434,105,140,189,190,0.134970,0.028088,-99999.00,4940,7.000000
19822,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,7941.309931,7254.276913,0.677,0,...,325,71,112,142,32,0.177275,0.004411,-99999.00,249,3.833333
19823,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,5142.746758,4844.003689,0.573,47,...,782,190,270,322,62,0.315029,0.012799,-99999.00,690,5.666667
19824,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,2440.629186,2323.255450,0.629,16,...,805,178,296,331,45,0.796296,0.019369,-99999.00,992,3.833333


In [10]:
# Getting unique values in the 'Perc_Local_wrk' column as list
unique_Perc_Local_wrk =[filtered_database['Perc_Local_wrk'].unique()]

print(f"Number of unique CBSA Names: {filtered_database['Perc_Local_wrk'].nunique()}")

# Display unique values
unique_Perc_Local_wrk

Number of unique CBSA Names: 27


[array([46.31, 40.23, 46.48, 42.72, 47.01, 44.11, 47.67, 44.27, 39.79,
        40.82, 47.55, 44.54, 42.08, 35.68, 40.27, 43.39, 40.39, 44.29,
        43.02, 42.2 , 32.16, 33.06, 39.44, 38.48, 44.83, 44.79, 39.12])]

In [11]:
# Getting unique values in the 'CBSA_Name' column as list
unique_values =[filtered_database['CBSA_Name'].unique()]

print(f"Number of unique CBSA Names: {filtered_database['CBSA_Name'].nunique()}")

# Display unique values
unique_values

Number of unique CBSA Names: 27


[array(['New York-Newark-Jersey City, NY-NJ-PA', 'Elmira, NY',
        'Buffalo-Cheektowaga, NY', 'Binghamton, NY', 'Rochester, NY',
        'Gloversville, NY', 'Albany-Schenectady-Troy, NY',
        'Glens Falls, NY', 'Olean, NY', 'Oneonta, NY', 'Batavia, NY',
        'Poughkeepsie-Newburgh-Middletown, NY', 'Cortland, NY',
        'Malone, NY', 'Amsterdam, NY', 'Utica-Rome, NY', 'Corning, NY',
        'Syracuse, NY', 'Kingston, NY', 'Plattsburgh, NY',
        'Watertown-Fort Drum, NY', 'Ogdensburg-Massena, NY', 'Ithaca, NY',
        'Seneca Falls, NY', 'Auburn, NY', 'Hudson, NY',
        'Jamestown-Dunkirk-Fredonia, NY'], dtype=object)]

In [12]:
# Saving filtered_database to CSV file to future work
# Specify the path where you want to save the CSV file
csv_file_path = 'filtered_database.csv'

# Save the DataFrame to a CSV file
filtered_database.to_csv(csv_file_path, index=False)

In [13]:
# Openning a clean_database dataframe to project continuity

clean_database = pd.read_csv('filtered_database.csv')

clean_database.head()

Unnamed: 0,CBSA,CBSA_Name,CBSA_POP,CBSA_EMP,CBSA_WRK,Perc_Local_wrk,Ac_Total,Ac_Unpr,P_WrkAge,AutoOwn0,...,Workers,R_LowWageWk,R_MedWageWk,R_HiWageWk,TotEmp,D1B,D1C,D4A,D5AR,NatWalkInd
0,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,51.44449,50.31374,0.705,26,...,385,84,120,181,11,26.791886,0.218628,962.92,661739,10.5
1,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,1025.230008,960.425575,0.56,12,...,932,151,128,653,282,1.63261,0.29362,944.15,140212,9.5
2,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,1423.857411,1252.374073,0.557,13,...,855,165,217,473,4012,1.484381,3.203516,839.54,158876,13.0
3,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,485.497632,433.36986,0.63,0,...,1563,280,375,908,210,8.948476,0.484575,-99999.0,153467,5.833333
4,35620.0,"New York-Newark-Jersey City, NY-NJ-PA",19318471,9159786,8946175,46.31,148.564674,147.595085,0.626,0,...,498,92,91,315,194,4.674952,1.314407,968.29,278458,14.666667


### State Summary

In [23]:
# Total CBSAs
total_CBSA = len(clean_database["CBSA_Name"].value_counts())
total_CBSA


27

In [76]:
# Total NY Population surveyed
total_Population = clean_database["CBSA_POP"].unique().sum()
print(f"{total_Population:,}")


25,865,870


In [84]:
# Average Walkability Index
AVG_Walkability =clean_database["NatWalkInd"].mean()
# AVG_Walkability='{:,.2f}'.format(AVG_Walkability)
print(f"{AVG_Walkability:,.2f}")

11.55


In [113]:
# Total working population
total_Work_Population = clean_database["CBSA_WRK"].unique().sum()
print(f"{total_Work_Population:,}")


11,863,463


In [116]:
# Percentage of High income

pop_high_income= clean_database["R_HiWageWk"].sum()

high_income_percentage =(pop_high_income/total_Work_Population)

print("{:.2%}".format(high_income_percentage))

49.83%


In [117]:
# Percentage of Medium income

pop_med_income= clean_database["R_MedWageWk"].sum()

medium_income_percentage =pop_med_income/total_Work_Population
print("{:.2%}".format(medium_income_percentage))

28.68%


In [118]:
# Percentage of Low income

pop_low_income= clean_database["R_LowWageWk"].sum()

low_income_percentage =pop_low_income/total_Work_Population
print("{:.2%}".format(low_income_percentage))

21.49%
