# Data processing

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [42]:
# features from EJScreen

features = [
            # "OBJECTID", # unique ID for block group in geodatabase
            # "ID", # census FIPS code for block group
            # "STATE_NAME", # name of state
            "ST_ABBREV", # state abbreviation
            "CNTY_NAME", # country name
            # "REGION", # US EPA region number
            "ACSTOTPOP", # total population
            "ACSTOTHH", # households (for limited English speaking)
            "MINORPOP", # num people of color
            # "MINORPCT", # % people of color
            "LOWINCOME", # num of low income
            # "LOWINCPCT", # % low income
            "UNEMPLOYED", # num of unemployed
            # "UNEMPPCT", # % unemployed
            "LESSHS", # num less than high school education
            # "LESSHSPCT", # % less than high school education
            "PM25", # particulate matter 2.5
            "OZONE", # ozone
            "DSLPM", # diesel particulate matter
            "CANCER", # air toxics cancer risk
            "RESP", # air toxics respiratory HI
            "PTRAF", # traffic proximity
            "PNPL", # superfund proximity
            "PRMP", # RMP facility proximity
            "PTSDF", # hazardous waste proximity
            "UST", # underground storage tanks
            "PWDIS", # wastewater discharge
            "AREALAND", # land area in square meters
            "AREAWATER", # water area in square meters
            "NPL_CNT", # num superfund facilities in block group
            "TSDF_CNT" # num hazardous waste facilities in block group
            ]

dem_features = [ # demographics (to sum)
            "ST_ABBREV", # state abbreviation
            "CNTY_NAME", # country name
            "ACSTOTPOP", # total population
            "ACSTOTHH", # households (for limited English speaking)
            "MINORPOP", # num people of color
            "LOWINCOME", # num of low income
            "UNEMPLOYED", # num of unemployed
            "LESSHS", # num less than high school education
            ]

num_features = [ # numeric features (to mean)
            "ST_ABBREV", # state abbreviation
            "CNTY_NAME", # country name
            "PM25", # particulate matter 2.5
            "OZONE", # ozone
            "DSLPM", # diesel particulate matter
            "CANCER", # air toxics cancer risk
            "RESP", # air toxics respiratory HI
            "PTRAF", # traffic proximity
            "PNPL", # superfund proximity
            "PRMP", # RMP facility proximity
            "PTSDF", # hazardous waste proximity
            "UST", # underground storage tanks
            "PWDIS", # wastewater discharge
            "AREALAND", # land area in square meters
            "AREAWATER", # water area in square meters
            "NPL_CNT", # num superfund facilities in block group
            "TSDF_CNT" # num hazardous waste facilities in block group
            ]

In [24]:
# read in data
landfills_df = pd.read_csv('data\landfills.csv')
ejScreen_full_df = pd.read_csv("data\EJSCREEN_2022_Full_with_AS_CNMI_GU_VI.csv")

  ejScreen_full_df = pd.read_csv("data\EJSCREEN_2022_Full_with_AS_CNMI_GU_VI.csv")


In [109]:
# subset features of interest and remove mentions of parish or county
ejScreen_df = ejScreen_full_df[features]
ejScreen_df['CNTY_NAME'] = [i.replace(' Parish', '').replace(' County', '').replace(' Municipality', '').replace(' Borough', '')
                            .replace(' Census Area', '').replace(' Municipio', '')
                            for i in ejScreen_df['CNTY_NAME']]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ejScreen_df['CNTY_NAME'] = [i.replace(' Parish', '').replace(' County', '').replace(' Municipality', '').replace(' Borough', '')


In [105]:
ejScreen_df[dem_features].dropna().groupby(by=["CNTY_NAME", 'ST_ABBREV']).sum()
ejScreen_df[dem_features].groupby(by=["CNTY_NAME", 'ST_ABBREV']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,ACSTOTPOP,ACSTOTHH,MINORPOP,LOWINCOME,UNEMPLOYED,LESSHS
CNTY_NAME,ST_ABBREV,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Abbeville,SC,24582.0,9597.0,7780.0,8950.0,534.0,2947.0
Acadia,LA,62371.0,22598.0,14195.0,27169.0,2572.0,7994.0
Accomack,VA,32560.0,13641.0,13055.0,14190.0,535.0,4177.0
Ada,ID,469473.0,179708.0,74243.0,106033.0,9273.0,14930.0
Adair,IA,7048.0,3217.0,331.0,2038.0,215.0,253.0
...,...,...,...,...,...,...,...
Yuma,AZ,211931.0,74624.0,147088.0,91415.0,7447.0,34280.0
Yuma,CO,10013.0,4108.0,2596.0,3157.0,214.0,912.0
Zapata,TX,14243.0,4689.0,13670.0,8081.0,466.0,2836.0
Zavala,TX,11930.0,3674.0,11304.0,7165.0,224.0,1724.0


In [106]:
ejScreen_df[num_features].dropna().groupby(by=['ST_ABBREV', "CNTY_NAME"]).mean()
ejScreen_df[num_features].groupby(by=['ST_ABBREV', "CNTY_NAME"]).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,PM25,OZONE,DSLPM,CANCER,RESP,PTRAF,PNPL,PRMP,PTSDF,UST,PWDIS,AREALAND,AREAWATER,NPL_CNT,TSDF_CNT
ST_ABBREV,CNTY_NAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
AK,Aleutians East,,,0.000928,8.000000,0.100000,,0.001116,2.234237,0.004049,0.000424,,6.030558e+09,6.933284e+09,0.000000,0.000000
AK,Aleutians West,,,0.109595,8.000000,0.100000,,0.040295,1.940781,0.008495,0.003795,,3.792628e+09,8.394545e+09,0.333333,0.333333
AK,Anchorage,,,0.496353,48.350000,0.626500,633.067023,0.131820,0.869782,0.354112,5.700917,,2.210296e+07,3.106511e+06,0.010000,0.015000
AK,Bethel,,,0.002093,8.000000,0.100000,,0.001525,0.003567,0.002309,0.350601,,1.052238e+10,1.276468e+09,0.000000,0.000000
AK,Bristol Bay,,,0.002583,8.000000,0.100000,,0.002072,0.508547,0.007272,0.043802,,1.248316e+09,9.497756e+08,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WY,Sweetwater,6.096907,54.004162,0.136054,14.705882,0.194118,226.182129,0.004898,0.388345,0.410959,6.960053,0.000220,7.942869e+08,4.889125e+06,0.000000,0.117647
WY,Teton,4.571693,49.826308,0.028317,10.000000,0.100000,199.958659,0.009046,0.011488,0.009851,1.139733,0.000017,5.175895e+08,2.854296e+07,0.000000,0.000000
WY,Uinta,6.471276,55.901133,0.061977,10.000000,0.176471,129.874998,0.012324,0.609177,0.061058,3.353313,0.000541,3.171548e+08,9.779892e+05,0.000000,0.058824
WY,Washakie,4.890414,47.342993,0.033101,10.000000,0.200000,108.645307,0.004945,0.131658,0.009233,6.390798,0.000002,6.442392e+08,1.157321e+06,0.000000,0.000000


In [110]:
# save as csv file
ej = pd.concat([ejScreen_df[dem_features].groupby(by=['ST_ABBREV', "CNTY_NAME"]).sum(), 
           ejScreen_df[num_features].groupby(by=['ST_ABBREV', "CNTY_NAME"]).mean()], axis=1)

ej.to_csv("data\ejscreen2022.csv")

In [99]:
ej

Unnamed: 0_level_0,Unnamed: 1_level_0,ACSTOTPOP,ACSTOTHH,MINORPOP,LOWINCOME,UNEMPLOYED,LESSHS,PM25,OZONE,DSLPM,CANCER,...,PTRAF,PNPL,PRMP,PTSDF,UST,PWDIS,AREALAND,AREAWATER,NPL_CNT,TSDF_CNT
ST_ABBREV,CNTY_NAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AK,Aleutians East Borough,3389.0,988.0,2939.0,1011.0,86.0,354.0,,,0.000928,8.000000,...,,0.001116,2.234237,0.004049,0.000424,,6.030558e+09,6.933284e+09,0.000000,0.000000
AK,Aleutians West Census Area,5708.0,1306.0,4385.0,1109.0,159.0,345.0,,,0.109595,8.000000,...,,0.040295,1.940781,0.008495,0.003795,,3.792628e+09,8.394545e+09,0.333333,0.333333
AK,Anchorage Municipality,292090.0,106970.0,126363.0,62252.0,8055.0,11555.0,,,0.496353,48.350000,...,633.067023,0.131820,0.869782,0.354112,5.700917,,2.210296e+07,3.106511e+06,0.010000,0.015000
AK,Bethel Census Area,18263.0,4499.0,16544.0,9872.0,1342.0,1628.0,,,0.002093,8.000000,...,,0.001525,0.003567,0.002309,0.350601,,1.052238e+10,1.276468e+09,0.000000,0.000000
AK,Bristol Bay Borough,739.0,284.0,385.0,107.0,17.0,35.0,,,0.002583,8.000000,...,,0.002072,0.508547,0.007272,0.043802,,1.248316e+09,9.497756e+08,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WY,Sweetwater,43352.0,15726.0,9205.0,10499.0,1437.0,2098.0,6.096907,54.004162,0.136054,14.705882,...,226.182129,0.004898,0.388345,0.410959,6.960053,0.000220,7.942869e+08,4.889125e+06,0.000000,0.117647
WY,Teton,23356.0,10027.0,4387.0,5128.0,280.0,945.0,4.571693,49.826308,0.028317,10.000000,...,199.958659,0.009046,0.011488,0.009851,1.139733,0.000017,5.175895e+08,2.854296e+07,0.000000,0.000000
WY,Uinta,20374.0,7789.0,2581.0,5150.0,453.0,903.0,6.471276,55.901133,0.061977,10.000000,...,129.874998,0.012324,0.609177,0.061058,3.353313,0.000541,3.171548e+08,9.779892e+05,0.000000,0.058824
WY,Washakie,7933.0,3437.0,1446.0,2075.0,170.0,441.0,4.890414,47.342993,0.033101,10.000000,...,108.645307,0.004945,0.131658,0.009233,6.390798,0.000002,6.442392e+08,1.157321e+06,0.000000,0.000000


In [89]:
ej.index

MultiIndex([(         'Abbeville County', 'SC'),
            (            'Acadia Parish', 'LA'),
            (          'Accomack County', 'VA'),
            (               'Ada County', 'ID'),
            (             'Adair County', 'IA'),
            (             'Adair County', 'KY'),
            (             'Adair County', 'MO'),
            (             'Adair County', 'OK'),
            (             'Adams County', 'CO'),
            (             'Adams County', 'IA'),
            ...
            (              'York County', 'SC'),
            (              'York County', 'VA'),
            (             'Young County', 'TX'),
            (              'Yuba County', 'CA'),
            ('Yukon-Koyukuk Census Area', 'AK'),
            (              'Yuma County', 'AZ'),
            (              'Yuma County', 'CO'),
            (            'Zapata County', 'TX'),
            (            'Zavala County', 'TX'),
            (           'Ziebach County', 'SD')],
   

In [73]:
pd.read_csv('data\ejscreen2022.csv')

Unnamed: 0,CNTY_NAME,ST_ABBREV,ACSTOTPOP,ACSTOTHH,MINORPOP,LOWINCOME,UNEMPLOYED,LESSHS,PM25,OZONE,...,PTRAF,PNPL,PRMP,PTSDF,UST,PWDIS,AREALAND,AREAWATER,NPL_CNT,TSDF_CNT
0,Abbeville County,SC,24582.0,9597.0,7780.0,8950.0,534.0,2947.0,8.094903,36.914376,...,9.912024,0.026612,0.312729,0.285208,0.704876,1.186016e-01,6.058020e+07,2.534336e+06,0.000000,0.095238
1,Acadia Parish,LA,62371.0,22598.0,14195.0,27169.0,2572.0,7994.0,9.012808,35.335031,...,392.537163,0.052461,0.503018,0.125235,1.041398,6.305854e-05,1.586050e+07,5.546531e+04,0.009346,0.009346
2,Accomack County,VA,32560.0,13641.0,13055.0,14190.0,535.0,4177.0,6.276000,41.477595,...,90.740924,0.011686,0.184148,0.100388,0.228207,1.763528e-01,3.324931e+07,6.369314e+07,0.000000,0.057143
3,Ada County,ID,469473.0,179708.0,74243.0,106033.0,9273.0,14930.0,8.327458,44.015253,...,347.893086,0.013872,0.404451,0.333309,1.859752,6.352569e+00,8.256681e+06,6.715077e+04,0.000000,0.015152
4,Adair County,IA,7048.0,3217.0,331.0,2038.0,215.0,253.0,7.522954,40.660532,...,51.840745,0.023570,1.564921,0.298330,0.446731,1.146094e-09,1.638227e+08,2.886662e+05,0.000000,0.111111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3228,Yuma County,AZ,211931.0,74624.0,147088.0,91415.0,7447.0,34280.0,8.349043,46.205262,...,269.173650,0.125580,1.276695,0.394887,2.344964,1.307090e-02,8.655015e+07,8.032218e+04,0.006061,0.036364
3229,Yuma County,CO,10013.0,4108.0,2596.0,3157.0,214.0,912.0,5.582793,46.878680,...,59.164928,0.007836,1.991251,0.011928,0.188693,4.513120e-03,6.804182e+08,1.237185e+06,0.000000,0.000000
3230,Zapata County,TX,14243.0,4689.0,13670.0,8081.0,466.0,2836.0,10.023407,33.409354,...,99.064545,0.006880,0.504232,0.013632,1.253947,4.091981e-04,1.847054e+08,1.102651e+07,0.000000,0.000000
3231,Zavala County,TX,11930.0,3674.0,11304.0,7165.0,224.0,1724.0,8.869855,38.613895,...,134.240292,0.007797,0.029664,0.040335,2.155758,4.463049e-06,3.733632e+08,1.245412e+06,0.000000,0.000000


In [75]:
landfills_df.columns
features = [
    # "Latitude",
    # "Longitude",
    "County",
    "State",
    "Waste in Place (tons)",
    "Current Project Status"
]

Index(['Unnamed: 0', 'GHGRP ID', 'Landfill ID', 'Landfill Name', 'State',
       'Physical Address', 'City', 'County', 'Zip Code', 'Latitude',
       'Longitude', 'Ownership Type', 'Landfill Owner Organization(s)',
       'Year Landfill Opened', 'Landfill Closure Year',
       'Current Landfill Status', 'Waste in Place (tons)',
       'Waste in Place Year', 'LFG Collection System In Place?',
       'LFG Collected (mmscfd)', 'LFG Flared (mmscfd)', 'Project ID',
       'Current Project Status', 'Project Name', 'Project Start Date',
       'Project Shutdown Date', 'Project Type Category',
       'LFG Energy Project Type', 'RNG Delivery Method',
       'Actual MW Generation', 'Rated MW Capacity',
       'LFG Flow to Project (mmscfd)',
       'Current Year Emission Reductions (MMTCO2e/yr) - Direct',
       'Current Year Emission Reductions (MMTCO2e/yr) - Avoided'],
      dtype='object')

In [82]:
pd.unique(landfills_df['Current Project Status'])

array(['Candidate', 'Low Potential', 'Future Potential', 'Operational',
       'Unknown', 'Shutdown', 'Planned', 'Construction'], dtype=object)