<a href="https://colab.research.google.com/github/edgi-govdata-archiving/EEW-SOEP/blob/main/RSEI_TRI_offsite_transfers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Analysis of offsite transfers of waste recorded by TRI and modeled by RSEI, 2017-2022

In [1]:
# RSEI
# Access: https://gaftp.epa.gov/rsei/Current_Version/V2312_RY2022/Public_Release_Data/
# Data dictionary: https://www.epa.gov/rsei/rsei-data-dictionary-elements-data

# Get the RSEI data
# Download from https://gaftp.epa.gov/rsei/Current_Version/V2312_RY2022/Public_Release_Data/RSEIv2312_Public_Release_Data.zip
# and then upload to Colab manually
import zipfile
z = zipfile.ZipFile("/content/RSEIv2312_Public_Release_Data.zip") # May take some time to unzip
z.extractall("")

In [2]:
# Load RSEI data into notebook
"""
RSEI data is separated into a few tables:
elements = this is where the hazard and risk scores are
submissions = the TRI submissions made by a facility. Each submission may include multiple releases.
releases = specific details about each release submitted
chemicals = details about the toxicity of each TRI chemical
Use the data dictionary to learn more about what each table contains:
https://www.epa.gov/rsei/rsei-data-dictionary-elements-data
"""
import pandas
rsei = pandas.read_csv("/content/elements_data_rsei_v2312.csv",
                       usecols=["ReleaseNumber", "PoundsPT", "ScoreCategory",
                                "Score", "Population", "Hazard", "NCScore", "CScore"]) # Load in the CSV file
# Notice that we are not loading in all columns in the elements file, since it is a large one
# In the future, we may want to load in risk scores for specific age ranges. See data dictionary.
releases = pandas.read_csv("/content/releases_data_rsei_v2312.csv") # Load in the CSV file
submissions = pandas.read_csv("/content/submissions_data_rsei_v2312.csv") # Load in the CSV file
chem = pandas.read_csv("/content/chemical_data_rsei_v2312.csv") # Load in the CSV file
facs = pandas.read_csv("/content/facility_data_rsei_v2312.csv") # Load in the CSV File
offsite = pandas.read_csv("/content/offsite_data_rsei_v2312.csv") # Load in the CSV File

offsite

  releases = pandas.read_csv("/content/releases_data_rsei_v2312.csv") # Load in the CSV file
  submissions = pandas.read_csv("/content/submissions_data_rsei_v2312.csv") # Load in the CSV file
  facs = pandas.read_csv("/content/facility_data_rsei_v2312.csv") # Load in the CSV File
  offsite = pandas.read_csv("/content/offsite_data_rsei_v2312.csv") # Load in the CSV File


Unnamed: 0,OffsiteID,FacilityNumber,TRIFID,DropIncinerator,POTW_Incin,Name,Street,City,State,ZIPCode,...,ReachSource,ReachNotes,LatLongSource,LatLongYear,LockLL,CentroidAdjustment,NotesOnCoordinates,AdditionalSourcesForLocation,LocationConfidence,Foreign
0,-1,-1,,False,3.0,,,,,,...,,,,,,,,,,
1,4,4,,False,,"BUCKEYE MINING CO., INC. REDBU D CO., INC.",13629 WHITE ROAD STATE ROUTE 4 5,WEST POINT,OH,44492,...,,,Manual,2017.0,,,No FRS match.,,2a,
2,7,7,,False,,"DYNACAST INDUSTRIES C/O METALC, HEM INC.","AMAR BRASS COMPOUND {1725 WASH, INGTON RD, SUI...",SANTA CRUZ {PITTSBURGH},PA,15241,...,,,ESRI,2017.0,,,,,,
3,10,10,,False,1.0,MONROE COUNTY PURE WATERS,444 EAST HENRIETTA ROAD,ROCHESTER,NY,14620,...,,,FRS,2017.0,,,,,,
4,12,12,,False,,"DAIRY FARMERS OF AMERICA, LANDSPREAD SITE S 648","MORE OFFSITES SUBMITTED, REACHED THE SAVABLE L...",COLERIDGE,NE,68727,...,,,ESRI,2017.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44840,1413650,1413650,,,,UACJ (THAILAND CO) LTD,7/352 MAP YANG PHO,RAYONG,,21140,...,,,Manual,2023.0,,,,,,
44841,1413651,1413651,,,,DEVONIAN SWD,"N 31.4441847, W -102.4581129",CRANE,TX,79731,...,,,Manual,2023.0,,,,,,
44842,1413652,1413652,,,,,,,,035647609,...,,,,,,,,,,
44843,1413653,1413653,,,,ACME UNIFORM RENTAL,P. O. BOX 4578,PHILADELPHIA,PA,19134,...,,,ZIP,2023.0,,,,,,


In [3]:
# Filter RSEI to submissions since 2001
# For our purposes, 3017. 2001 (start of Bush's first administration) is ok for a long-term analysis
submissions = submissions[submissions['SubmissionYear']>=2017][["SubmissionNumber", "ChemicalNumber", "SubmissionYear", "FacilityID"]]
# Below, we lookup each chemical in the chemicals table so that information is directly available in the submissions table
submissions = pandas.merge(submissions, chem[["ChemicalNumber", "Chemical"]], on="ChemicalNumber") # Could add more column from chemical table re toxicity
# Get facility IDs (FRS)
submissions = pandas.merge(submissions, facs[["FacilityID", 'FRSID']], on="FacilityID")
# We merge the releases and submissions so that all the releases will have chemical names listed directly
compilation = pandas.merge(releases[["ReleaseNumber", "SubmissionNumber", "Media", "PoundsReleased", "OffsiteNumber"]], submissions, on="SubmissionNumber") # how="left"
# Finally, we add the releases information to the elements table, so that each "element"/risk score/hazard has its chemical name associated with it
final = pandas.merge(compilation[["ReleaseNumber", "SubmissionYear", "Chemical", "Media", "FacilityID", "FRSID", "OffsiteNumber"]], rsei, on="ReleaseNumber")
final

Unnamed: 0,ReleaseNumber,SubmissionYear,Chemical,Media,FacilityID,FRSID,OffsiteNumber,PoundsPT,ScoreCategory,Score,Population,NCScore,CScore,Hazard
0,7684524,2017,Zinc compounds,532,75686WLSHPFM173,1.100133e+11,,6900.0,36,0.000000,0.000,0.000000,0.00,22770.0
1,7684525,2017,Zinc compounds,3,75686WLSHPFM173,1.100133e+11,,1900.0,5,0.560140,20708.000,0.560140,0.00,6270.0
2,7684525,2017,Zinc compounds,3,75686WLSHPFM173,1.100133e+11,,950.0,55,0.733285,6150.540,0.733285,0.00,3135.0
3,7684525,2017,Zinc compounds,3,75686WLSHPFM173,1.100133e+11,,950.0,105,0.364044,323.713,0.364044,0.00,3135.0
4,7684526,2017,Zinc compounds,540,75686WLSHPFM173,1.100133e+11,,5.0,36,0.000000,0.000,0.000000,0.00,16.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1904566,9977941,2020,Copper,2,49441PRTCT711EP,1.100158e+11,,22.8,4,1.343960,621368.000,1.343960,0.00,34200.0
1904567,9977942,2020,Manganese,2,49441PRTCT711EP,1.100158e+11,,13.6,4,6.698800,621368.000,6.698800,0.00,163200.0
1904568,9977943,2020,Nickel,2,49441PRTCT711EP,1.100158e+11,,67.2,4,2565.240000,621368.000,107.575000,2565.24,62496000.0
1904569,9977947,2020,Lead compounds,2,4944WLLYRS2281P,1.100376e+11,,10.8,4,2.759120,582317.000,2.759120,0.00,248400.0


In [6]:
# Link releases with offsite facilities
# Each release transferred offsite contains the id of the offiste facility
# OffsiteNumber->FacilityNumber

# First, filter to offsite releases
offsite_releases = final[~final["OffsiteNumber"].isna()] # Assumes all offsite transfers have proper info/ids
# Add more contextual information about releasing facilities by merging with the facility table
offsite_releases = pandas.merge(offsite_releases, facs, how="left", left_on="FacilityID",
             right_on="FacilityID")
offsite_releases

Unnamed: 0,ReleaseNumber,SubmissionYear,Chemical,Media,FacilityID,FRSID_x,OffsiteNumber,PoundsPT,ScoreCategory,Score,...,FinalReach,FinalCOMID,ReachSource,DistanceToReach,HEM3ID,DistanceToHEM3,LLConfirmed,WaterReleases,ModeledReleases,ModChromReleases
0,7684529,2017,Manganese compounds,724,75686WLSHPFM173,1.100133e+11,1006361.0,800.000000,36,0.000000,...,1.114030e+13,1009160.0,Outfall,323.606398,3901,75593.45521,False,True,True,True
1,7684539,2017,Lead compounds,764,75686WLSHPFM173,1.100133e+11,180.0,122.900000,36,0.000000,...,1.114030e+13,1009160.0,Outfall,323.606398,3901,75593.45521,False,True,True,True
2,7684541,2017,Lead compounds,764,75686WLSHPFM173,1.100133e+11,6263.0,0.300000,36,0.000000,...,1.114030e+13,1009160.0,Outfall,323.606398,3901,75593.45521,False,True,True,True
3,7684542,2017,Lead compounds,793,75686WLSHPFM173,1.100133e+11,1208536.0,6.200000,36,0.000000,...,1.114030e+13,1009160.0,Outfall,323.606398,3901,75593.45521,False,True,True,True
4,7684546,2017,Copper compounds (this category does not inclu...,724,75686WLSHPFM173,1.100133e+11,1006361.0,1800.000000,36,0.000000,...,1.114030e+13,1009160.0,Outfall,323.606398,3901,75593.45521,False,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1111106,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.227550,7,0.007133,...,,2252939.0,,1395.094620,3947,50494.42859,False,,True,
1111107,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.113775,57,0.000194,...,,2252939.0,,1395.094620,3947,50494.42859,False,,True,
1111108,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.113775,107,0.000102,...,,2252939.0,,1395.094620,3947,50494.42859,False,,True,
1111109,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.302162,10,0.000000,...,,2252939.0,,1395.094620,3947,50494.42859,False,,True,


In [7]:
# Next, prepare to create a table comprised of all offsite releases
# context about those releasing facilities and the places receiving them
# Set up the columns we want from both existing tables here:
offsite_releases_cols = [
"ReleaseNumber",
"SubmissionYear",
"Chemical",
"Media",
"FacilityID",
"FRSID_x",
"OffsiteNumber",
"PoundsPT",
"ScoreCategory",
"Score",
"Population",
#"NCScore",
#"CScore",
"Hazard",
"FacilityNumber",
#"Latitude",
#"Longitude",
"FacilityName",
"County",
"State",
"ZIPCode",
"FIPS",
"ParentName",
"StandardizedParentCompany",
]
offsite_cols = [
"OffsiteID",
"FacilityNumber",
"FRSID",
"TRIFID",
"DropIncinerator",
"POTW_Incin",
"Name",
#"City",
#"State",
"ZIPCode",
#"Latitude",
#"Longitude",
"Country",
"Foreign"
]

In [8]:
# Join with offsite records
offsite_complete = pandas.merge(offsite_releases[offsite_releases_cols], offsite[offsite_cols],
             how="left",
             left_on="OffsiteNumber", right_on="FacilityNumber",
             suffixes=["_releasing", "_receiving"])
offsite_complete

Unnamed: 0,ReleaseNumber,SubmissionYear,Chemical,Media,FacilityID,FRSID_x,OffsiteNumber,PoundsPT,ScoreCategory,Score,...,OffsiteID,FacilityNumber_receiving,FRSID,TRIFID,DropIncinerator,POTW_Incin,Name,ZIPCode_receiving,Country,Foreign
0,7684529,2017,Manganese compounds,724,75686WLSHPFM173,1.100133e+11,1006361.0,800.000000,36,0.000000,...,1006361,1006361,110016711423.0,,False,,UTIL RECYCLE SVC,75169,,
1,7684539,2017,Lead compounds,764,75686WLSHPFM173,1.100133e+11,180.0,122.900000,36,0.000000,...,180,180,110008157430,,False,,CITY OF MT PLSNT LNDFLL,75455,,
2,7684541,2017,Lead compounds,764,75686WLSHPFM173,1.100133e+11,6263.0,0.300000,36,0.000000,...,6263,6263,110000459995,,False,3.0,SAFETY-KLEEN SYSTEMS DENTON RECYCLE CENTER,76208,,
3,7684542,2017,Lead compounds,793,75686WLSHPFM173,1.100133e+11,1208536.0,6.200000,36,0.000000,...,1208536,1208536,110038920549,,False,,SFTY KLEEN SYS,71108,,
4,7684546,2017,Copper compounds (this category does not inclu...,724,75686WLSHPFM173,1.100133e+11,1006361.0,1800.000000,36,0.000000,...,1006361,1006361,110016711423.0,,False,,UTIL RECYCLE SVC,75169,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1111106,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.227550,7,0.007133,...,13572,13572,,,False,1.0,CITY OF ELWOOD KANSAS WWTP,66024,,
1111107,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.113775,57,0.000194,...,13572,13572,,,False,1.0,CITY OF ELWOOD KANSAS WWTP,66024,,
1111108,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.113775,107,0.000102,...,13572,13572,,,False,1.0,CITY OF ELWOOD KANSAS WWTP,66024,,
1111109,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.302162,10,0.000000,...,13572,13572,,,False,1.0,CITY OF ELWOOD KANSAS WWTP,66024,,


In [9]:
# Load demographic profile of census tracts
# https://data.census.gov/table/DECENNIALDP2020.DP1?g=010XX00US,$1400000&d=DEC+Demographic+Profile

z = zipfile.ZipFile("/content/DECENNIALDP2020.DP1_2025-03-05T213348.zip") # May take some time to unzip
z.extractall("/content")

tracts = pandas.read_csv("/content/DECENNIALDP2020.DP1-Data.csv")

tracts["ID"] = tracts["GEO_ID"].astype(str).str.replace("1400000US","")
tracts = tracts[["ID", "DP1_0105P"]] # DP1_0105P = % White, non-Hispanic/Latino

# Load lookup tables of receiving and releasing facilities' census tract IDs
# pre-calculated in ArcGIS

releasing_tracts = pandas.read_csv("/content/releasing_joined.csv",
  dtype={"FacilityID": str, "FIPS_1": str})
receiving_tracts = pandas.read_csv("/content/receiving_joined.csv",
  dtype={"FacilityNumber_receiving": str, "FIPS_1": str})

releasing_tracts = pandas.merge(releasing_tracts, tracts, how="left",
             left_on="FIPS_1", right_on="ID")
receiving_tracts = pandas.merge(receiving_tracts, tracts, how="left",
             left_on="FIPS_1", right_on="ID")

# Join tract IDs and % white Census data to the releases (transfers)
offsite_complete["FacilityID"] = offsite_complete["FacilityID"].astype(str)
offsite_complete["FacilityNumber_receiving"] = offsite_complete["FacilityNumber_receiving"].astype(str)

offsite_complete = pandas.merge(offsite_complete, releasing_tracts, how="left",
             left_on = "FacilityID", right_on="FacilityID",
             suffixes = ["", "_releasingID"])
offsite_complete = pandas.merge(offsite_complete, receiving_tracts, how="left",
             left_on = "FacilityNumber_receiving", right_on="FacilityNumber_receiving",
             suffixes = ["", "_receivingID"])

offsite_complete

  tracts = pandas.read_csv("/content/DECENNIALDP2020.DP1-Data.csv")


Unnamed: 0,ReleaseNumber,SubmissionYear,Chemical,Media,FacilityID,FRSID_x,OffsiteNumber,PoundsPT,ScoreCategory,Score,...,Country,Foreign,FIPS_1,POPULATION_2020,ID,DP1_0105P,FIPS_1_receivingID,POPULATION_2020_receivingID,ID_receivingID,DP1_0105P_receivingID
0,7684529,2017,Manganese compounds,724,75686WLSHPFM173,1.100133e+11,1006361.0,800.000000,36,0.000000,...,,,48449950400,5463.0,48449950400,61.0,48467950400,5980.0,48467950400,78.9
1,7684539,2017,Lead compounds,764,75686WLSHPFM173,1.100133e+11,180.0,122.900000,36,0.000000,...,,,48449950400,5463.0,48449950400,61.0,48449950302,4066.0,48449950302,52.4
2,7684541,2017,Lead compounds,764,75686WLSHPFM173,1.100133e+11,6263.0,0.300000,36,0.000000,...,,,48449950400,5463.0,48449950400,61.0,48121020506,1964.0,48121020506,58.6
3,7684542,2017,Lead compounds,793,75686WLSHPFM173,1.100133e+11,1208536.0,6.200000,36,0.000000,...,,,48449950400,5463.0,48449950400,61.0,22017023701,3649.0,22017023701,4.8
4,7684546,2017,Copper compounds (this category does not inclu...,724,75686WLSHPFM173,1.100133e+11,1006361.0,1800.000000,36,0.000000,...,,,48449950400,5463.0,48449950400,61.0,48467950400,5980.0,48467950400,78.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1111106,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.227550,7,0.007133,...,,,20043020300,3330.0,20043020300,87.4,20043020300,3330.0,20043020300,87.4
1111107,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.113775,57,0.000194,...,,,20043020300,3330.0,20043020300,87.4,20043020300,3330.0,20043020300,87.4
1111108,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.113775,107,0.000102,...,,,20043020300,3330.0,20043020300,87.4,20043020300,3330.0,20043020300,87.4
1111109,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.302162,10,0.000000,...,,,20043020300,3330.0,20043020300,87.4,20043020300,3330.0,20043020300,87.4


In [10]:
offsite_complete.to_csv("offsite_complete.csv")

In [11]:
# Analysis
# summarize releasing communities (census tracts)
# For now, just looking at number of releases
# In the future, look at type/amount
# Also looking at % white for each tract (% non-white = 100-% white)
releasers = offsite_complete.groupby(by=["FIPS_1"])[["ReleaseNumber"]].nunique()
releasers = releasers.join(offsite_complete.drop_duplicates(subset="FIPS_1").set_index('FIPS_1')[["DP1_0105P"]], how="left")
releasers.sort_values("ReleaseNumber", ascending=False).head(30)

Unnamed: 0_level_0,ReleaseNumber,DP1_0105P
FIPS_1,Unnamed: 1_level_1,Unnamed: 2_level_1
48201343601,5944,0.0
18089030300,4299,4.1
31105954500,3725,88.1
49045130600,3440,38.5
48201343700,3157,36.4
5139951000,3079,15.6
48039664200,2892,69.2
48167726200,2846,28.5
39093091100,2723,87.4
53053060200,2561,47.3


In [12]:
# summarize receiving communities (census tracts)
# DP1_0105P_receivingID = % white in the receiving tract
receivers = offsite_complete.groupby(by=["FIPS_1_receivingID"])[["ReleaseNumber"]].nunique()
receivers = receivers.join(offsite_complete.drop_duplicates(subset="FIPS_1_receivingID").set_index('FIPS_1_receivingID')[["DP1_0105P_receivingID"]], how="left")
receivers.sort_values("ReleaseNumber", ascending=False).head(30)

Unnamed: 0_level_0,ReleaseNumber,DP1_0105P_receivingID
FIPS_1_receivingID,Unnamed: 1_level_1,Unnamed: 2_level_1
5139951000,15743,15.6
48201343601,15631,0.0
18089030300,11878,4.1
5125010513,10109,89.2
48355006000,7513,18.5
18097342000,7208,43.8
31105954500,7164,88.1
26163519100,6832,20.5
39093091100,6167,87.4
48245006600,6040,17.8


In [13]:
# Foreign receiving communities (countries)
foreign = offsite_complete[offsite_complete['Foreign']==1]
foreign.groupby(by=["Country"])[["ReleaseNumber"]].nunique().sort_values("ReleaseNumber", ascending=False).head(30)

Unnamed: 0_level_0,ReleaseNumber
Country,Unnamed: 1_level_1
Canada,6500
Mexico,1884
South Korea,352
Belgium,193
Germany,148
Japan,86
Sweden,83
Netherlands,78
China,53
Italy,49


In [14]:
# Summarize internal transfers (those made from facilities to other
# facilities in the same tract)
internal = offsite_complete[offsite_complete['FIPS_1'].astype(str)==
                            offsite_complete['FIPS_1_receivingID'].astype(str)]
internal

Unnamed: 0,ReleaseNumber,SubmissionYear,Chemical,Media,FacilityID,FRSID_x,OffsiteNumber,PoundsPT,ScoreCategory,Score,...,Country,Foreign,FIPS_1,POPULATION_2020,ID,DP1_0105P,FIPS_1_receivingID,POPULATION_2020_receivingID,ID_receivingID,DP1_0105P_receivingID
19,7684643,2017,Dioxin and dioxin-like compounds (Manufacturin...,779,56678NRTHWCOUNT,1.100006e+11,39510.0,0.000041,36,0.000000,...,,,27007450102,4792.0,27007450102,87.0,27007450102,4792.0,27007450102,87.0
20,7684645,2017,Lead,779,56678NRTHWCOUNT,1.100006e+11,39510.0,32.890000,36,0.000000,...,,,27007450102,4792.0,27007450102,87.0,27007450102,4792.0,27007450102,87.0
29,7684674,2017,Carbon tetrachloride,781,71730DPNTL322SU,1.100332e+11,1002728.0,681.000000,36,0.000000,...,,,05139950501,3386.0,05139950501,88.9,05139950501,3386.0,05139950501,88.9
52,7684739,2017,"1,2-Dichloroethylene",765,70669PPGNDCOLUM,1.100005e+11,994.0,1.000000,36,0.000000,...,,,22019003200,2076.0,22019003200,82.9,22019003200,2076.0,22019003200,82.9
61,7684750,2017,"1,1,1-Trichloroethane",765,70669PPGNDCOLUM,1.100005e+11,994.0,38.000000,36,0.000000,...,,,22019003200,2076.0,22019003200,82.9,22019003200,2076.0,22019003200,82.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1111106,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.227550,7,0.007133,...,,,20043020300,3330.0,20043020300,87.4,20043020300,3330.0,20043020300,87.4
1111107,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.113775,57,0.000194,...,,,20043020300,3330.0,20043020300,87.4,20043020300,3330.0,20043020300,87.4
1111108,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.113775,107,0.000102,...,,,20043020300,3330.0,20043020300,87.4,20043020300,3330.0,20043020300,87.4
1111109,9974711,2017,Tetrachlorvinphos,6,66024KMGBR52S15,1.100372e+11,13572.0,0.302162,10,0.000000,...,,,20043020300,3330.0,20043020300,87.4,20043020300,3330.0,20043020300,87.4


In [15]:
# Summarize "offsite" transfers that actually seem to be to the same facility
# Not sure what these are about
same = offsite_complete[offsite_complete['FRSID_x'].astype(str)==
                            offsite_complete['FRSID'].astype(str)]
same

Unnamed: 0,ReleaseNumber,SubmissionYear,Chemical,Media,FacilityID,FRSID_x,OffsiteNumber,PoundsPT,ScoreCategory,Score,...,Country,Foreign,FIPS_1,POPULATION_2020,ID,DP1_0105P,FIPS_1_receivingID,POPULATION_2020_receivingID,ID_receivingID,DP1_0105P_receivingID
21,7684649,2017,Chromium,726,4403WMRCNH172RE,,1413486.0,650.00000,36,0.0,...,,,39093071202,3990.0,39093071202,80.9,39035104800,1620.0,39035104800,44.9
22,7684649,2017,Chromium,726,4403WMRCNH172RE,,1413486.0,15600.00000,47,0.0,...,,,39093071202,3990.0,39093071202,80.9,39035104800,1620.0,39035104800,44.9
23,7684651,2017,Zinc compounds,726,2972WGTTRM1GITI,,1413526.0,14207.60000,36,0.0,...,,,45023020700,2006.0,45023020700,63.5,37179020800,5072.0,37179020800,57.8
163,7684916,2017,Certain glycol ethers,720,0887WZSNDS134CH,,1413601.0,22600.00000,36,0.0,...,,,34035053602,6386.0,34035053602,73.6,42079211701,4921.0,42079211701,91.3
166,7684922,2017,Certain glycol ethers,720,2911WZSNDS646MA,,1413601.0,20000.00000,36,0.0,...,,,45075011300,3036.0,45075011300,9.0,42079211701,4921.0,42079211701,91.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094039,9968638,2017,Ammonia (includes anhydrous ammonia and aqueou...,6,1804WBLMMR111BL,,1413603.0,15.51870,7,0.0,...,,,42091207800,8350.0,42091207800,88.3,42029301401,3100.0,42029301401,83.7
1094040,9968638,2017,Ammonia (includes anhydrous ammonia and aqueou...,6,1804WBLMMR111BL,,1413603.0,7.75935,57,0.0,...,,,42091207800,8350.0,42091207800,88.3,42029301401,3100.0,42029301401,83.7
1094041,9968638,2017,Ammonia (includes anhydrous ammonia and aqueou...,6,1804WBLMMR111BL,,1413603.0,7.75935,107,0.0,...,,,42091207800,8350.0,42091207800,88.3,42029301401,3100.0,42029301401,83.7
1094042,9968638,2017,Ammonia (includes anhydrous ammonia and aqueou...,6,1804WBLMMR111BL,,1413603.0,46.36260,37,0.0,...,,,42091207800,8350.0,42091207800,88.3,42029301401,3100.0,42029301401,83.7


In [16]:
# Look at the top pairs of releasing/receiving communities in terms of
# number of shipments made between them

# calculate pairs
pairs = offsite_complete.groupby(by=['FIPS_1','FIPS_1_receivingID'])[["ReleaseNumber"]].nunique()
pairs.reset_index(inplace=True)

# merge with context info (% white in releasing tract, % white in receiving tract)
pairs = pandas.merge(pairs,
                     releasing_tracts[["FIPS_1", "DP1_0105P"]].drop_duplicates(subset=["FIPS_1"]).rename(columns={"DP1_0105P": "PCTW_RELEASING"}),
                     how="left"
                     )
pairs = pandas.merge(pairs,
                     receiving_tracts[["FIPS_1", "DP1_0105P"]].drop_duplicates(subset=["FIPS_1"]).rename(columns={"FIPS_1":"FIPS_1_receivingID", "DP1_0105P": "PCTW_RECEIVING"}),
                     how="left"
                     )

pairs["PCTW_RECEIVING"] = pandas.to_numeric(pairs["PCTW_RECEIVING"], errors = "coerce")
pairs["PCTW_RELEASING"] = pandas.to_numeric(pairs["PCTW_RELEASING"], errors = "coerce")

# analyze whether the pair constitutes "dumping" (more white tract sending to
# less white tract)
pairs.loc[pairs["PCTW_RELEASING"] > pairs["PCTW_RECEIVING"], "DUMPING"] = 1
pairs.sort_values("ReleaseNumber", ascending=False).head(30)

Unnamed: 0,FIPS_1,FIPS_1_receivingID,ReleaseNumber,PCTW_RELEASING,PCTW_RECEIVING,DUMPING
50987,48201343700,48201343700,1307,36.4,36.4,
48941,48039664200,48039664200,849,69.2,69.2,
50918,48201343601,48201343601,783,0.0,0.0,
48952,48039664200,48201343601,778,69.2,0.0,1.0
53556,49045130600,49045130600,741,38.5,38.5,
50837,48201343601,5139951000,731,0.0,15.6,
4425,6037980002,6037980037,664,0.0,,
37991,39093091100,26163986501,651,87.4,,
37997,39093091100,39143960800,620,87.4,85.2,1.0
53541,49045130600,31105954500,615,38.5,88.1,


In [14]:
# data export (for calculating tract ids in ArcGIS)
releasing_locations = offsite_complete.drop_duplicates(subset=["FacilityID"])
releasing_locations.to_csv("releasing_locations.csv")

receiving_locations = offsite_complete.drop_duplicates(subset=["FacilityNumber_receiving"])
receiving_locations.to_csv("receiving_locations.csv")