In [35]:
import numpy as np
import pandas as pd
from statistics import mean

In [36]:
# Directory prefix for the raw input files. The trailing slash matters because
# the full path is built by plain string concatenation (file_root + file_name).
file_root = "../../data/raw/"

In [37]:
# County-level pesticide-use estimates. The file is TAB separated and, going by
# its name, spans 2013-2017, so 2017 is the most recent year it contains.
file_name = "EPest_county_estimates_2013_2017_v2.txt"

# header=0: the first line of the file supplies the column names.
df = pd.read_csv(f"{file_root}{file_name}", sep="\t", header=0)

In [38]:
# Sanity check of the loaded table: schema, overall size, and the number of
# rows that belong to 2017 (counted from a boolean mask, no filtered copy).
print("Column names:", df.columns.values)
print("Total number of rows:", len(df))
print("Number of rows for 2017:", int((df['YEAR'] == 2017).sum()))

Column names: ['COMPOUND' 'YEAR' 'STATE_FIPS_CODE' 'COUNTY_FIPS_CODE' 'EPEST_LOW_KG'
 'EPEST_HIGH_KG']
Total number of rows: 2004805
Number of rows for 2017: 374887


In [39]:
# Keep only the 2017 records; every later cell works on this single-year subset.
df = df.loc[df['YEAR'].eq(2017)]

In [40]:
# In this dataset, the FIPS code is split into separate State and County codes.
# Build the combined 5-character code: state zero-padded to 2 characters,
# followed by county zero-padded to 3.
#
# Vectorized string operations are used instead of a row-wise `df.apply(..., axis=1)`:
# they produce the same strings without calling a Python lambda once per row.
fips_column = (
    df['STATE_FIPS_CODE'].astype(str).str.zfill(2)
    + df['COUNTY_FIPS_CODE'].astype(str).str.zfill(3)
)
df = df.assign(fips=fips_column)

In [42]:
# Replace commas in compound names with hyphens to avoid problems when
# exporting to CSV.
#
# `.str.replace` with regex=False does a literal, vectorized substitution, so no
# Python lambda runs per row. Missing names (NaN) are passed through unchanged.
compound_column = df['COMPOUND'].str.replace(",", "-", regex=False)
df = df.assign(compound=compound_column)

In [43]:
# Calculate a single mass estimate per row as the mean of the low and high
# EPest estimates (EPEST_LOW_KG / EPEST_HIGH_KG).
#
# Based on further exploration and visualizations, we might normalize the mass of pesticides by county area or population. For now, this is the mass in kg.
#
# DataFrame.mean(axis=1) skips NaN by default, so a row with only one estimate
# keeps that estimate and a row with neither yields NaN. That matches the
# result of np.nanmean, but is vectorized and emits no "Mean of empty slice"
# RuntimeWarning.
mass_column = df[['EPEST_LOW_KG', 'EPEST_HIGH_KG']].mean(axis=1)
df = df.assign(mass=mass_column)

In [44]:
# Narrow the frame to the three derived columns used by the export step.
df = df.loc[:, ['fips', 'compound', 'mass']]

In [45]:
# Write one CSV per compound, holding the (fips, mass) pairs for that compound.
compounds = df.compound.unique()

for compound in compounds:
    # Select this compound's rows and keep only the columns to be exported.
    pesticide = df.loc[df['compound'] == compound, ['fips', 'mass']]
    # dropna() returns a new frame; the result has to be assigned, otherwise
    # rows with a missing value are still written to the file.
    pesticide = pesticide.dropna()
    # NOTE(review): the compound name is interpolated into the file name as-is;
    # a name containing a path separator would break this path - confirm that
    # none of the compound names do.
    pesticide.to_csv(f"../../data/processed/pesticide_{compound}_mass.csv", index=False)