In [2]:
import pandas as pd

# Locations of the raw input workbooks, all under one directory.
# NOTE(review): beef_file is not read by any cell visible in this notebook
# excerpt; it is kept in case another cell uses it.
RAW_DIR = "data/raw"

beef_file = f"{RAW_DIR}/BeefVeal_YearlyFull.xlsx"
cattle_file = f"{RAW_DIR}/Cattle_YearlyFull.xlsx"
pork_file = f"{RAW_DIR}/Pork_YearlyFull.xlsx"
poultry_file = f"{RAW_DIR}/BroilerTurkey_YearlyFull.xlsx"
population_file = f"{RAW_DIR}/POPULATION.xlsx"

# Prepare the data

In [3]:
# Load the population workbook and reshape the United States row from wide
# (one column per year) to long (one row per year).
FIRST_YEAR = 1989  # earliest year kept in the population table

# skiprows=1 skips the sheet's first row, so the header is read from the second.
population_raw = pd.read_excel(population_file, skiprows=1)
is_usa = population_raw["Region-subregion-country"] == "United States of America"

population = (
    population_raw[is_usa]
    .drop(columns=["Index", "Variant", "Country code", "Type", "Parent code"])
    .rename(columns={"Region-subregion-country": "country"})
    .transpose()
    .reset_index()
)
population.columns = ["year", "population"]

population = (
    population
    # The first transposed row is the "country" label, not a year.
    .drop(population.index[0])
    .astype({"year": int})
    # The transposed column mixed the country name with the yearly figures, so
    # it is object dtype; convert it to a real numeric column.
    # NOTE(review): the values look like thousands of people - confirm the unit.
    .assign(population=lambda frame: pd.to_numeric(frame["population"]))
    .loc[lambda frame: frame["year"] >= FIRST_YEAR]
)

population.head()

Unnamed: 0,year,population
40,1989,249726
41,1990,252120
42,1991,254539
43,1992,256991
44,1993,259532


In [4]:
def load_import_totals(path, series_name, label):
    """Load the "Total" row of one import series, transposed to one row per column.

    Parameters
    ----------
    path : str
        Excel workbook to read; its first row is skipped.
    series_name : str
        Value of the "Import/export" column that selects the series.
    label : str
        Short name that replaces ``series_name`` in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Two columns after ``reset_index``: the original column names and the
        values of the selected row. The first two rows hold the
        "Import/export" and "country" labels; the remaining rows are the
        yearly columns of the workbook.

    Raises
    ------
    ValueError
        If the filter does not match exactly one row, since the later cells
        assume a single value column.
    """
    raw = pd.read_excel(path, skiprows=1)

    # Keep only the all-countries total of the requested series.
    is_total = (raw["Import/export"] == series_name) & (raw["name"] == "Total\n ")
    totals = raw[is_total]
    if len(totals) != 1:
        raise ValueError(
            f"expected exactly one 'Total' row for {series_name!r} in {path}, "
            f"found {len(totals)}"
        )

    return (
        totals
        # Drop the code column and the partial-year columns.
        .drop(columns=["geography code", "Jan-Jul 20", "Jan-Jul 21"])
        .rename(columns={"name": "country"})
        # Tidy the label values.
        .replace({"Total\n ": "Total", series_name: label})
        .transpose()
        .reset_index()
    )


cattle = load_import_totals(cattle_file, "Cattle imports, total", "Cattle")
pork = load_import_totals(pork_file, "Pork imports", "Pork")
poultry = load_import_totals(poultry_file, "Broiler imports (1,000 pounds)", "Poultry")

poultry.head(1)

Unnamed: 0,index,10
0,Import/export,Poultry


In [5]:
def _label_and_trim(frame):
    """Name the two transposed columns and discard the first two rows.

    The first two rows hold the "Import/export" and "country" labels rather
    than yearly values.
    NOTE(review): every series is labelled "pounds", but the poultry series
    name says "1,000 pounds" and the cattle/pork units are not visible here -
    confirm the units.
    """
    frame.columns = ["year", "pounds"]
    return frame.drop(frame.index[0:2])


cattle = _label_and_trim(cattle)
pork = _label_and_trim(pork)
poultry = _label_and_trim(poultry)

poultry.head(1)

Unnamed: 0,year,pounds
2,1989,876.586


In [6]:
# Cast the "pounds" column of every livestock frame to float.
for livestock in (cattle, pork, poultry):
    livestock["pounds"] = livestock["pounds"].astype(float)

poultry.head(1)

Unnamed: 0,year,pounds
2,1989,876.586367


In [7]:
def year_start(years):
    """Return a datetime Series holding January 1 of each year in ``years``.

    ``years`` may hold ints or strings. The values are rendered as text and
    parsed with an explicit ``%Y`` format because ``pd.to_datetime`` reads a
    bare integer as nanoseconds since the Unix epoch: the integer 1989 would
    become 1970-01-01 00:00:00.000001989 instead of 1989-01-01.
    """
    return pd.to_datetime(years.astype(str).str.strip(), format="%Y")


# Add the datetime column and put the columns in a consistent order.
# assign() builds a new frame rather than writing into a filtered one.
population = population.assign(**{"year (dt)": year_start(population["year"])})[
    ["year", "year (dt)", "population"]
]
cattle = cattle.assign(**{"year (dt)": year_start(cattle["year"])})[
    ["year", "year (dt)", "pounds"]
]
pork = pork.assign(**{"year (dt)": year_start(pork["year"])})[
    ["year", "year (dt)", "pounds"]
]
poultry = poultry.assign(**{"year (dt)": year_start(poultry["year"])})[
    ["year", "year (dt)", "pounds"]
]

poultry.head(1)

Unnamed: 0,year,year (dt),pounds
2,1989,1989-01-01,876.586367


# Save the data

In [31]:
# Write every processed frame to its own sheet of a single workbook.
# Sheets are written in the order listed here.
sheets = {
    "population": population,
    "poultry": poultry,
    "cattle": cattle,
    "pork": pork,
}

with pd.ExcelWriter("data/processed/processed_livestock_data.xlsx") as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)