In [121]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from shapely.geometry import Point
from shapely.geometry import shape
import geopandas as gpd
from osgeo import gdal
import numpy as np
import os
import requests

# Counties and CDL codes
This was stage 1 work getting Counties and CDL codes into the right formats. Very basic stuff.

In [122]:
#Load Data
cdl_codes = pd.read_csv("../synced-data/origins_data/cdl-codes.csv")
county_crops = pd.read_csv("../synced-data/origins_data/county-crops-conus-all.csv")


## CDL Codes
This was initial work on CDL Codes. Most of the work on this data was done manually joining different data source references

In [123]:
# Drops all blank crop codes
cdl_codes = cdl_codes[cdl_codes.Class_Names != ' ']
cdl_codes = cdl_codes.reset_index(drop=True)

In [124]:
# returns the crop name based on crop code e.g. "crop_1" returns "Corn"
def crop_code_to_crop(crop_code):
    return cdl_codes[cdl_codes["crop_code"] == crop_code]["Class_Names"].values[0]

## Counties Data

In [125]:
# creates lists of columns to be used later
col_list = []
for c in county_crops.columns:
    col_list.append(c)
crop_col = []
for c in col_list:
    if c.startswith("crop"):
        crop_col.append(c)

In [126]:
len(county_crops["statefp"].unique())

49

In [127]:
# Joins state FIP codes to the counties crops data. This is used to join the state name to the counties crops data
statefp = pd.read_csv("../synced-data/origins_data/statefpcodes.csv")
statefp.columns = statefp.columns.str.lower()
county_crops["statefp"] = county_crops["statefp"].astype(int)
statefp = statefp[statefp["state"].notna()]
statefp["fips"] = statefp["fips"].astype(int)
county_crops["statefp"] = county_crops["statefp"].astype(str)
statefp["fips"] = statefp["fips"].astype(str)
# join State column and Postal column to counties based on FIPS and STATEFP
county_crops = county_crops.merge(statefp, left_on="statefp", right_on="fips")
county_crops = county_crops.drop(columns=["unnamed: 3", "unnamed: 4", "unnamed: 5", "fips"])

In [128]:
# saving as a checkpoint to be used later in the notebook
county_crops.to_csv("../synced-data/origins_data/county-crops-conus-all-v2.csv", index=False) 

# State Level  Approach
We're using a state level conversion approach instead of the USDA data approach. We're using [Zenedo Data](https://zenodo.org/record/7332106#.ZCHhtuxuerM) referenced in the write up for our conversions here.

1. Create DF with county crop pixels "unsynced-data/county-crops-v1.geojson"
2. Create a conversion dataframe with total pixels of each crop across the US with Crop Name
3. Use state level data to convert pixels to kCals "unsynced-data/Stability_Crop_Diversity-2.0/Data/Outputs/Intermediate_Data/Clean_Data.csv"
4. for every column in county level crops create a column with the kCals of that crop

## Prepping data

In [129]:
# Source: zonal histogram of cropscape data from 2017

# use the below read line if you are starting from this cell
# county_crops = pd.read_csv("../synced-data/origins_data/county-crops-conus-all-v2.csv")

# Drops any nan value in the crop columns
for c in county_crops.columns:
    if c.startswith("crop"):
        county_crops[c] = county_crops[c].replace(0, np.nan)

# reading dataframe of list of crops we'll be using through the first crosswalk
final_crops = pd.read_csv("../synced-data/origins_data/final_crops.csv")

In [130]:
# Read in production data. Source: Zenedo data from report on stabilty of crop diversity. Linked above.
production = pd.read_csv("../synced-data/origins_data/stability_crop_diversity.csv")
production = production.drop(columns=["Unnamed: 0"])

In [131]:
# creates a dataframe with each unique crop name and the data associated with a row that has that crop name
# this was used to make the final_crops.csv file that will be all the crops we use in the final data
unique = production['Crop_Name'].unique().tolist()
unique_df = pd.DataFrame()
# for each value in unique list, append the row with that crop name to unique_df using the concat method
for u in unique:
    unique_df = pd.concat([unique_df, production[production["Crop_Name"] == u].head(1)])
unique_df

# write unique_df to csv if backup is needed
# unique_df.to_csv("../unsynced-data/Production_data_clean.csv")

Unnamed: 0,State_Abbr,Year,Crop_Name,Price_Received_USD_kg,Crop_Area_ha,Production_kg,FIPS,kcal_kg,Production_kcal,Production_USD
0,CA,1998,TOMATOES-ALL CLASSES,0.106907,130716.309187,8.535522e+09,6,187.0,1.596143e+12,1.317294e+09
1,CA,1999,CARROTS-ALL CLASSES,0.343009,30109.267503,1.029173e+09,6,365.0,3.756482e+11,5.417210e+08
29,DE,2005,SWEET CORN-ALL CLASSES,0.175366,4168.352894,5.943874e+07,10,310.0,1.842601e+10,1.328234e+07
31,FL,1999,CUCUMBERS-ALL CLASSES,0.408305,7041.683529,1.990997e+08,12,146.0,2.906856e+10,1.247492e+08
38,FL,2006,BEANS-SNAP,1.079742,12950.222582,1.340909e+08,12,273.0,3.660681e+10,1.868575e+08
...,...,...,...,...,...,...,...,...,...,...
11295,MT,1999,MUSTARD-ALL CLASSES,,8498.583569,8.095154e+06,30,251.0,2.031884e+09,
14679,NY,1998,BEETS-ALL CLASSES,0.086531,930.797248,2.503831e+07,36,288.0,7.211032e+09,3.127692e+06
16292,OR,1999,HAZELNUTS-ALL CLASSES,,,3.420087e+07,41,2575.0,8.806725e+10,
16435,OR,2007,BLACKBERRIES-ALL CLASSES,1.071641,2711.452853,2.897929e+07,41,413.0,1.196845e+10,3.270129e+07


In [132]:
production.sample(5)

Unnamed: 0,State_Abbr,Year,Crop_Name,Price_Received_USD_kg,Crop_Area_ha,Production_kg,FIPS,kcal_kg,Production_kcal,Production_USD
19989,UT,2010,APPLES-ALL CLASSES,0.551256,566.572238,5442120.0,49,468.0,2546912000.0,3000000.0
21049,WA,2003,STRAWBERRIES-ALL CLASSES,1.147948,728.45002,7348190.0,53,301.0,2211805000.0,11423640.0
9320,MI,2003,ASPARAGUS-ALL CLASSES,1.340632,6070.416835,14378870.0,26,106.0,1524160000.0,26105760.0
6230,ID,2011,BARLEY-ALL CLASSES,0.258584,202347.227843,1012417000.0,16,3530.0,3573831000000.0,211735600.0
14005,NM,1983,POTATOES-ALL CLASSES,0.141096,2306.758397,73708700.0,35,578.0,42603630000.0,15335940.0


In [133]:
county_crops.sample(5)

Unnamed: 0,statefp,countyfp,countyns,geoid,name,namelsad,lsad,classfp,mtfcc,csafp,cbsafp,metdivfp,funcstat,aland,awater,intptlat,intptlon,crop_0,crop_1,crop_2,crop_3,crop_4,crop_5,crop_6,crop_10,crop_11,crop_12,crop_13,crop_14,crop_21,crop_22,crop_23,crop_24,crop_25,crop_26,crop_27,crop_28,crop_29,crop_30,crop_31,crop_32,crop_33,crop_34,crop_35,crop_36,crop_37,crop_38,crop_39,crop_41,crop_42,crop_43,crop_44,crop_45,crop_46,crop_47,crop_48,crop_49,crop_50,crop_51,crop_52,crop_53,crop_54,crop_55,crop_56,crop_57,crop_58,crop_59,crop_60,crop_61,crop_66,crop_67,crop_68,crop_69,crop_70,crop_71,crop_72,crop_74,crop_75,crop_76,crop_77,crop_92,crop_111,crop_112,crop_121,crop_122,crop_123,crop_124,crop_131,crop_141,crop_142,crop_143,crop_152,crop_176,crop_190,crop_195,crop_204,crop_205,crop_206,crop_207,crop_208,crop_209,crop_211,crop_212,crop_213,crop_214,crop_216,crop_217,crop_218,crop_219,crop_220,crop_221,crop_222,crop_223,crop_224,crop_225,crop_226,crop_227,crop_229,crop_231,crop_232,crop_233,crop_236,crop_237,crop_238,crop_239,crop_240,crop_241,crop_242,crop_243,crop_244,crop_245,crop_246,crop_247,crop_248,crop_249,crop_250,crop_254,state,postal
1595,49,57,1448042,49057,Weber,Weber County,6,H1,G4020,482.0,36260.0,,A,1492541890,216440007,41.270325,-111.876883,,15416.0,,,3.0,,,,,,,,1231.0,,205.0,7398.0,,,,253.0,,,,,183.0,,,75543.0,23067.0,,,,9.0,,1.0,,,6.0,57.0,2604.0,,,,10.0,,,,14.0,,19.0,,3423.0,75.0,75.0,16.0,,12.0,,,,,,,,264194.0,,52874,80555,52487,19403.0,37259.0,513012.0,94300.0,325.0,336165.0,70767.0,5388.0,71704.0,,243.0,,,,,,,,,11.0,,,13.0,,,426.0,,,,,,369.0,,,,,,,,,,,,,,,,,,,,Utah,UT
1421,29,13,758461,29013,Bates,Bates County,6,H1,G4020,312.0,28140.0,,A,2167033438,38030738,38.257217,-94.339246,,263682.0,,,118.0,447497.0,1.0,,,,1.0,,,,,925.0,,27215.0,56.0,36.0,153.0,,,,,,,535.0,130326.0,,,,,,,,,,,,,,,,,,,,318.0,136.0,,22.0,,,,,,,,1887.0,,,,,22994.0,,81370,13979,1933,611.0,593.0,261649.0,,2.0,1536.0,786660.0,89889.0,2362.0,,107.0,,,,,,,,,,,,,,,,,,355.0,,,,,,,31.0,,,,1.0,,,,,,,,,,,1.0,Missouri,MO
1361,29,107,758508,29107,Lafayette,Lafayette County,6,H1,G4020,312.0,28140.0,,A,1627237794,27073170,39.068705,-93.802639,,391582.0,,,108.0,455298.0,1.0,,,,,,,,,1619.0,,4916.0,47.0,1.0,,,,,,,,1407.0,60041.0,,,,,,,,,,,,,,,,,,,,20.0,58.0,,27.0,,,26.0,,,,,6.0,,,,,31170.0,,83551,30225,6214,1440.0,370.0,211348.0,,3.0,797.0,326328.0,12660.0,1722.0,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,58.0,,,,,,,,,,,,Missouri,MO
95,53,31,1531936,53031,Jefferson,Jefferson County,6,H1,G4020,,,,A,4671633827,994992501,47.805707,-123.527057,1084005.0,293.0,,,1.0,,,,,16.0,,,23.0,,3.0,346.0,,,,1.0,,,,,,,,70.0,2025.0,,,,,9.0,11.0,,,19.0,,,1.0,,,13.0,,556.0,,,,215.0,,271.0,,,10.0,3.0,37.0,,,,,,,,18431.0,127346.0,105446,34210,6764,1680.0,216333.0,92075.0,3913220.0,129416.0,533475.0,100653.0,28661.0,7066.0,,,,,,,,,,,,,,1.0,,9.0,,,,,,,,,,,,,,,,,378.0,,2.0,,,,,,57.0,,Washington,WA
2991,19,87,465232,19087,Henry,Henry County,6,H1,G4020,,,,A,1124800194,5774829,40.984803,-91.547259,,326063.0,,,,276201.0,,,,,,,,,,508.0,,2.0,104.0,573.0,,,,,,,,14082.0,3698.0,,,,,,,,,,,,,,,,,,,,14.0,,,17.0,,,,,,,,,,,,,9895.0,,50009,25209,5151,975.0,661.0,204473.0,169.0,80.0,18673.0,191869.0,9346.0,1265.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Iowa,IA


# Crosswalking and Merging Data
The next blocks of code are crosswalking data sources to provide accurate production numbers
![Crosswalk](./data_crosswalks.png)

Notes to myself:
Developing a function that will add kcal's to counties df
1. step 1 rollup counties data when needed as shown in final_crops dataframe
2. delete all columns that aren't present in the final_crops data
3. choose the closest year to 2017 or most recent in the production data
4. roll up values in production data when needed as shown in final_crops dataframe

## Crosswalk 1 CDL -> Stabilty Crop Diversity

In [134]:
# rolling up counties data as dictated by final crops
county_crops["crop_42"] = county_crops["crop_42"] + county_crops["crop_51"]
county_crops["crop_22"] = county_crops["crop_22"] + county_crops["crop_23"] + county_crops["crop_24"]
county_crops = county_crops.drop(columns=["crop_51", "crop_23", "crop_24"])

## Crosswalk 2 Stabilty Crop Diversity -> CDL

In [135]:

# production has data from many years 
# This takes each crop from each state and adds the row that is closest to 2017 (the year our cdl data is from) to a dataframe called rel_prod
rel_prod_list = []
for s in production["State_Abbr"].unique():
    state = production[production["State_Abbr"] == s]
    for c in state["Crop_Name"].unique():
        crop = state[state["Crop_Name"] == c]
        rel_prod_list.append(c.iloc[(crop['Year']-2017).abs().argsort()[:1]])

rel_prod = pd.concat(rel_prod_list)

In [136]:
def crop_rollup(crops, rel_prod):
    roll_df = pd.DataFrame()
    for s in rel_prod["State_Abbr"].unique():
        state = rel_prod[rel_prod["State_Abbr"] == s]
        temp = state[state["Crop_Name"].isin(crops)]
        if len(temp) > 0:
            roll_df = pd.concat([
                roll_df, 
                pd.DataFrame({
                    "State_Abbr": s, 
                    "Year": temp["Year"].max(), 
                    "Crop_Name": crops[0], 
                    "Price_Received_USD_kg": temp["Price_Received_USD_kg"].mean(), 
                    "Crop_Area_ha": temp["Crop_Area_ha"].sum(), 
                    "Production_kg": temp["Production_kg"].sum(), 
                    "FIPS": temp["FIPS"].iloc[0], 
                    "kcal_kg": temp["kcal_kg"].mean(),
                    "Production_kcal": temp["Production_kcal"].sum(), 
                    "Production_USD": temp["Production_USD"].sum()
                }, 
                index=[0])
            ])
    roll_df = roll_df.drop_duplicates(subset="State_Abbr", keep="first")
    rel_prod = rel_prod[~rel_prod["Crop_Name"].isin(crops)]
    rel_prod = pd.concat([rel_prod, roll_df])
    return rel_prod

In [137]:
roll1 = ["LETTUCE-HEAD", "LETTUCE-LEAF", "LETTUCE-ROMAINE"]
rel_prod = crop_rollup(roll1, rel_prod)

roll2 = ["PEAS-GREEN", "PEAS-DRY EDIBLE", "PEAS-AUSTRIAN WINTER"]
rel_prod = crop_rollup(roll2, rel_prod)

roll3 = [ "GRAPEFRUIT-ALL CLASSES", "LEMONS-ALL CLASSES", "TANGELOS-ALL CLASSES", "TANGERINES-ALL CLASSES", ]
rel_prod = crop_rollup(roll3, rel_prod)

rel_prod = rel_prod.reset_index(drop=True)

## Merging CDL & Stability Crop Diversity
This is how cdl data is merged with the kcal data. Developing pixel to kcal conversion function. I'm going to develop a national level that will take total national pixels and divide by national kcals. We'll plan to use the state level conversion functions, but this will be good to have if we want a fallback. 

###  National Level

In [138]:
# in counties drop any column whose column name starts with "crop" and is not in final_crop[crop_code] column
# this is to remove any columns that are not in the final crop list
rel_counties = county_crops
for c in county_crops.columns:
    if c.startswith("crop") and c not in final_crops["crop_code"].values:
        rel_counties = rel_counties.drop(c, axis=1)
rel_counties.sample(5)

Unnamed: 0,statefp,countyfp,countyns,geoid,name,namelsad,lsad,classfp,mtfcc,csafp,cbsafp,metdivfp,funcstat,aland,awater,intptlat,intptlon,crop_1,crop_3,crop_4,crop_5,crop_6,crop_10,crop_12,crop_21,crop_22,crop_27,crop_28,crop_29,crop_31,crop_32,crop_33,crop_35,crop_41,crop_42,crop_43,crop_46,crop_48,crop_49,crop_50,crop_52,crop_53,crop_54,crop_66,crop_67,crop_68,crop_69,crop_72,crop_74,crop_75,crop_76,crop_77,crop_204,crop_206,crop_207,crop_208,crop_209,crop_211,crop_212,crop_214,crop_216,crop_218,crop_220,crop_221,crop_222,crop_223,crop_227,crop_229,crop_242,crop_243,crop_244,crop_245,crop_246,crop_248,crop_250,state,postal
1667,40,83,1101829,40083,Logan,Logan County,6,H1,G4020,416.0,36420.0,,A,1926268077,13203700,35.914277,-97.450952,9507.0,,3260.0,21053.0,28.0,,,48.0,,12171.0,220.0,5.0,15259.0,,,,,,,,,,,,235.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Oklahoma,OK
2168,26,57,1622971,26057,Gratiot,Gratiot County,6,H1,G4020,394.0,10940.0,,A,1472098896,8369164,43.292326,-84.60469,390478.0,,47.0,422573.0,3.0,,,20.0,,534.0,3494.0,,,,,,38303.0,,4313.0,,,,10668.0,,1851.0,,,1.0,148.0,,,,,,,,,,,,,,,,,,,404.0,,,2.0,1.0,,,,,,,Michigan,MI
877,13,303,348428,13303,Washington,Washington County,6,H1,G4020,,,,A,1757373959,15099021,32.971843,-82.798112,10861.0,,82.0,5363.0,3.0,10728.0,,1.0,,1179.0,2618.0,629.0,,,,,,,,,1.0,,,,,,,6.0,,,,15622.0,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,Georgia,GA
633,21,47,516870,21047,Christian,Christian County,6,H1,G4020,,17300.0,,A,1858342356,16919159,36.892059,-87.492986,303330.0,,24.0,113953.0,,,,,,13.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Kentucky,KY
1184,42,107,1213685,42107,Schuylkill,Schuylkill County,6,H1,G4020,,39060.0,,A,2016509909,10820294,40.703682,-76.217788,91481.0,,1148.0,60529.0,,,,44.0,,1036.0,654.0,,3.0,,,,,,16.0,,,,,,,,,,47.0,76.0,,,,,,,,,,,,,,19.0,,,,4.0,,,19.0,,2.0,,,,,,Pennsylvania,PA


In [139]:
for c in county_crops.columns:
    if c.startswith("crop"):
        final_crops.loc[final_crops["crop_code"] == c, "total_pixels"] = county_crops[c].sum()

In [140]:
# for every unique value in "Crop_Name" add the values from those, and add them to the final_crops dataframe
for c in rel_prod["Crop_Name"].unique():
    final_crops.loc[final_crops["production_name"] == c, "Crop_Area_ha"] = rel_prod[rel_prod["Crop_Name"] == c]["Crop_Area_ha"].sum()
    final_crops.loc[final_crops["production_name"] == c, "Production_kg"] = rel_prod[rel_prod["Crop_Name"] == c]["Production_kg"].sum()
    final_crops.loc[final_crops["production_name"] == c, "Production_kcal"] = rel_prod[rel_prod["Crop_Name"] == c]["Production_kcal"].sum()
    final_crops.loc[final_crops["production_name"] == c, "Production_USD"] = rel_prod[rel_prod["Crop_Name"] == c]["Production_USD"].sum()
    final_crops.loc[final_crops["production_name"] == c, "Price_Received_USD_kg"] = rel_prod[rel_prod["Crop_Name"] == c]["Price_Received_USD_kg"].mean()

In [141]:
# adds the national level conversion of kcals to pixels
final_crops["kcal_pixel_national"] = final_crops["Production_kcal"] / final_crops["total_pixels"]
# change values that are inf to nan
final_crops.loc[final_crops["kcal_pixel_national"] == np.inf, "kcal_pixel_national"] = np.nan
final_crops.loc[final_crops["total_pixels"].isna(), "total_pixels"] = 0
final_crops["total_pixels"] = final_crops["total_pixels"].astype(int)
final_crops = final_crops[final_crops["total_pixels"] != 0]
# write final_crops to csv as a checkpoint
final_crops.to_csv("final_crops_with_kcal.csv")

In [142]:
final_crops

Unnamed: 0,crop_code,cdl_name,production_name,final_name,kcal_kg,red,green,blue,rollup,total_pixels,Crop_Area_ha,Production_kg,Production_kcal,Production_USD,Price_Received_USD_kg,kcal_pixel_national
0,crop_75,Almonds,ALMONDS-ALL CLASSES,Almonds,5790.0,0,168,132,,5504144,416835.3,1029468000.0,5960618000000.0,5359753000.0,5.578708,1082933.0
1,crop_68,Apples,APPLES-ALL CLASSES,Apples,468.0,185,0,80,,1509000,138096.3,5307201000.0,2483770000000.0,3535388000.0,0.997514,1645971.0
2,crop_223,Apricots,APRICOTS-ALL CLASSES,Apricots,446.0,255,145,171,,1515,4621.611,41419350.0,18473030000.0,41759380.0,1.137218,12193420.0
3,crop_207,Asparagus,ASPARAGUS-ALL CLASSES,Asparagus,106.0,255,102,102,,13630,11027.92,39512400.0,4188314000.0,96511230.0,2.661459,307286.4
5,crop_21,Barley,BARLEY-ALL CLASSES,Barley,3530.0,226,0,127,,9753800,840820.2,3265838000.0,11528410000000.0,605806000.0,0.161614,1181940.0
6,crop_42,Dry Beans,"BEANS-DRY EDIBLE, INCL CHICKPEAS",Dry Beans,3506.0,168,0,0,crop_42; crop_51,53993,845932.8,1692397000.0,5933544000000.0,982747100.0,0.749645,109894700.0
7,crop_242,Blueberries,BLUEBERRIES-TAME,Blueberries,542.0,0,0,153,,791899,34488.06,236373900.0,128114700000.0,774765100.0,4.576441,161781.6
8,crop_214,Broccoli,BROCCOLI-ALL CLASSES,Broccoli,207.0,255,102,102,,69344,52407.93,927777100.0,192049900000.0,866857800.0,0.944755,2769524.0
9,crop_243,Cabbage,CABBAGE-ALL CLASSES,Cabbage,200.0,255,102,102,,98932,26968.84,1178296000.0,235659200000.0,432796800.0,0.351307,2382032.0
10,crop_31,Canola,CANOLA-ALL CLASSES,Canola,6420.0,209,255,0,,8675440,810198.3,1385659000.0,8895931000000.0,488988800.0,0.349213,1025416.0


At this point `final_crops` is more of a national level production dataframe. This next section scopes down to the county production level which is then matched to the county crop pixel level.

### County Level
There are three main dataframes that are important at this point
1. `final_crops` - this is the national level production dataframe
2. `rel_counties` - this is the county level production dataframe for only crops in `final_crops`
3. `rel_prod` - this is the state level production dataframe for only crops in `final_crops` for the year closest to 2017

we need to match production data in `rel_prod` to the pixels in `rel_counties`

In [143]:
# This adds crop_code to rel_prod and drops any rows from rel_prod that are not in final_crops (e.g. Tobacco)
for r in rel_prod["Crop_Name"]:
    # print the crop_code from final_crops if it exists
    if r in final_crops["production_name"].values:
        # add the crop_code to rel_prod
        rel_prod.loc[rel_prod["Crop_Name"] == r, "crop_code"] = final_crops.loc[final_crops["production_name"] == r, "crop_code"].iloc[0]
    else:
        # drop the row from rel_prod if it is not in final_crops
        rel_prod = rel_prod[rel_prod["Crop_Name"] != r]
        print (r)
rel_prod = rel_prod.reset_index(drop=True)


COTTON-UPLAND
HAY-ALL CLASSES
HOPS-ALL CLASSES
COTTON-PIMA
ARTICHOKES-ALL CLASSES
BEANS-SNAP
BRUSSELS SPROUTS-ALL CLASSES
MELONS-HONEYDEW
PEPPERS-BELL
ESCAROLE & ENDIVE-ALL CLASSES
AVOCADOS-ALL CLASSES
BOYSENBERRIES-ALL CLASSES
DATES-ALL CLASSES
FIGS-ALL CLASSES
KIWIFRUIT-ALL CLASSES
PRUNES-ALL CLASSES
RASPBERRIES-ALL CLASSES
BEANS-GREEN, LIMA
SPINACH-ALL CLASSES
HAY-ALL CLASSES
BEANS-SNAP
BEANS-GREEN, LIMA
BEANS-SNAP
COTTON-UPLAND
HAY-ALL CLASSES
TOBACCO-ALL CLASSES
ESCAROLE & ENDIVE-ALL CLASSES
PEPPERS-BELL
OKRA-ALL CLASSES
AVOCADOS-ALL CLASSES
HAY-ALL CLASSES
BEANS-SNAP
BEANS-GREEN, LIMA
HAY-ALL CLASSES
TOBACCO-ALL CLASSES
BEANS-SNAP
HAY-ALL CLASSES
TOBACCO-ALL CLASSES
BEANS-SNAP
BEANS-GREEN, LIMA
BEANS-SNAP
HAY-ALL CLASSES
PEPPERS-BELL
COTTON-UPLAND
HAY-ALL CLASSES
TOBACCO-ALL CLASSES
BEANS-SNAP
PEPPERS-BELL
BEANS-SNAP
HAY-ALL CLASSES
BEETS-ALL CLASSES
ESCAROLE & ENDIVE-ALL CLASSES
PEPPERS-BELL
HAY-ALL CLASSES
TOBACCO-ALL CLASSES
BEANS-SNAP
ESCAROLE & ENDIVE-ALL CLASSES
PEPPERS-BEL

In [144]:
# adding total state pixels to rel_prod
for i in range(len(rel_prod)):
    # get the crop_code from rel_prod
    crop_code = rel_prod.loc[i, "crop_code"]
    # get the state from rel_prod
    state = rel_prod.loc[i, "State_Abbr"]
    # get the value of the column in rel_counties that matches crop_code and where rel_counties["Postal"] matches state
    # then sum the values in that column
    state_pixels = rel_counties.loc[rel_counties["postal"] == state, crop_code].sum()
    # add the value of state_pixels to the row in rel_prod
    rel_prod.loc[i, "state_pixels"] = state_pixels

# add kcal_pixel_state as a column to rel_prod
#this column is the ratio of kcal/pixel for that specific state
rel_prod["kcal_pixel_state"] = rel_prod["Production_kcal"] / rel_prod["state_pixels"]
#this column is the ratio of kcal/pixel for that specific state
rel_prod["kg_pixel_state"] = rel_prod["Production_kg"] / rel_prod["state_pixels"]
rel_prod.loc[rel_prod["kcal_pixel_state"] == np.inf, "kcal_pixel_state"] = np.nan


Code block below adds kcals to rel_counties. The approach here is there are two rows added to the dataframe for each crop code.
- The `kcal_state_crop_code` is the number of calories using the state level conversion. 
- The `kcal_national_crop_code` is the number of calories using the national level conversion.


For every county row, these are rolled up into two two columns `kcal_state` and `kcal_national`
- `kcal_state` is the sum of all the kcal in that county using the state conversion when possible, falling back to the national conversion when state conversion is not available. The state conversion is not available when the production datasest does not have a value for that crop in that state, but there are pixel values for that crop from cdl
- `kcal_national` is the sum of all kcal in that county using the national conversion.


the last two columns added are `states_used` and `national_used`
- `states_used` is the number of state ratios used in the state level conversion. Hypothetically, the most accurate rows would be the ones where only state ratios are used. 
- `national_used` is the number of national ratios used in the state level conversion


We might be able to use the variance in these two columns to see how much the state level conversion differs from the national level conversion for each county.


In [145]:
rel_counties

Unnamed: 0,statefp,countyfp,countyns,geoid,name,namelsad,lsad,classfp,mtfcc,csafp,cbsafp,metdivfp,funcstat,aland,awater,intptlat,intptlon,crop_1,crop_3,crop_4,crop_5,crop_6,crop_10,crop_12,crop_21,crop_22,crop_27,crop_28,crop_29,crop_31,crop_32,crop_33,crop_35,crop_41,crop_42,crop_43,crop_46,crop_48,crop_49,crop_50,crop_52,crop_53,crop_54,crop_66,crop_67,crop_68,crop_69,crop_72,crop_74,crop_75,crop_76,crop_77,crop_204,crop_206,crop_207,crop_208,crop_209,crop_211,crop_212,crop_214,crop_216,crop_218,crop_220,crop_221,crop_222,crop_223,crop_227,crop_229,crop_242,crop_243,crop_244,crop_245,crop_246,crop_248,crop_250,state,postal
0,31,39,835841,31039,Cuming,Cuming County,6,H1,G4020,,,,A,1477645345,10690204,41.915865,-96.788517,649877.0,,230.0,535434.0,,,,,,3.0,848.0,19.0,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.0,,,,,,Nebraska,NE
1,31,109,835876,31109,Lancaster,Lancaster County,6,H1,G4020,339.0,30700.0,,A,2169272970,22847034,40.783547,-96.688658,557931.0,,2507.0,564856.0,,,,,,439.0,338.0,76.0,,,,,,,,,,,,,26.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE
2,31,129,835886,31129,Nuckolls,Nuckolls County,6,H1,G4020,,,,A,1489645185,1718484,40.176492,-98.046842,492281.0,,12556.0,307431.0,,,,,,10.0,1569.0,,,,,,,,,,,,,,34.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE
3,31,101,835872,31101,Keith,Keith County,6,H1,G4020,,,,A,2749903240,124629167,41.194245,-101.644449,489931.0,,6553.0,84142.0,1480.0,,,137.0,130062.0,3994.0,7521.0,1826.0,,,,,6431.0,,57.0,,,,,,7954.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE
4,31,137,835890,31137,Phelps,Phelps County,6,H1,G4020,,,,A,1398048634,1646526,40.516365,-99.406557,719100.0,,5636.0,375036.0,,,,,,30.0,757.0,17.0,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3103,44,9,1219782,44009,Washington,Washington County,6,H4,G4020,148.0,39300.0,,N,852827571,604769755,41.396792,-71.620282,9025.0,,,,,,,,,50.0,3.0,,,,,,,,151.0,,,,,,,,,,74.0,1.0,,,,,,,,,,,,,,3.0,,,,4.0,,,12.0,,,,,,,,Rhode Island,RI
3104,44,7,1219781,44007,Providence,Providence County,6,H4,G4020,148.0,39300.0,,N,1060552463,67870199,41.869768,-71.578625,3049.0,,,,,,,,,50.0,5.0,,,,,,,,39.0,,,,,,,,,,296.0,,,,,,,,,,,,,,,11.0,,,,34.0,,,2.0,,,,,,,,Rhode Island,RI
3105,44,1,1219777,44001,Bristol,Bristol County,6,H4,G4020,148.0,39300.0,,N,62500772,53359134,41.706840,-71.286687,391.0,,,,,,,,,2.0,,,,,,,,,2.0,,,,,,,,,,2.0,,,,,,,,,,,,,,,6.0,,,,,,,,,,,,,,,Rhode Island,RI
3106,44,5,1219779,44005,Newport,Newport County,6,H4,G4020,148.0,39300.0,,N,265293780,547001789,41.501045,-71.283063,4613.0,,,,,,,,,135.0,,,,,,,,,1201.0,,,,,,,,,,87.0,16.0,,,,,,,,,,,,,,19.0,,,,19.0,,,19.0,,,,,,,32.0,Rhode Island,RI


In [146]:
rel_prod

Unnamed: 0,State_Abbr,Year,Crop_Name,Price_Received_USD_kg,Crop_Area_ha,Production_kg,FIPS,kcal_kg,Production_kcal,Production_USD,crop_code,state_pixels,kcal_pixel_state,kg_pixel_state
0,CA,2017,TOMATOES-ALL CLASSES,0.106924,100242.816673,1.005575e+10,6,187.000000,1.880425e+12,1.003435e+09,crop_54,562888.0,3.340674e+06,1.786456e+04
1,CA,2017,CARROTS-ALL CLASSES,0.648160,23674.625658,9.817999e+08,6,365.000000,3.583570e+11,5.938864e+08,crop_206,129418.0,2.768989e+06,7.586270e+03
2,CA,2017,BARLEY-ALL CLASSES,0.221381,11736.139215,3.156998e+07,6,3530.000000,1.114420e+11,6.522491e+06,crop_21,478593.0,2.328534e+05,6.596415e+01
3,CA,2017,"BEANS-DRY EDIBLE, INCL CHICKPEAS",1.349230,20113.314448,4.740036e+07,6,3506.000000,1.661857e+11,5.968513e+07,crop_42,0.0,,inf
4,CA,2017,CORN-ALL CLASSES-GRAIN,0.165347,32375.556455,3.393600e+08,6,3650.000000,1.238664e+12,5.236658e+07,crop_1,464094.0,2.668994e+06,7.312312e+02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,SD,2017,PEAS-GREEN,0.246918,14164.305949,2.381358e+07,46,2620.000000,6.239158e+10,5.487515e+06,crop_53,143015.0,4.362590e+05,1.665111e+02
752,CA,2017,GRAPEFRUIT-ALL CLASSES,0.569895,46701.740186,1.767196e+09,6,280.666667,5.798218e+11,5.744850e+08,crop_72,113684.0,5.100294e+06,1.554481e+04
753,FL,2017,GRAPEFRUIT-ALL CLASSES,0.469951,18494.536625,3.858034e+08,12,314.666667,8.182288e+10,1.456472e+08,crop_72,256014.0,3.196032e+05,1.506962e+03
754,TX,2017,GRAPEFRUIT-ALL CLASSES,0.357700,6636.989073,1.741795e+08,48,160.000000,2.786872e+10,5.814527e+07,crop_72,27461.0,1.014847e+06,6.342796e+03


In [147]:
for i in range(len(rel_counties)):
    total_state_kcal = 0
    total_state_kg = 0
    state = rel_counties.loc[i, "postal"]
    for c in rel_counties.columns:
        if c.startswith("crop") and not pd.isna(rel_counties.loc[i, c]):
            crop_code = c
            crop_pixels = rel_counties.loc[i, c]
            state_kcal_col_name = "kcal_state_"+crop_code
            state_kg_col_name = "kg_state_"+crop_code
            try:
                # kcal calculation
                state_kcal = rel_prod.loc[(rel_prod["crop_code"] == crop_code) & (rel_prod["State_Abbr"] == state), "kcal_pixel_state"].iloc[0]
                # kg calculation
                state_kg = rel_prod.loc[(rel_prod["crop_code"] == crop_code) & (rel_prod["State_Abbr"] == state), "kg_pixel_state"].iloc[0]
            except:
                state_kcal = np.nan
                state_kg = np.nan
            rel_counties.loc[i, state_kcal_col_name] = state_kcal * crop_pixels
            rel_counties.loc[i, state_kg_col_name] = state_kg * crop_pixels
            total_state_kcal = total_state_kcal + (state_kcal * crop_pixels)
            total_state_kg = total_state_kg + (state_kg * crop_pixels)

  rel_counties.loc[i, state_kg_col_name] = state_kg * crop_pixels
  rel_counties.loc[i, state_kcal_col_name] = state_kcal * crop_pixels
  rel_counties.loc[i, state_kg_col_name] = state_kg * crop_pixels
  rel_counties.loc[i, state_kcal_col_name] = state_kcal * crop_pixels
  rel_counties.loc[i, state_kg_col_name] = state_kg * crop_pixels
  rel_counties.loc[i, state_kcal_col_name] = state_kcal * crop_pixels
  rel_counties.loc[i, state_kg_col_name] = state_kg * crop_pixels
  rel_counties.loc[i, state_kcal_col_name] = state_kcal * crop_pixels
  rel_counties.loc[i, state_kg_col_name] = state_kg * crop_pixels
  rel_counties.loc[i, state_kcal_col_name] = state_kcal * crop_pixels
  rel_counties.loc[i, state_kg_col_name] = state_kg * crop_pixels
  rel_counties.loc[i, state_kcal_col_name] = state_kcal * crop_pixels
  rel_counties.loc[i, state_kg_col_name] = state_kg * crop_pixels
  rel_counties.loc[i, state_kcal_col_name] = state_kcal * crop_pixels
  rel_counties.loc[i, state_kg_col_name] = state

In [148]:
rel_counties

Unnamed: 0,statefp,countyfp,countyns,geoid,name,namelsad,lsad,classfp,mtfcc,csafp,cbsafp,metdivfp,funcstat,aland,awater,intptlat,intptlon,crop_1,crop_3,crop_4,crop_5,crop_6,crop_10,crop_12,crop_21,crop_22,crop_27,crop_28,crop_29,crop_31,crop_32,crop_33,crop_35,crop_41,crop_42,crop_43,crop_46,crop_48,crop_49,crop_50,crop_52,crop_53,crop_54,crop_66,crop_67,crop_68,crop_69,crop_72,crop_74,crop_75,crop_76,crop_77,crop_204,crop_206,crop_207,crop_208,crop_209,crop_211,crop_212,crop_214,crop_216,crop_218,crop_220,crop_221,crop_222,crop_223,crop_227,crop_229,crop_242,crop_243,crop_244,crop_245,crop_246,crop_248,crop_250,state,postal,kcal_state_crop_1,kg_state_crop_1,kcal_state_crop_4,kg_state_crop_4,kcal_state_crop_5,kg_state_crop_5,kcal_state_crop_27,kg_state_crop_27,kcal_state_crop_28,kg_state_crop_28,kcal_state_crop_29,kg_state_crop_29,kcal_state_crop_53,kg_state_crop_53,kcal_state_crop_243,kg_state_crop_243,kcal_state_crop_6,kg_state_crop_6,kcal_state_crop_21,kg_state_crop_21,kcal_state_crop_22,kg_state_crop_22,kcal_state_crop_41,kg_state_crop_41,kcal_state_crop_43,kg_state_crop_43,kcal_state_crop_52,kg_state_crop_52,kcal_state_crop_48,kg_state_crop_48,kcal_state_crop_69,kg_state_crop_69,kcal_state_crop_229,kg_state_crop_229,kcal_state_crop_12,kg_state_crop_12,kcal_state_crop_242,kg_state_crop_242,kcal_state_crop_250,kg_state_crop_250,kcal_state_crop_66,kg_state_crop_66,kcal_state_crop_68,kg_state_crop_68,kcal_state_crop_76,kg_state_crop_76,kcal_state_crop_77,kg_state_crop_77,kcal_state_crop_206,kg_state_crop_206,kcal_state_crop_221,kg_state_crop_221,kcal_state_crop_50,kg_state_crop_50,kcal_state_crop_244,kg_state_crop_244,kcal_state_crop_31,kg_state_crop_31,kcal_state_crop_33,kg_state_crop_33,kcal_state_crop_35,kg_state_crop_35,kcal_state_crop_42,kg_state_crop_42,kcal_state_crop_49,kg_state_crop_49,kcal_state_crop_67,kg_state_crop_67,kcal_state_crop_207,kg_state_crop_207,kcal_state_crop_214,kg_state_crop_214,kcal_state_crop_208,kg_state_crop_208,kcal_state_crop_209,kg_state_crop_209,kcal_state_crop_216,kg_state_crop_216,kcal_state_crop_220,kg_state_crop_220,kcal_state_crop_222,kg_state_crop_222,kcal_state_crop_223,kg_state_crop_223,kcal_state_crop_246,kg_state_crop_246,kcal_state_crop_218,kg_state_crop_218,kcal_state_crop_227,kg_state_crop_227,kcal_state_crop_74,kg_state_crop_74,kcal_state_crop_204,kg_state_crop_204,kcal_state_crop_10,kg_state_crop_10,kcal_state_crop_32,kg_state_crop_32,kcal_state_crop_3,kg_state_crop_3,kcal_state_crop_211,kg_state_crop_211,kcal_state_crop_72,kg_state_crop_72,kcal_state_crop_212,kg_state_crop_212,kcal_state_crop_46,kg_state_crop_46,kcal_state_crop_75,kg_state_crop_75,kcal_state_crop_54,kg_state_crop_54,kcal_state_crop_245,kg_state_crop_245,kcal_state_crop_248,kg_state_crop_248
0,31,39,835841,31039,Cuming,Cuming County,6,H1,G4020,,,,A,1477645345,10690204,41.915865,-96.788517,649877.0,,230.0,535434.0,,,,,,3.0,848.0,19.0,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.0,,,,,,Nebraska,NE,2.558894e+12,7.010668e+08,3.086090e+08,9.380214e+04,9.327255e+11,2.091313e+08,8.466448e+05,250.486633,2.712796e+08,69737.675175,8.142717e+06,2154.157863,5.335698e+05,2.036526e+02,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,31,109,835876,31109,Lancaster,Lancaster County,6,H1,G4020,339.0,30700.0,,A,2169272970,22847034,40.783547,-96.688658,557931.0,,2507.0,564856.0,,,,,,439.0,338.0,76.0,,,,,,,,,,,,,26.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE,2.196856e+12,6.018784e+08,3.363838e+09,1.022443e+06,9.839786e+11,2.206230e+08,1.238924e+08,36654.543901,1.081279e+08,27796.384680,3.257087e+07,8616.631452,1.387282e+07,5.294968e+03,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,31,129,835886,31129,Nuckolls,Nuckolls County,6,H1,G4020,,,,A,1489645185,1718484,40.176492,-98.046842,492281.0,,12556.0,307431.0,,,,,,10.0,1569.0,,,,,,,,,,,,,,34.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE,1.938359e+12,5.310572e+08,1.684737e+10,5.120781e+06,5.355445e+11,1.200772e+08,2.822149e+06,834.955442,5.019312e+08,129031.146638,,,1.814137e+07,6.924189e+03,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,31,101,835872,31101,Keith,Keith County,6,H1,G4020,,,,A,2749903240,124629167,41.194245,-101.644449,489931.0,,6553.0,84142.0,1480.0,,,137.0,130062.0,3994.0,7521.0,1826.0,,,,,6431.0,,57.0,,,,,,7954.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE,1.929106e+12,5.285221e+08,8.792674e+09,2.672545e+06,1.465753e+11,3.286441e+07,1.127166e+09,333481.203506,2.406007e+09,618510.678052,7.825579e+08,207025.908310,4.244014e+09,1.619853e+06,,,1.265806e+09,401333.642688,1.235875e+08,35010.624953,2.312620e+11,6.928161e+07,4.475795e+10,6.393993e+07,5.008169e+08,8.664651e+05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,31,137,835890,31137,Phelps,Phelps County,6,H1,G4020,,,,A,1398048634,1646526,40.516365,-99.406557,719100.0,,5636.0,375036.0,,,,,,30.0,757.0,17.0,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE,2.831460e+12,7.757424e+08,7.562263e+09,2.298560e+06,6.533123e+11,1.464826e+08,8.466448e+06,2504.866326,2.421682e+08,62254.033145,7.285589e+06,1927.404404,1.600710e+06,6.109578e+02,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3103,44,9,1219782,44009,Washington,Washington County,6,H4,G4020,148.0,39300.0,,N,852827571,604769755,41.396792,-71.620282,9025.0,,,,,,,,,50.0,3.0,,,,,,,,151.0,,,,,,,,,,74.0,1.0,,,,,,,,,,,,,,3.0,,,,4.0,,,12.0,,,,,,,,Rhode Island,RI,,,,,,,,,,,,,,,,,,,,,,,,,2.277469e+08,3.940258e+05,,,,,,,,,,,,,,,,,8.071585e+07,172469.755889,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3104,44,7,1219781,44007,Providence,Providence County,6,H4,G4020,148.0,39300.0,,N,1060552463,67870199,41.869768,-71.578625,3049.0,,,,,,,,,50.0,5.0,,,,,,,,39.0,,,,,,,,,,296.0,,,,,,,,,,,,,,,11.0,,,,34.0,,,2.0,,,,,,,,Rhode Island,RI,,,,,,,,,,,,,,,,,,,,,,,,,5.882206e+07,1.017683e+05,,,,,,,,,,,,,,,,,3.228634e+08,689879.023555,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3105,44,1,1219777,44001,Bristol,Bristol County,6,H4,G4020,148.0,39300.0,,N,62500772,53359134,41.706840,-71.286687,391.0,,,,,,,,,2.0,,,,,,,,,2.0,,,,,,,,,,2.0,,,,,,,,,,,,,,,6.0,,,,,,,,,,,,,,,Rhode Island,RI,,,,,,,,,,,,,,,,,,,,,,,,,3.016516e+06,5.218885e+03,,,,,,,,,,,,,,,,,2.181509e+06,4661.344754,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3106,44,5,1219779,44005,Newport,Newport County,6,H4,G4020,148.0,39300.0,,N,265293780,547001789,41.501045,-71.283063,4613.0,,,,,,,,,135.0,,,,,,,,,1201.0,,,,,,,,,,87.0,16.0,,,,,,,,,,,,,,19.0,,,,19.0,,,19.0,,,,,,,32.0,Rhode Island,RI,,,,,,,,,,,,,,,,,,,,,,,,,1.811418e+09,3.133941e+06,,,,,,,,,,,,,,,,,9.489566e+07,202768.496788,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [149]:
# rel_prod_state = rel_prod but delete any column that begins with kcal_national because we only care about the counties with tracked production data
rel_counties_state = rel_counties.copy()
for c in rel_counties_state.columns:
    if c.startswith("kcal_national"):
        rel_counties_state = rel_counties_state.drop(c, axis=1)


In [150]:
rel_counties_national = rel_counties.copy()
for c in rel_counties_national.columns:
    if c.startswith("kcal_state"):
        rel_counties_national = rel_counties_national.drop(c, axis=1)

In [151]:
rel_counties_state

Unnamed: 0,statefp,countyfp,countyns,geoid,name,namelsad,lsad,classfp,mtfcc,csafp,cbsafp,metdivfp,funcstat,aland,awater,intptlat,intptlon,crop_1,crop_3,crop_4,crop_5,crop_6,crop_10,crop_12,crop_21,crop_22,crop_27,crop_28,crop_29,crop_31,crop_32,crop_33,crop_35,crop_41,crop_42,crop_43,crop_46,crop_48,crop_49,crop_50,crop_52,crop_53,crop_54,crop_66,crop_67,crop_68,crop_69,crop_72,crop_74,crop_75,crop_76,crop_77,crop_204,crop_206,crop_207,crop_208,crop_209,crop_211,crop_212,crop_214,crop_216,crop_218,crop_220,crop_221,crop_222,crop_223,crop_227,crop_229,crop_242,crop_243,crop_244,crop_245,crop_246,crop_248,crop_250,state,postal,kcal_state_crop_1,kg_state_crop_1,kcal_state_crop_4,kg_state_crop_4,kcal_state_crop_5,kg_state_crop_5,kcal_state_crop_27,kg_state_crop_27,kcal_state_crop_28,kg_state_crop_28,kcal_state_crop_29,kg_state_crop_29,kcal_state_crop_53,kg_state_crop_53,kcal_state_crop_243,kg_state_crop_243,kcal_state_crop_6,kg_state_crop_6,kcal_state_crop_21,kg_state_crop_21,kcal_state_crop_22,kg_state_crop_22,kcal_state_crop_41,kg_state_crop_41,kcal_state_crop_43,kg_state_crop_43,kcal_state_crop_52,kg_state_crop_52,kcal_state_crop_48,kg_state_crop_48,kcal_state_crop_69,kg_state_crop_69,kcal_state_crop_229,kg_state_crop_229,kcal_state_crop_12,kg_state_crop_12,kcal_state_crop_242,kg_state_crop_242,kcal_state_crop_250,kg_state_crop_250,kcal_state_crop_66,kg_state_crop_66,kcal_state_crop_68,kg_state_crop_68,kcal_state_crop_76,kg_state_crop_76,kcal_state_crop_77,kg_state_crop_77,kcal_state_crop_206,kg_state_crop_206,kcal_state_crop_221,kg_state_crop_221,kcal_state_crop_50,kg_state_crop_50,kcal_state_crop_244,kg_state_crop_244,kcal_state_crop_31,kg_state_crop_31,kcal_state_crop_33,kg_state_crop_33,kcal_state_crop_35,kg_state_crop_35,kcal_state_crop_42,kg_state_crop_42,kcal_state_crop_49,kg_state_crop_49,kcal_state_crop_67,kg_state_crop_67,kcal_state_crop_207,kg_state_crop_207,kcal_state_crop_214,kg_state_crop_214,kcal_state_crop_208,kg_state_crop_208,kcal_state_crop_209,kg_state_crop_209,kcal_state_crop_216,kg_state_crop_216,kcal_state_crop_220,kg_state_crop_220,kcal_state_crop_222,kg_state_crop_222,kcal_state_crop_223,kg_state_crop_223,kcal_state_crop_246,kg_state_crop_246,kcal_state_crop_218,kg_state_crop_218,kcal_state_crop_227,kg_state_crop_227,kcal_state_crop_74,kg_state_crop_74,kcal_state_crop_204,kg_state_crop_204,kcal_state_crop_10,kg_state_crop_10,kcal_state_crop_32,kg_state_crop_32,kcal_state_crop_3,kg_state_crop_3,kcal_state_crop_211,kg_state_crop_211,kcal_state_crop_72,kg_state_crop_72,kcal_state_crop_212,kg_state_crop_212,kcal_state_crop_46,kg_state_crop_46,kcal_state_crop_75,kg_state_crop_75,kcal_state_crop_54,kg_state_crop_54,kcal_state_crop_245,kg_state_crop_245,kcal_state_crop_248,kg_state_crop_248
0,31,39,835841,31039,Cuming,Cuming County,6,H1,G4020,,,,A,1477645345,10690204,41.915865,-96.788517,649877.0,,230.0,535434.0,,,,,,3.0,848.0,19.0,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.0,,,,,,Nebraska,NE,2.558894e+12,7.010668e+08,3.086090e+08,9.380214e+04,9.327255e+11,2.091313e+08,8.466448e+05,250.486633,2.712796e+08,69737.675175,8.142717e+06,2154.157863,5.335698e+05,2.036526e+02,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,31,109,835876,31109,Lancaster,Lancaster County,6,H1,G4020,339.0,30700.0,,A,2169272970,22847034,40.783547,-96.688658,557931.0,,2507.0,564856.0,,,,,,439.0,338.0,76.0,,,,,,,,,,,,,26.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE,2.196856e+12,6.018784e+08,3.363838e+09,1.022443e+06,9.839786e+11,2.206230e+08,1.238924e+08,36654.543901,1.081279e+08,27796.384680,3.257087e+07,8616.631452,1.387282e+07,5.294968e+03,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,31,129,835886,31129,Nuckolls,Nuckolls County,6,H1,G4020,,,,A,1489645185,1718484,40.176492,-98.046842,492281.0,,12556.0,307431.0,,,,,,10.0,1569.0,,,,,,,,,,,,,,34.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE,1.938359e+12,5.310572e+08,1.684737e+10,5.120781e+06,5.355445e+11,1.200772e+08,2.822149e+06,834.955442,5.019312e+08,129031.146638,,,1.814137e+07,6.924189e+03,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,31,101,835872,31101,Keith,Keith County,6,H1,G4020,,,,A,2749903240,124629167,41.194245,-101.644449,489931.0,,6553.0,84142.0,1480.0,,,137.0,130062.0,3994.0,7521.0,1826.0,,,,,6431.0,,57.0,,,,,,7954.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE,1.929106e+12,5.285221e+08,8.792674e+09,2.672545e+06,1.465753e+11,3.286441e+07,1.127166e+09,333481.203506,2.406007e+09,618510.678052,7.825579e+08,207025.908310,4.244014e+09,1.619853e+06,,,1.265806e+09,401333.642688,1.235875e+08,35010.624953,2.312620e+11,6.928161e+07,4.475795e+10,6.393993e+07,5.008169e+08,8.664651e+05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,31,137,835890,31137,Phelps,Phelps County,6,H1,G4020,,,,A,1398048634,1646526,40.516365,-99.406557,719100.0,,5636.0,375036.0,,,,,,30.0,757.0,17.0,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Nebraska,NE,2.831460e+12,7.757424e+08,7.562263e+09,2.298560e+06,6.533123e+11,1.464826e+08,8.466448e+06,2504.866326,2.421682e+08,62254.033145,7.285589e+06,1927.404404,1.600710e+06,6.109578e+02,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3103,44,9,1219782,44009,Washington,Washington County,6,H4,G4020,148.0,39300.0,,N,852827571,604769755,41.396792,-71.620282,9025.0,,,,,,,,,50.0,3.0,,,,,,,,151.0,,,,,,,,,,74.0,1.0,,,,,,,,,,,,,,3.0,,,,4.0,,,12.0,,,,,,,,Rhode Island,RI,,,,,,,,,,,,,,,,,,,,,,,,,2.277469e+08,3.940258e+05,,,,,,,,,,,,,,,,,8.071585e+07,172469.755889,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3104,44,7,1219781,44007,Providence,Providence County,6,H4,G4020,148.0,39300.0,,N,1060552463,67870199,41.869768,-71.578625,3049.0,,,,,,,,,50.0,5.0,,,,,,,,39.0,,,,,,,,,,296.0,,,,,,,,,,,,,,,11.0,,,,34.0,,,2.0,,,,,,,,Rhode Island,RI,,,,,,,,,,,,,,,,,,,,,,,,,5.882206e+07,1.017683e+05,,,,,,,,,,,,,,,,,3.228634e+08,689879.023555,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3105,44,1,1219777,44001,Bristol,Bristol County,6,H4,G4020,148.0,39300.0,,N,62500772,53359134,41.706840,-71.286687,391.0,,,,,,,,,2.0,,,,,,,,,2.0,,,,,,,,,,2.0,,,,,,,,,,,,,,,6.0,,,,,,,,,,,,,,,Rhode Island,RI,,,,,,,,,,,,,,,,,,,,,,,,,3.016516e+06,5.218885e+03,,,,,,,,,,,,,,,,,2.181509e+06,4661.344754,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3106,44,5,1219779,44005,Newport,Newport County,6,H4,G4020,148.0,39300.0,,N,265293780,547001789,41.501045,-71.283063,4613.0,,,,,,,,,135.0,,,,,,,,,1201.0,,,,,,,,,,87.0,16.0,,,,,,,,,,,,,,19.0,,,,19.0,,,19.0,,,,,,,32.0,Rhode Island,RI,,,,,,,,,,,,,,,,,,,,,,,,,1.811418e+09,3.133941e+06,,,,,,,,,,,,,,,,,9.489566e+07,202768.496788,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [152]:
# using this to do a spot check on the kcal values
check_dict = {}
for c in rel_counties.columns:
    if c.startswith("crop") or c.startswith("kcal") or c.startswith("kg"):
        check_dict[c] = rel_counties[c].sum()
check_dict

{'crop_1': 373160347.0,
 'crop_3': 9585870.0,
 'crop_4': 23513578.0,
 'crop_5': 367935660.0,
 'crop_6': 5530138.0,
 'crop_10': 5676394.0,
 'crop_12': 628641.0,
 'crop_21': 9753800.0,
 'crop_22': 71993753.0,
 'crop_27': 2891576.0,
 'crop_28': 7090564.0,
 'crop_29': 3108517.0,
 'crop_31': 8675440.0,
 'crop_32': 941807.0,
 'crop_33': 745438.0,
 'crop_35': 373587.0,
 'crop_41': 4746110.0,
 'crop_42': 53993.0,
 'crop_43': 3739990.0,
 'crop_46': 541786.0,
 'crop_48': 237007.0,
 'crop_49': 694693.0,
 'crop_50': 277898.0,
 'crop_52': 5087509.0,
 'crop_53': 6044885.0,
 'crop_54': 609882.0,
 'crop_66': 750322.0,
 'crop_67': 200828.0,
 'crop_68': 1509000.0,
 'crop_69': 4389297.0,
 'crop_72': 453184.0,
 'crop_74': 2725787.0,
 'crop_75': 5504144.0,
 'crop_76': 2356966.0,
 'crop_77': 161044.0,
 'crop_204': 1786918.0,
 'crop_206': 221591.0,
 'crop_207': 13630.0,
 'crop_208': 90238.0,
 'crop_209': 168005.0,
 'crop_211': 158005.0,
 'crop_212': 3562420.0,
 'crop_214': 69344.0,
 'crop_216': 87772.0,
 'cr

At this point, we have three fully connected dataframes
- `final_crops` - national level production data
- `rel_prod` - state level production data
- `rel_counties` - county level production data

Each of these have production in pixels, and production in kcals. Writing these to files below.

In [153]:
# write rel_counties to files
rel_counties_state.to_csv("../synced-data/origins_data/state_production-v2.csv")
final_crops.to_csv("../synced-data/origins_data/national_production-v2.csv")


## Crosswalk 3 County Production -> Consumption by income
1. drop irrelevent crops, do the rollups as needed, and rename all columns to match the demographic consumption data
2. adjust the county_production data by how much was consumed by each demographic

*Note we are using income as it's most indicative of the consumption*

In [154]:
# Run this if starting from this cell
county_prod = pd.read_csv("../synced-data/origins_data/state_production-v2.csv")
county_prod = county_prod.drop(["Unnamed: 0"], axis=1)
income_consumption = pd.read_csv("../synced-data/destination_data/income-consumption.csv")

### Consumption by income

In [155]:
# rolling up income_consumption first

# keep only most recent data 20087-08
for c in income_consumption.columns:
    if c.startswith("low") or c.startswith("high"):
        if "2007" not in c:
            income_consumption = income_consumption.drop(c, axis=1)

# convert pounds to kilograms
income_consumption["low_2007-08"] = income_consumption["low_2007-08"] * 0.453592
income_consumption["high_2007-08"] = income_consumption["high_2007-08"] * 0.453592

# make an average column that is the average of low and high and income consumption
income_consumption["avg_2007-08"] = (income_consumption["low_2007-08"] + income_consumption["high_2007-08"]) / 2

income_consumption

Unnamed: 0,Food Type,low_2007-08,high_2007-08,avg_2007-08
0,Fruit; total,53.233557,54.762162,53.997860
1,Apples; total,14.106711,13.063450,13.585080
2,Apples from fruit,5.402281,6.300393,5.851337
3,Apple juice,8.758862,6.504509,7.631685
4,Bananas,4.295516,4.944153,4.619835
...,...,...,...,...
58,Oils; other,0.462664,0.589670,0.526167
59,Caloric sweeteners,36.772703,35.720370,36.246537
60,Nuts; total,2.653513,4.694677,3.674095
61,Peanuts,2.023020,3.016387,2.519704


In [156]:
income_consumption = income_consumption.replace("Apples; total", "Apples")
income_consumption = income_consumption.replace("Oranges; total", "Oranges")

curated_crop_names = ['Tree nuts', 'Apples', 'Stone fruits', 'Legumes', 'Berries', 'Broccoli and cauliflower', 'Carrots', 'Celery', 'Corn flour', 'Cucumbers', 'Grapes', 'Lettuce', 'Melons', 'Oat flour', 'Onions', 'Oranges', 'Peanuts', 'Green peas', 'Peppers', 'Potatoes', 'Rice dried', 'Sweet corn', 'Tomatoes', 'Wheat flour', 'Other citrus fruits', "Grains; total", "Brassica; total", "Bananas", "Tropical fruits"]
# Drop all rows that are not in curated_crop_names
mask = income_consumption['Food Type'].isin(curated_crop_names)
income_consumption = income_consumption.loc[mask]
income_consumption = income_consumption.reset_index(drop=True)

In [157]:
income_consumption

Unnamed: 0,Food Type,low_2007-08,high_2007-08,avg_2007-08
0,Apples,14.106711,13.06345,13.58508
1,Bananas,4.295516,4.944153,4.619835
2,Berries,2.476612,3.347509,2.912061
3,Grapes,3.96893,4.331804,4.150367
4,Melons,1.873335,2.544651,2.208993
5,Oranges,15.540062,15.06379,15.301926
6,Other citrus fruits,2.458469,2.875773,2.667121
7,Stone fruits,2.662585,3.528946,3.095765
8,Tropical fruits,2.889381,2.394966,2.642173
9,Brassica; total,3.40194,4.599423,4.000681


In [158]:
# creating an other grain row from Total grain - ('Corn flour' + 'Wheat flour' + 'Rice dried' = 'Oat flour')
grain_rows = income_consumption[income_consumption['Food Type'].isin(['Corn flour', 'Wheat flour', 'Rice dried', 'Oat flour'])]
grain_sum = pd.DataFrame(grain_rows.sum()).transpose()
grain_sum["Food Type"] = "Other grain"
grain_sum["low_2007-08"] = income_consumption.loc[income_consumption["Food Type"] == "Grains; total", "low_2007-08"].iloc[0] - grain_sum["low_2007-08"]
grain_sum["high_2007-08"] = income_consumption.loc[income_consumption["Food Type"] == "Grains; total", "high_2007-08"].iloc[0] - grain_sum["high_2007-08"]
grain_sum["avg_2007-08"] = income_consumption.loc[income_consumption["Food Type"] == "Grains; total", "avg_2007-08"].iloc[0] - grain_sum["avg_2007-08"]
income_consumption = pd.concat([income_consumption, grain_sum], ignore_index=True)
income_consumption = income_consumption[income_consumption['Food Type'] != 'Grains; total']
income_consumption

Unnamed: 0,Food Type,low_2007-08,high_2007-08,avg_2007-08
0,Apples,14.106711,13.06345,13.58508
1,Bananas,4.295516,4.944153,4.619835
2,Berries,2.476612,3.347509,2.912061
3,Grapes,3.96893,4.331804,4.150367
4,Melons,1.873335,2.544651,2.208993
5,Oranges,15.540062,15.06379,15.301926
6,Other citrus fruits,2.458469,2.875773,2.667121
7,Stone fruits,2.662585,3.528946,3.095765
8,Tropical fruits,2.889381,2.394966,2.642173
9,Brassica; total,3.40194,4.599423,4.000681


In [159]:
# creating radishes and cabbage rows from 'Brassica; total' - ('Broccoli and cauliflower')/2

# create a new row called radishes that is Brassica; total- Broccoli and cauliflower 
Radishes = income_consumption[income_consumption['Food Type'] == 'Brassica; total'].copy()
Radishes.loc[Radishes['Food Type'] == "Brassica; total", 'Food Type'] = "Radishes"
Radishes.loc[:, 'low_2007-08'] = (Radishes['low_2007-08'] - income_consumption.loc[income_consumption['Food Type'] == 'Broccoli and cauliflower', 'low_2007-08'].iloc[0]) / 2
Radishes.loc[:, 'high_2007-08'] = (Radishes['high_2007-08'] - income_consumption.loc[income_consumption['Food Type'] == 'Broccoli and cauliflower', 'high_2007-08'].iloc[0]) / 2
Radishes.loc[:, 'avg_2007-08'] = (Radishes['avg_2007-08'] - income_consumption.loc[income_consumption['Food Type'] == 'Broccoli and cauliflower', 'avg_2007-08'].iloc[0]) / 2


Cabbage = income_consumption[income_consumption['Food Type'] == 'Brassica; total'].copy()
Cabbage.loc[Cabbage['Food Type'] == 'Brassica; total', 'Food Type'] = 'Cabbage'
Cabbage.loc[:, 'low_2007-08'] = (Cabbage['low_2007-08'] - income_consumption.loc[income_consumption['Food Type'] == 'Broccoli and cauliflower', 'low_2007-08'].iloc[0]) / 2
Cabbage.loc[:, 'high_2007-08'] = (Cabbage['high_2007-08'] - income_consumption.loc[income_consumption['Food Type'] == 'Broccoli and cauliflower', 'high_2007-08'].iloc[0]) / 2
Cabbage.loc[:, 'avg_2007-08'] = (Cabbage['avg_2007-08'] - income_consumption.loc[income_consumption['Food Type'] == 'Broccoli and cauliflower', 'avg_2007-08'].iloc[0]) / 2

income_consumption = pd.concat([income_consumption, Radishes, Cabbage], ignore_index=True)
income_consumption

Unnamed: 0,Food Type,low_2007-08,high_2007-08,avg_2007-08
0,Apples,14.106711,13.06345,13.58508
1,Bananas,4.295516,4.944153,4.619835
2,Berries,2.476612,3.347509,2.912061
3,Grapes,3.96893,4.331804,4.150367
4,Melons,1.873335,2.544651,2.208993
5,Oranges,15.540062,15.06379,15.301926
6,Other citrus fruits,2.458469,2.875773,2.667121
7,Stone fruits,2.662585,3.528946,3.095765
8,Tropical fruits,2.889381,2.394966,2.642173
9,Brassica; total,3.40194,4.599423,4.000681


In [160]:
# Probably should have done this to begin with, but I'm going to pivot the table so that the food types are the columns
income_consumption = income_consumption.set_index("Food Type").transpose()
income_consumption

Food Type,Apples,Bananas,Berries,Grapes,Melons,Oranges,Other citrus fruits,Stone fruits,Tropical fruits,Brassica; total,Broccoli and cauliflower,Carrots,Celery,Cucumbers,Green peas,Lettuce,Onions,Peppers,Tomatoes,Sweet corn,Potatoes,Corn flour,Oat flour,Rice dried,Wheat flour,Peanuts,Tree nuts,Other grain,Radishes,Cabbage
low_2007-08,14.106711,4.295516,2.476612,3.96893,1.873335,15.540062,2.458469,2.662585,2.889381,3.40194,1.605716,1.560356,1.138516,1.020582,0.771106,4.966832,3.442763,3.011851,12.764079,2.739696,22.366622,6.90367,1.006974,5.511143,41.317695,2.02302,0.653172,4.21387,0.898112,0.898112
high_2007-08,13.06345,4.944153,3.347509,4.331804,2.544651,15.06379,2.875773,3.528946,2.394966,4.599423,2.385894,2.0457,1.329025,1.03419,0.802858,7.153146,4.100472,3.592449,14.578447,2.884845,24.394178,5.610933,1.424279,5.411353,45.613212,3.016387,1.660147,4.939617,1.106764,1.106764
avg_2007-08,13.58508,4.619835,2.912061,4.150367,2.208993,15.301926,2.667121,3.095765,2.642173,4.000681,1.995805,1.803028,1.23377,1.027386,0.786982,6.059989,3.771617,3.30215,13.671263,2.81227,23.3804,6.257302,1.215627,5.461248,43.465453,2.519704,1.15666,4.576743,1.002438,1.002438


### County Production

In [161]:
# create Tree Nut column from crop_75, crop_74, crop_204, crop_76
county_prod["pixels_Tree nuts"] = county_prod["crop_75"] + county_prod["crop_74"] + county_prod["crop_204"] + county_prod["crop_76"]
county_prod["kcal_Tree nuts"] = county_prod["kcal_state_crop_75"] + county_prod["kcal_state_crop_74"] + county_prod["kcal_state_crop_204"] + county_prod["kcal_state_crop_76"]
county_prod["kg_Tree nuts"] = county_prod["kg_state_crop_75"] + county_prod["kg_state_crop_74"] + county_prod["kg_state_crop_204"] + county_prod["kg_state_crop_76"]
county_prod = county_prod.drop(["crop_75", "crop_74", "crop_204", "crop_76"], axis=1)
county_prod = county_prod.drop(["kcal_state_crop_75", "kcal_state_crop_74", "kcal_state_crop_204", "kcal_state_crop_76"], axis=1)
county_prod = county_prod.drop(["kg_state_crop_75", "kg_state_crop_74", "kg_state_crop_204", "kg_state_crop_76"], axis=1)

# create Other grain column from crop_27 and crop_21
county_prod["pixels_Other grain"] = county_prod["crop_27"] + county_prod["crop_21"]
county_prod["kcal_Other grain"] = county_prod["kcal_state_crop_27"] + county_prod["kcal_state_crop_21"]
county_prod["kg_Other grain"] = county_prod["kg_state_crop_27"] + county_prod["kg_state_crop_21"]
county_prod = county_prod.drop(["crop_27", "crop_21"], axis=1)
county_prod = county_prod.drop(["kcal_state_crop_27", "kcal_state_crop_21"], axis=1)
county_prod = county_prod.drop(["kg_state_crop_27", "kg_state_crop_21"], axis=1)

# create Broccoli and cauliflower column from crop_214 and crop_244
county_prod["pixels_Broccoli and cauliflower"] = county_prod["crop_214"] + county_prod["crop_244"]
county_prod["kcal_Broccoli and cauliflower"] = county_prod["kcal_state_crop_214"] + county_prod["kcal_state_crop_244"]
county_prod["kg_Broccoli and cauliflower"] = county_prod["kg_state_crop_214"] + county_prod["kg_state_crop_244"]
county_prod = county_prod.drop(["crop_214", "crop_244"], axis=1)
county_prod = county_prod.drop(["kcal_state_crop_214", "kcal_state_crop_244"], axis=1)
county_prod = county_prod.drop(["kg_state_crop_214", "kg_state_crop_244"], axis=1)

# create Stone fruits column from crop_223  crop_66  crop_67  crop_220  crop_210
county_prod["pixels_Stone fruits"] = county_prod["crop_223"] + county_prod["crop_66"] + county_prod["crop_67"] + county_prod["crop_220"]
county_prod["kcal_Stone fruits"] = county_prod["kcal_state_crop_223"] + county_prod["kcal_state_crop_66"] + county_prod["kcal_state_crop_67"] + county_prod["kcal_state_crop_220"]
county_prod["kg_Stone fruits"] = county_prod["kg_state_crop_223"] + county_prod["kg_state_crop_66"] + county_prod["kg_state_crop_67"] + county_prod["kg_state_crop_220"]
county_prod = county_prod.drop(["crop_223", "crop_66", "crop_67", "crop_220"], axis=1)
county_prod = county_prod.drop(["kcal_state_crop_223", "kcal_state_crop_66", "kcal_state_crop_67", "kcal_state_crop_220"], axis=1)
county_prod = county_prod.drop(["kg_state_crop_223", "kg_state_crop_66", "kg_state_crop_67", "kg_state_crop_220"], axis=1)

# create Berries column from crop_242 crop_250 crop_221
county_prod["pixels_Berries"] = county_prod["crop_242"] + county_prod["crop_250"] + county_prod["crop_221"]
county_prod["kcal_Berries"] = county_prod["kcal_state_crop_242"] + county_prod["kcal_state_crop_250"] + county_prod["kcal_state_crop_221"]
county_prod["kg_Berries"] = county_prod["kg_state_crop_242"] + county_prod["kg_state_crop_250"] + county_prod["kg_state_crop_221"]
county_prod = county_prod.drop(["crop_242", "crop_250", "crop_221"], axis=1)
county_prod = county_prod.drop(["kcal_state_crop_242", "kcal_state_crop_250", "kcal_state_crop_221"], axis=1)
county_prod = county_prod.drop(["kg_state_crop_242", "kg_state_crop_250", "kg_state_crop_221"], axis=1)

# create Legumes column from crop_42 crop_52
county_prod["pixels_Legumes"] = county_prod["crop_42"] + county_prod["crop_52"]
county_prod["kcal_Legumes"] = county_prod["kcal_state_crop_42"] + county_prod["kcal_state_crop_52"]
county_prod["kg_Legumes"] = county_prod["kg_state_crop_42"] + county_prod["kg_state_crop_52"]
county_prod = county_prod.drop(["crop_42", "crop_52"], axis=1)
county_prod = county_prod.drop(["kcal_state_crop_42", "kcal_state_crop_52"], axis=1)
county_prod = county_prod.drop(["kg_state_crop_42", "kg_state_crop_52"], axis=1)

# create Melons column from crop_209 crop_48
county_prod["pixels_Melons"] = county_prod["crop_209"] + county_prod["crop_48"]
county_prod["kcal_Melons"] = county_prod["kcal_state_crop_209"] + county_prod["kcal_state_crop_48"]
county_prod["kg_Melons"] = county_prod["kg_state_crop_209"] + county_prod["kg_state_crop_48"]
county_prod = county_prod.drop(["crop_209", "crop_48"], axis=1)
county_prod = county_prod.drop(["kcal_state_crop_209", "kcal_state_crop_48"], axis=1)
county_prod = county_prod.drop(["kg_state_crop_209", "kg_state_crop_48"], axis=1)

In [162]:
# drop crops not listed in consumption data
drop_list = [ "crop_207", "crop_215", "crop_31", "crop_248", "crop_32", "crop_208", "crop_29", "crop_35", "crop_218", "crop_211", "crop_77", "crop_229", "crop_33", "crop_4", "crop_5", "crop_222", "crop_41", "crop_6", "crop_46", ]
kcal_drop_list = ["kcal_state_" + item for item in drop_list]
kg_drop_list = ["kg_state_" + item for item in drop_list]

# for every value in drop_list drop the column from county_prod if it exists
for item in drop_list:
    if item in county_prod.columns:
        county_prod = county_prod.drop(item, axis=1)
for item in kcal_drop_list:
    if item in county_prod.columns:
        county_prod = county_prod.drop(item, axis=1)
for item in kg_drop_list:
    if item in county_prod.columns:
        county_prod = county_prod.drop(item, axis=1)


In [163]:
code_name_map = { 'crop_68': 'Apples', 'crop_206': 'Carrots', 'crop_245': 'Celery', 'crop_1': 'Corn flour', 'crop_50': 'Cucumbers', 'crop_69': 'Grapes', 'crop_227': 'Lettuce', 'crop_28': 'Oat flour', 'crop_49': 'Onions', 'crop_212': 'Oranges', 'crop_10': 'Peanuts', 'crop_53': 'Green peas', 'crop_216': 'Peppers', 'crop_43': 'Potatoes', 'crop_3': 'Rice dried', 'crop_12': 'Sweet corn', 'crop_54': 'Tomatoes', 'crop_22': 'Wheat flour', 'crop_72': 'Other citrus fruits', 'crop_243': 'Cabbage', 'crop_246': 'Radishes'}
kcal_code_name_map = { "kcal_state_" + key: value for (key, value) in code_name_map.items() }
kcal_code_name_map = { key: "kcal_" + value for (key, value) in kcal_code_name_map.items() }
kg_code_name_map = { "kg_state_" + key: value for (key, value) in code_name_map.items() }
kg_code_name_map = { key: "kg_" + value for (key, value) in kg_code_name_map.items() }
code_name_map = { key: "pixels_" + value for (key, value) in code_name_map.items() }

In [164]:
# for every column in county_prod that matches a key in code_name_map, rename the column to the value in code_name_map
county_prod = county_prod.rename(columns=code_name_map)
county_prod = county_prod.rename(columns=kcal_code_name_map)
county_prod = county_prod.rename(columns=kg_code_name_map)
for c in county_prod.columns:
    print (c)

statefp
countyfp
countyns
geoid
name
namelsad
lsad
classfp
mtfcc
csafp
cbsafp
metdivfp
funcstat
aland
awater
intptlat
intptlon
pixels_Corn flour
pixels_Rice dried
pixels_Peanuts
pixels_Sweet corn
pixels_Wheat flour
pixels_Oat flour
pixels_Potatoes
pixels_Onions
pixels_Cucumbers
pixels_Green peas
pixels_Tomatoes
pixels_Apples
pixels_Grapes
pixels_Other citrus fruits
pixels_Carrots
pixels_Oranges
pixels_Peppers
pixels_Lettuce
pixels_Cabbage
pixels_Celery
pixels_Radishes
state
postal
kcal_Corn flour
kg_Corn flour
kcal_Oat flour
kg_Oat flour
kcal_Green peas
kg_Green peas
kcal_Cabbage
kg_Cabbage
kcal_Wheat flour
kg_Wheat flour
kcal_Potatoes
kg_Potatoes
kcal_Grapes
kg_Grapes
kcal_Sweet corn
kg_Sweet corn
kcal_Apples
kg_Apples
kcal_Carrots
kg_Carrots
kcal_Cucumbers
kg_Cucumbers
kcal_Onions
kg_Onions
kcal_Peppers
kg_Peppers
kcal_Radishes
kg_Radishes
kcal_Lettuce
kg_Lettuce
kcal_Peanuts
kg_Peanuts
kcal_Rice dried
kg_Rice dried
kcal_Other citrus fruits
kg_Other citrus fruits
kcal_Oranges
kg_Oran

In [165]:
county_prod.to_csv("../synced-data/origins_data/county_prod.csv", index=False)
# income_consumption.to_csv("../synced-data/destination_data/income_consumption.csv", index=True)

## Crosswalk 4 2007 consumption -> 2017 consumption
This is necessary as our prodction data is from 2017 but our consumption data is currently from 2007. We're going to scale this by the Food abailability per capita data

In [166]:
# run this to load data sets if starting here
county_prod = pd.read_csv("../synced-data/origins_data/county_prod.csv")
income_consumption = pd.read_csv("../synced-data/destination_data/income_consumption.csv")
income_consumption = income_consumption.rename(columns={"Unnamed: 0": "Income"})
income_consumption = income_consumption.drop("Brassica; total", axis=1)
income_consumption = income_consumption.set_index("Income")
food_availability = pd.read_csv("../synced-data/destination_data/food_availability_2007-2017.csv")
food_availability = food_availability.set_index("Year")


In [167]:
food_availability

Unnamed: 0_level_0,Apples,Bananas,Berries,Broccoli and cauliflower,Cabbage,Carrots,Celery,Corn flour,Cucumbers,Grapes,Green peas,Legumes,Lettuce,Melons,Oat flour,Onions,Oranges,Other citrus fruits,Other grain,Peanuts,Peppers,Potatoes,Radishes,Rice dried,Stone fruits,Sweet corn,Tomatoes,Tree nuts,Tropical fruits,Wheat flour
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
2007,19.033007,13.662971,4.672395,1.822233,0.181247,2.560381,0.588859,101.294954,1.272957,10.71376,1.643965,12.0147,2.76392,2.87462,13.468522,5.153067,18.808011,2.790417,1.788937,40.178638,2.010462,56.778738,0.038442,54.011766,5.046364,7.484989,7.136286,21.550719,4.057073,833.151468
2017,17.344068,15.628583,6.458108,2.175035,0.419964,2.510968,0.431226,110.086359,1.293924,8.704417,1.086662,16.725546,2.822044,2.600865,13.612209,5.746792,12.044068,3.170611,2.349979,46.526003,2.469386,52.40838,0.038932,54.0,3.82889,5.713299,6.72402,29.330735,4.378377,828.01205
ratio 2007/2017,1.097379,0.87423,0.723493,0.837795,0.431578,1.019679,1.365546,0.920141,0.983796,1.230842,1.512858,0.718344,0.979404,1.105255,0.989444,0.896686,1.5616,0.880088,0.761257,0.863574,0.814155,1.08339,0.987406,1.000218,1.31797,1.310099,1.061313,0.734749,0.926616,1.006207


In [170]:
income_consumption.columns

Index(['Apples', 'Bananas', 'Berries', 'Grapes', 'Melons', 'Oranges',
       'Other citrus fruits', 'Stone fruits', 'Tropical fruits',
       'Broccoli and cauliflower', 'Carrots', 'Celery', 'Cucumbers',
       'Green peas', 'Legumes', 'Lettuce', 'Onions', 'Peppers', 'Tomatoes',
       'Sweet corn', 'Potatoes', 'Corn flour', 'Oat flour', 'Rice dried',
       'Wheat flour', 'Peanuts', 'Tree nuts', 'Other grain', 'Radishes',
       'Cabbage'],
      dtype='object')

In [171]:
# for every column in income_consumption multiply that columns values by the value in the 3rd row of the food_availability column with the same name
income_consumption_2017 = income_consumption.copy()
for c in income_consumption_2017.columns:
    income_consumption_2017[c] = income_consumption_2017[c] / food_availability.loc["ratio 2007/2017"][c]
income_consumption_2017 = income_consumption_2017.rename(index={"low_2007-08": "low_2017", "high_2007-08": "high_2017", "avg_2007-08": "avg_2017"})

In [172]:
income_consumption_2017

Unnamed: 0_level_0,Apples,Bananas,Berries,Grapes,Melons,Oranges,Other citrus fruits,Stone fruits,Tropical fruits,Broccoli and cauliflower,Carrots,Celery,Cucumbers,Green peas,Legumes,Lettuce,Onions,Peppers,Tomatoes,Sweet corn,Potatoes,Corn flour,Oat flour,Rice dried,Wheat flour,Peanuts,Tree nuts,Other grain,Radishes,Cabbage
Income,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
low_2017,12.854918,4.913487,3.423133,3.224566,1.694934,9.951375,2.793435,2.020216,3.118208,1.916597,1.530243,0.833744,1.037392,0.509702,3.44767,5.071283,3.839431,3.69936,12.026692,2.091212,20.645024,7.502841,1.017717,5.509942,41.062821,2.342614,0.888974,5.535413,0.909568,2.080997
high_2017,11.904233,5.655439,4.626872,3.519383,2.30232,9.646385,3.267597,2.677561,2.584637,2.847825,2.00622,0.973255,1.051224,0.530689,3.523443,7.303573,4.572919,4.41249,13.736243,2.202005,22.516515,6.097907,1.439474,5.410174,45.33184,3.492911,2.259476,6.488767,1.120881,2.56446
avg_2017,12.379576,5.284463,4.025003,3.371974,1.998627,9.79888,3.030516,2.348889,2.851423,2.382211,1.768232,0.903499,1.044308,0.520196,3.485557,6.187428,4.206175,4.055925,12.881467,2.146609,21.580769,6.800374,1.228595,5.460058,43.19733,2.917763,1.574225,6.01209,1.015225,2.322729


In [173]:
# write to csv file as income consumption 2017
income_consumption_2017.to_csv("../synced-data/destination_data/income_consumption_2017.csv", index=True)

# Scaling production numbers to 2017 consumption

at this point:
- `county_prod` is all the unscaled data of how much was produced in the US in 2017. These values are in kcal and pixels per year per crop per county.
- `income_consumption_2017` is how much was consumed in 2017 in kg per year per crop per income class

Next Steps:
1. Use `income_consumption_2017_kg` to create kg consumed per county column. These columns will be named `low_kg_consumed_crop`, `high_kg_consumed_crop`, `other_kg_consumed_crop`. Low, high and other refer to income.
2. Use these kg values to convert to kcal's consumed using the `total kcal` to `total kg` ratio
3. Scale production data to match total consumption nationally

In [302]:
# run if starting here
county_prod = pd.read_csv("../synced-data/origins_data/county_prod.csv")
income_consumption_2017_kg = pd.read_csv("../synced-data/destination_data/income_consumption_2017.csv")
county_population = gpd.read_file("../synced-data/destination_data/population_counties_conus.geojson")
county_prod["geoid"] = county_prod["geoid"].astype(int)
county_prod = county_prod.set_index("geoid")
county_population["geoid"] = county_population["geoid"].astype(int)
county_population = county_population.set_index("geoid")
income_consumption_2017_kg = income_consumption_2017_kg.set_index("Income")


In [306]:
# this adds consumption columns to county_population
# these columns are named in the following schema low_kg_consumed_crop type
# for example low_kg_consumed_apples
for c in income_consumption_2017_kg.columns:
    low_consumption_kg = income_consumption_2017_kg[c]["low_2017"] * county_population["income_low_income"]
    high_consumption_kg = income_consumption_2017_kg[c]["high_2017"] * county_population["income_high_income"]
    other_consumption_kg = income_consumption_2017_kg[c]["avg_2017"] * county_population["income_unknown_income"]
    county_population["low_kg_consumed_" + c] = low_consumption_kg
    county_population["high_kg_consumed_" + c] = high_consumption_kg
    county_population["other_kg_consumed_" + c] = other_consumption_kg
    county_population["total_kg_consumed_" + c] = low_consumption_kg + high_consumption_kg + other_consumption_kg


  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)


In [308]:
# This calculates a kcal/kg ratio and adds a column to county_population with the number of kcal's consumed by income demographic
for c in county_prod.columns:
    if c.startswith("kg_"):
        crop_name = c[3:]
        kcal_kg_ratio = county_prod["kcal_" + crop_name].sum() / county_prod[c].sum()
        county_population["low_kcal_consumed_"+crop_name] = county_population["low_kg_consumed_" + crop_name] * kcal_kg_ratio
        county_population["high_kcal_consumed_"+crop_name] = county_population["high_kg_consumed_" + crop_name] * kcal_kg_ratio
        county_population["other_kcal_consumed_"+crop_name] = county_population["other_kg_consumed_" + crop_name] * kcal_kg_ratio
        county_population["total_kcal_consumed_"+crop_name] = county_population["total_kg_consumed_" + crop_name] * kcal_kg_ratio

  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__se

In [309]:
# this creates a new dataframe with the values from county_prod scaled to match the total kcal consumed
county_prod_scaled = county_prod.copy()
for c in county_population.columns:
    if c.startswith("total_kcal_consumed_"):
        crop_name = c[20:]
        crop_total_consumed = county_population[c].sum()
        scale_ratio = crop_total_consumed / county_prod["kcal_" + crop_name].sum()
        print (c)
        print (scale_ratio)
        county_prod_scaled["kcal_" + crop_name] = county_prod_scaled["kcal_" + crop_name] * scale_ratio
        county_prod_scaled["kg_" + crop_name] = county_prod_scaled["kg_" + crop_name] * scale_ratio


total_kcal_consumed_Corn flour
0.0057193427126893985
total_kcal_consumed_Oat flour
0.5783999823315072
total_kcal_consumed_Green peas
0.18461925096830342
total_kcal_consumed_Cabbage
0.6793924083336856
total_kcal_consumed_Wheat flour
0.7343743712634527
total_kcal_consumed_Potatoes
0.34334588158955603
total_kcal_consumed_Grapes
0.16815773860711436
total_kcal_consumed_Sweet corn
0.621147320806031
total_kcal_consumed_Apples
0.7524179574225223
total_kcal_consumed_Carrots
0.397725241532623
total_kcal_consumed_Cucumbers
0.38956003091000607
total_kcal_consumed_Onions
0.376737286173717
total_kcal_consumed_Peppers
8.938353387985348
total_kcal_consumed_Radishes
39.37870807165856
total_kcal_consumed_Lettuce
0.47970976591565195
total_kcal_consumed_Peanuts
0.32278322594534725
total_kcal_consumed_Rice dried
0.22029343196654683
total_kcal_consumed_Other citrus fruits
0.43004382384427736
total_kcal_consumed_Oranges
0.6903329049439216
total_kcal_consumed_Tomatoes
0.38981776728458917
total_kcal_consumed_C

  scale_ratio = crop_total_consumed / county_prod["kcal_" + crop_name].sum()


In [312]:
# this joins kg and kcal values from county_prod_scaled to county_population on geoid
county_population = county_population.join(county_prod_scaled.filter(regex="^kg_|^kcal_"))

for c in county_population.columns:
    if c.startswith("kg_"):
        crop_name = c[3:]
        county_population = county_population.rename(columns={c: "kg_produced_" + crop_name})
    if c.startswith("kcal_"):
        crop_name = c[5:]
        county_population = county_population.rename(columns={c: "kcal_produced_" + crop_name})

kg_Corn flour
kg_Oat flour
kg_Green peas
kg_Cabbage
kg_Wheat flour
kg_Potatoes
kg_Grapes
kg_Sweet corn
kg_Apples
kg_Carrots
kg_Cucumbers
kg_Onions
kg_Peppers
kg_Radishes
kg_Lettuce
kg_Peanuts
kg_Rice dried
kg_Other citrus fruits
kg_Oranges
kg_Tomatoes
kg_Celery
kg_Tree nuts
kg_Other grain
kg_Broccoli and cauliflower
kg_Stone fruits
kg_Berries
kg_Legumes
kg_Melons


In [335]:
# sum high_kcal_consumed_Corn flour column
double_check = county_population["total_kcal_consumed_Apples"].sum() - county_population["kcal_produced_Apples"].sum()
if double_check == 0:
    print ("Numbers look good!")
else: 
    print ("Numbers don't match, difference is " + str(double_check))


Numbers look good!


In [317]:
# county_population.to_file("../synced-data/county_population_consumption_scaled-production.geojson", driver="GeoJSON")