# Set Up

In [3]:
# import necessary libraries
import pandas as pd
import sqlite3
import altair as alt
import geopandas as gpd
from vega_datasets import data

# set global vars
db_name = "../data/field_crops.db"
crop_table = "midwest_key_field_crops_cleaned"
area_table = "midwest_area_planted_cleaned"
output_path = "../static_final/"

# db connection
conn = sqlite3.connect(db_name)

# color theme
color_scale = alt.Scale(
    domain=["CORN", "SOYBEANS", "WHEAT"], range=["#FFB14E", "#FA8775", "#B5E384"]
)


# Regional Analysis

## Total Production

In [4]:

# pull total crop production by year
query = f"""
Select 
    commodity_desc,
    year, 
    sum(value) as total_prod
from {crop_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year >= 1975
group by 
    commodity_desc, year
"""
agg_prod_region_all_years = pd.read_sql(query, conn)

# make stacked area by commodity type
chart = (
    alt.Chart(agg_prod_region_all_years)
    .mark_area()
    .encode(
        x=alt.X("year:O", axis=alt.Axis(title="Year")),
        y=alt.Y(
            "total_prod:Q", axis=alt.Axis(title="Crop Production in Bushels (bsh)")
        ),
        color=alt.Color(
            "commodity_desc:N", scale=color_scale, legend=alt.Legend(title="Crop")
        ),
    )
    .properties(
        title=alt.TitleParams(
            text="Annual Crop Production in the Midwest",
            subtitle="Corn, Soybeans, and Wheat in bushels (bsh)",
            anchor="middle",
        ),
        width=600,
        height=400,
    )
)

# save chart
file_name = "01_AGG_PRODUCTION_BY_CROP"
chart.save(f"{output_path}{file_name}.png")
chart.show()


## Total Area Planted


In [5]:
# pull total area planted by year
query = f"""
Select 
    commodity_desc,
    year, 
    sum(value) as total_area_planted,
    count(*) as num_counties
from {area_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year >= 1975
group by 
    commodity_desc, year
"""
agg_area_planted_region_all_years = pd.read_sql(query, conn)

# make stacked area
chart = (
    alt.Chart(agg_area_planted_region_all_years)
    .mark_area()
    .encode(
        x=alt.X("year:O", axis=alt.Axis(title="Year")),
        y=alt.Y(
            "total_area_planted:Q", axis=alt.Axis(title="Total Area Planted (in Acres)")
        ),
        color=alt.Color(
            "commodity_desc:N", scale=color_scale, legend=alt.Legend(title="Crop")
        ),
    )
    .properties(
        title=alt.TitleParams(
            text="Area Planted in Acres",
            subtitle="Corn, Soybeans, and Wheat",
            anchor="middle",
        ),
        width=600,
        height=400,
    )
)

# save chart
file_name = "02_AGG_AREA_PLANTED_BY_CROP"
chart.save(f"{output_path}{file_name}.png")
chart.show()


## Aggregate Yield

In [6]:
# merge production and area planted data
agg_yield_region_all_years = pd.merge(
    agg_area_planted_region_all_years,
    agg_prod_region_all_years,
    on=["commodity_desc", "year"],
)
agg_yield_region_all_years["yield"] = (
    agg_yield_region_all_years["total_prod"]
    / agg_yield_region_all_years["total_area_planted"]
)

# line chart by commodity over time
chart = (
    alt.Chart(agg_yield_region_all_years)
    .mark_line()
    .encode(
        x=alt.X("year:O", axis=alt.Axis(title="Year")),
        y=alt.Y("yield:Q", axis=alt.Axis(title="Yield (bsh per acre)")),
        color=alt.Color(
            "commodity_desc:N", scale=color_scale, legend=alt.Legend(title="Crop")
        ),
    )
    .properties(title="Annnual Yield by Crop", width=600, height=400)
)

# save
file_name = "03_AGG_YIELD_BY_CROP"
chart.save(f"{output_path}{file_name}.png")
chart.show()


# State Analysis


## Production Change Bar

In [7]:
# pull in avg production and area planted from beginning and end of period by state across all crop types
query = f"""
Select 
    avg(value) AS pres_prod,
    state_alpha
from {crop_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year between 2018 and 2023
group by state_alpha
"""
conn = sqlite3.connect(db_name)
avg_prod_2015_2020 = pd.read_sql(query, conn)
query = f"""
Select 
    avg(value) AS past_prod,
    state_alpha
from {crop_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year between 1975 and 1980
group by state_alpha
"""
avg_prod_1975_1980 = pd.read_sql(query, conn)
query = f"""
Select 
    avg(value) AS pres_area,
    state_alpha
from {area_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year between 2018 and 2023
group by state_alpha
"""
avg_area_2015_2020 = pd.read_sql(query, conn)
query = f"""
Select 
    avg(value) AS past_area,
    state_alpha
from {area_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year between 1975 and 1980
group by state_alpha
"""
avg_area_1975_1980 = pd.read_sql(query, conn)

# calculate past yield by state
avg_yield_past = pd.merge(avg_area_1975_1980, avg_prod_1975_1980, on=["state_alpha"])
avg_yield_past["yield_past"] = avg_yield_past["past_prod"] / avg_yield_past["past_area"]

# calculate present yield by state
avg_yield_present = pd.merge(avg_area_2015_2020, avg_prod_2015_2020, on=["state_alpha"])
avg_yield_present["yield_present"] = (
    avg_yield_present["pres_prod"] / avg_yield_present["pres_area"]
)

# merge and calculate change in yield
yield_change = pd.merge(avg_yield_past, avg_yield_present, on=["state_alpha"])
yield_change["abs_change_yield"] = (
    yield_change["yield_present"] - yield_change["yield_past"]
)
yield_change["perc_change_yield"] = (
    (yield_change["yield_present"] - yield_change["yield_past"])
    / yield_change["yield_past"]
) * 100

# pull in production by crop type
query = f"""
Select 
    avg(value) AS Value_20,
    commodity_desc,
    state_alpha
from {crop_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year between 2018 and 2023
group by state_alpha, commodity_desc
"""
conn = sqlite3.connect(db_name)
avg_prod_2015_2020 = pd.read_sql(query, conn)
query = f"""
Select 
    avg(value) AS Value_70,
    commodity_desc,
    state_alpha
from {crop_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year between 1975 and 1980
group by state_alpha, commodity_desc
"""
avg_prod_1975_1980 = pd.read_sql(query, conn)

# calculate change in production across time period
prod_change = pd.merge(
    avg_prod_2015_2020, avg_prod_1975_1980, on=["commodity_desc", "state_alpha"]
)
prod_change["abs_change_in_prod"] = prod_change["Value_20"] - prod_change["Value_70"]

# sort states by increase in production amount
state_totals = (
    prod_change.groupby("state_alpha")["abs_change_in_prod"]
    .sum()
    .sort_values(ascending=False)
)
prod_change["state_alpha"] = pd.Categorical(
    prod_change["state_alpha"], categories=state_totals.index, ordered=True
)

# make bar chart for production change by state and commodity type
bar_chart = (
    alt.Chart(prod_change)
    .mark_bar()
    .encode(
        x=alt.X("state_alpha:O", title="State", sort=state_totals.index.tolist()),
        y=alt.Y("abs_change_in_prod:Q", title="Change in Production (bsh)"),
        color=alt.Color(
            "commodity_desc:N", scale=color_scale, legend=alt.Legend(title="Crop")
        ),
        xOffset="commodity_desc:N",
    )
)

# make line chart for yield change
line_chart = (
    alt.Chart(yield_change)
    .mark_line(color="red")
    .encode(
        x=alt.X("state_alpha:O", title="State", sort=state_totals.index.tolist()),
        y=alt.Y("abs_change_yield:Q", title="Change in Yield (bsh / acre)"),
    )
)

# combine
combined_chart = (
    alt.layer(bar_chart, line_chart)
    .resolve_scale(y="independent")
    .properties(
        title=alt.TitleParams(
            text="Change in Production and Yield by State",
            subtitle="Between 1975 and 2023",
            anchor="middle",
        ),
        width=600,
        height=400,
    )
)

# save
file_name = "04_STATE_PRODUCTION_AND_YIELD_CHANGE_BY_CROP"
combined_chart.save(f"{output_path}{file_name}.png")
combined_chart.show()


# County Level


In [8]:
# create geopandas dfs
url = data.us_10m.url
states_gdf = gpd.read_file(url, layer="states")
counties_gdf = gpd.read_file(url, layer="counties")

In [9]:
def load_midwest_counties(db_name, table, counties_gdf):

    query = f"""
    Select 
        distinct
        state_ansi
    from {table} 
    """
    conn = sqlite3.connect(db_name)
    check = pd.read_sql(query, conn)

    state_ansi_list = check.iloc[:, 0].to_list()
    midwest_counties_gdf = counties_gdf[
        counties_gdf["id"].str[:2].isin(state_ansi_list)
    ]
    midwest_counties_gdf = midwest_counties_gdf[
        counties_gdf["id"].str[:2].isin(state_ansi_list)
        & (counties_gdf["id"].str.len() == 5)
    ]

    return midwest_counties_gdf


def make_background_maps(midwest_counties_gdf, states_gdf):
    county_map_background = (
        alt.Chart(midwest_counties_gdf)
        .mark_geoshape(fill="lightgray", stroke="black", strokeWidth=0.5)
        .properties(width=800, height=500)
        .project("albersUsa")
    )

    midwestern_state_ids = [17, 18, 19, 20, 26, 27, 29, 31, 38, 39, 46, 55]

    state_map_background = (
        alt.Chart(states_gdf)
        .mark_geoshape(fill=None, stroke="black", strokeWidth=1.5)
        .transform_filter(
            alt.FieldOneOfPredicate(field="id", oneOf=midwestern_state_ids)
        )
        .properties(width=800, height=500)
        .project("albersUsa")
    )

    return county_map_background, state_map_background


def create_climate_maps(gpd, metric_col, metric_full_name):

    columns = ["id", "geometry"]
    metric_df = gpd[columns + [metric_col]]

    county_map_filled = (
        alt.Chart(metric_df)
        .mark_geoshape(stroke="black", strokeWidth=0.5)
        .encode(
            color=alt.Color(
                f"{metric_col}:Q",
                scale=alt.Scale(
                    scheme="redyellowgreen",
                    domainMid=0,
                    domain=[
                        metric_df[f"{metric_col}"].min(),
                        metric_df[f"{metric_col}"].max(),
                    ],
                ),
                title=f"{metric_full_name}",
            ),
            tooltip=["id:N", f"{metric_col}:Q"],
        )
        .properties(
            title=f"Change in Average Annual {metric_full_name}, between 1980 and 2023",
            width=800,
            height=500,
        )
        .project("albersUsa")
    )

    return county_map_filled


## Yield Maps

In [10]:
# pull in production and area data by county for begin and end of period
query = f"""
Select 
    avg(value) AS avg_prod_present,
    commodity_desc,
    state_alpha, 
    state_ansi|| county_ansi as id
from {crop_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year between 2018 and 2023
group by state_ansi|| county_ansi, commodity_desc
"""
avg_prod_present = pd.read_sql(query, conn)
query = f"""
Select 
    avg(value) AS avg_prod_past,
    commodity_desc,
    state_alpha,
    state_ansi|| county_ansi as id
from {crop_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year between 1975 and 1980
group by state_ansi|| county_ansi, commodity_desc
"""
avg_prod_past = pd.read_sql(query, conn)
query = f"""
Select 
    avg(value) AS avg_area_present,
    commodity_desc,
    state_alpha,
    state_ansi|| county_ansi as id
from {area_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year between 2018 and 2023
group by state_ansi|| county_ansi , commodity_desc
"""
avg_area_present = pd.read_sql(query, conn)
query = f"""
Select 
    avg(value) AS avg_area_past,
    commodity_desc,
    state_alpha,
    state_ansi|| county_ansi as id
from {area_table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and asd_code != 99
and county_ansi != ""
and year between 1975 and 1980
group by state_ansi|| county_ansi , commodity_desc
"""
avg_area_past = pd.read_sql(query, conn)

# calc yield
avg_yield_past = pd.merge(
    avg_prod_past, avg_area_past, on=["commodity_desc", "id", "state_alpha"]
)
avg_yield_past["yield_past"] = (
    avg_yield_past["avg_prod_past"] / avg_yield_past["avg_area_past"]
)
avg_yield_present = pd.merge(
    avg_prod_present, avg_area_present, on=["commodity_desc", "id", "state_alpha"]
)
avg_yield_present["yield_present"] = (
    avg_yield_present["avg_prod_present"] / avg_yield_present["avg_area_present"]
)

# calc change in yield
yield_change = pd.merge(
    avg_yield_past, avg_yield_present, on=["commodity_desc", "id", "state_alpha"]
)
yield_change["abs_change_yield"] = (
    yield_change["yield_present"] - yield_change["yield_past"]
)
yield_change["perc_change_yield"] = (
    (yield_change["yield_present"] - yield_change["yield_past"])
    / yield_change["yield_past"]
) * 100


In [11]:
# make geopandas df with yield info
midwest_counties_gdf = load_midwest_counties(db_name, crop_table, counties_gdf)
merged = gpd.GeoDataFrame(
    pd.merge(yield_change, midwest_counties_gdf, on="id", how="left")
)
merged.set_geometry("geometry", inplace=True)

# make background maps for layering
county_map_background, state_map_background = make_background_maps(
    midwest_counties_gdf, states_gdf
)

# make corn yield change map
corn_df = merged[merged["commodity_desc"] == "CORN"]
county_map_filled = create_climate_maps(corn_df, "abs_change_yield", "Change in Yield")

# combine filled map with layers
layered_map = county_map_background + county_map_filled + state_map_background

layered_map.show()
file_name = "05_YIELD_CHANGE_MAP"
layered_map.save(f"{output_path}{file_name}.png")


  result = super().__getitem__(key)


## Climate Data

In [12]:
# mapping and filtering codes
noaa_midwest_codes = [
    "11",
    "12",
    "13",
    "14",
    "20",
    "21",
    "23",
    "25",
    "32",
    "33",
    "39",
    "47",
]
fips_mapping = {
    "11": "17",  # Illinois
    "12": "18",  # Indiana
    "13": "19",  # Iowa
    "14": "20",  # Kansas
    "20": "26",  # Michigan
    "21": "27",  # Minnesota
    "23": "29",  # Missouri
    "25": "31",  # Nebraska
    "32": "38",  # North Dakota
    "33": "39",  # Ohio
    "39": "46",  # South Dakota
    "47": "55",  # Wisconsin
}
final_df_cols = ["Year", "County_Code", "state_fips"]


# function to parse raw input data
def parse_climdiv_data(
    file_path,
    yearly_avg_column_name,
    midwest_codes=noaa_midwest_codes,
    final_df_cols=final_df_cols,
):
    column_specs = [
        (0, 2),  # STATE-CODE (1-2)
        (2, 5),  # DIVISION-NUMBER (3-5)
        (5, 7),  # ELEMENT CODE (6-7)
        (7, 11),  # YEAR (8-11)
        (11, 18),  # JAN-VALUE (12-18)
        (18, 25),  # FEB-VALUE (19-25)
        (25, 32),  # MAR-VALUE (26-32)
        (32, 39),  # APR-VALUE (33-39)
        (39, 46),  # MAY-VALUE (40-46)
        (46, 53),  # JUNE-VALUE (47-53)
        (53, 60),  # JULY-VALUE (54-60)
        (60, 67),  # AUG-VALUE (61-67)
        (67, 74),  # SEPT-VALUE (68-74)
        (74, 81),  # OCT-VALUE (75-81)
        (81, 88),  # NOV-VALUE (82-88)
        (88, 95),  # DEC-VALUE (89-95)
    ]

    column_names = [
        "State_Code",
        "Division_Number",
        "Element_Code",
        "Year",
        "Jan_Value",
        "Feb_Value",
        "Mar_Value",
        "Apr_Value",
        "May_Value",
        "Jun_Value",
        "Jul_Value",
        "Aug_Value",
        "Sep_Value",
        "Oct_Value",
        "Nov_Value",
        "Dec_Value",
    ]

    df = pd.read_fwf(
        file_path,
        colspecs=column_specs,
        names=column_names,
        dtype={"State_Code": str, "Division_Number": str},
    )

    df["state_fips"] = df["State_Code"].map(fips_mapping)
    df["County_Code"] = df["state_fips"] + df["Division_Number"]
    numeric_columns = column_names[4:]
    df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors="coerce")

    # handle missing values
    df.replace(
        {
            "Jan_Value": {-99.99: None, -9.99: None},
            "Feb_Value": {-99.99: None, -9.99: None},
            "Mar_Value": {-99.99: None, -9.99: None},
            "Apr_Value": {-99.99: None, -9.99: None},
            "May_Value": {-99.99: None, -9.99: None},
            "Jun_Value": {-99.99: None, -9.99: None},
            "Jul_Value": {-99.99: None, -9.99: None},
            "Aug_Value": {-99.99: None, -9.99: None},
            "Sep_Value": {-99.99: None, -9.99: None},
            "Oct_Value": {-99.99: None, -9.99: None},
            "Nov_Value": {-99.99: None, -9.99: None},
            "Dec_Value": {-99.99: None, -9.99: None},
        },
        inplace=True,
    )

    df[yearly_avg_column_name] = df[numeric_columns].mean(axis=1)

    midwest_df = df[df["State_Code"].isin(midwest_codes)]

    midwest_df_post1950 = midwest_df[midwest_df["Year"] > 1950]

    output_columns = final_df_cols + [yearly_avg_column_name]

    return midwest_df_post1950[output_columns]


precipitation_path = "../data/climate_data/climdiv-pcpncy-v1.0.0-20241021.txt"
avg_temp_path = "../data/climate_data/climdiv-tmpccy-v1.0.0-20241021.txt"
max_temp_path = "../data/climate_data/climdiv-tmaxcy-v1.0.0-20241021.txt"
min_temp_path = "../data/climate_data/climdiv-tmincy-v1.0.0-20241021.txt"
precip_df = parse_climdiv_data(precipitation_path, "ann_avg_precip")
avg_temp_df = parse_climdiv_data(avg_temp_path, "ann_avg_temp")
max_temp_df = parse_climdiv_data(max_temp_path, "ann_max_temp")
min_temp_df = parse_climdiv_data(min_temp_path, "ann_min_temp")

merge_cols = ["Year", "County_Code", "state_fips"]
annual_climate_data_df = (
    precip_df.merge(avg_temp_df, on=merge_cols)
    .merge(max_temp_df, on=merge_cols)
    .merge(min_temp_df, on=merge_cols)
)
annual_climate_data_df = annual_climate_data_df.sort_values(by=["County_Code", "Year"])

rolling_avg_30yr_climate_data_df = (
    annual_climate_data_df.groupby("County_Code")[
        ["Year", "ann_avg_precip", "ann_avg_temp", "ann_max_temp", "ann_min_temp"]
    ]
    .apply(lambda x: x.set_index("Year").rolling(window=30).mean())
    .reset_index()
)

climate_data_1980 = rolling_avg_30yr_climate_data_df[
    rolling_avg_30yr_climate_data_df["Year"].isin([1980])
]
climate_data_1980.rename(columns={"County_Code": "id"}, inplace=True)

climate_data_2023 = rolling_avg_30yr_climate_data_df[
    rolling_avg_30yr_climate_data_df["Year"].isin([2023])
]
climate_data_2023.rename(columns={"County_Code": "id"}, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  climate_data_1980.rename(columns={"County_Code": "id"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  climate_data_2023.rename(columns={"County_Code": "id"}, inplace=True)


In [13]:
def calc_difference(metric, type):

    filtered_df = rolling_avg_30yr_climate_data_df[
        rolling_avg_30yr_climate_data_df["Year"].isin([1980, 2023])
    ]

    pivot_df = filtered_df.pivot(index="County_Code", columns="Year", values=metric)

    if type == "abs_change":
        pivot_df[f"{metric}_{type}"] = pivot_df[2023] - pivot_df[1980]
    if type == "pct_change":
        pivot_df[f"{metric}_{type}"] = (
            (pivot_df[2023] - pivot_df[1980]) / pivot_df[1980]
        ) * 100

    pivot_df.reset_index(inplace=True)

    return pivot_df, f"{metric}_{type}"


def gen_change_df(climate_metrics, type):
    change_df_list = []
    for metric in climate_metrics:
        change_df, col_name = calc_difference(metric, type)
        # Append the relevant columns to the list
        change_df_list.append(change_df[["County_Code", col_name]])

    # Concatenate all DataFrames in the list into a single DataFrame
    merge_cols = ["County_Code"]
    change_climate_data_df = (
        change_df_list[0]
        .merge(change_df_list[1], on=merge_cols)
        .merge(change_df_list[2], on=merge_cols)
        .merge(change_df_list[3], on=merge_cols)
    )
    change_climate_data_df.reset_index(drop=True, inplace=True)
    return change_climate_data_df


### Precipitation Change Map

In [14]:
# make change in climate geopandas df
midwest_counties_gdf = load_midwest_counties(db_name, area_table, counties_gdf)
climate_metrics = ["ann_avg_precip", "ann_avg_temp", "ann_max_temp", "ann_min_temp"]
abs_change_climate_data_df = gen_change_df(climate_metrics, "abs_change")
abs_change_climate_data_df.rename(columns={"County_Code": "id"}, inplace=True)
change_climate_data_gdf = gpd.GeoDataFrame(
    pd.merge(abs_change_climate_data_df, midwest_counties_gdf, on="id", how="left")
)
change_climate_data_gdf.set_geometry("geometry", inplace=True)

# gen background maps and chloropleth by precipitation change
county_map_background, state_map_background = make_background_maps(
    midwest_counties_gdf, states_gdf
)
county_map_filled = create_climate_maps(
    change_climate_data_gdf, "ann_avg_precip_abs_change", "Change in inches"
)
county_map_filled = county_map_filled.properties(
    title="Change in Annual Precipitation between 1980 and 2023"
)

# Layer the filled map on top of the gray background
layered_map = county_map_background + county_map_filled + state_map_background

# save
layered_map.show()
file_name = "05_PRECIP_CHANGE_MAP"
layered_map.save(f"{output_path}{file_name}.png")


  result = super().__getitem__(key)


### Temperature Change Map

In [15]:
# generte change in ann temp map and layer with background maps
county_map_filled = create_climate_maps(
    change_climate_data_gdf, "ann_avg_temp_abs_change", "Change in deg. F"
)
county_map_filled = county_map_filled.properties(
    title="Change in Annual Average Temperature between 1980 and 2023"
)
layered_map = county_map_background + county_map_filled + state_map_background

# save
layered_map.show()
file_name = "06_TEMP_CHANGE_MAP"
layered_map.save(f"{output_path}{file_name}.png")


## County Climate Scatters


In [16]:
merged_1980 = pd.merge(yield_change, climate_data_1980, on="id", how="left")

charts = []

climate_features = ["ann_avg_precip", "ann_avg_temp"]
x_axes = ["Average Annual Precipitation", "Average Annual Temperature"]
# Create a scatter plot with facets for each commodity

corn_merged_1980 = merged_1980[merged_1980["commodity_desc"] == "CORN"]
for i, feature in enumerate(climate_features):
    scatter = (
        alt.Chart(corn_merged_1980)
        .mark_circle(size=60)
        .encode(
            x=alt.X(
                f"{feature}:Q",
                scale=alt.Scale(
                    domain=[
                        merged_1980[f"{feature}"].min(),
                        merged_1980[f"{feature}"].max(),
                    ]
                ),
                title=x_axes[i],
            ),
            y=alt.Y("yield_past:Q", title="" if i == 1 else "Yield (1980)"),
        )
    )

    line_of_best_fit = (
        scatter.transform_regression(
            f"{feature}",
            "yield_past",
            method="poly",
        )
        .transform_calculate(ann_avg_temp_squared=f"datum.{feature} * datum.{feature}")
        .mark_line(color="red")
    )

    chart = scatter + line_of_best_fit
    charts.append(chart)

# stitch the charts together horizontally
final_chart = alt.hconcat(*charts).properties(
    title=alt.TitleParams(
        "1980: Yield x Climate Features", anchor="middle", fontSize=16
    )
)

# save
final_chart.show()
file_name = "07_SCATTER_1980"
final_chart.save(f"{output_path}{file_name}.png")


In [17]:
merged_2023 = pd.merge(yield_change, climate_data_2023, on="id", how="left")

climate_features = ["ann_avg_precip", "ann_avg_temp"]
x_axes = ["Average Annual Precipitation", "Average Annual Temperature"]
corn_merged_2023 = merged_2023[merged_2023["commodity_desc"] == "CORN"]

charts = []

for i, feature in enumerate(climate_features):
    scatter = (
        alt.Chart(corn_merged_2023)
        .mark_circle(size=60)
        .encode(
            x=alt.X(
                f"{feature}:Q",
                scale=alt.Scale(
                    domain=[
                        merged_2023[f"{feature}"].min(),
                        merged_2023[f"{feature}"].max(),
                    ]
                ),
                title=x_axes[i],
            ),
            y=alt.Y("yield_present:Q", title="" if i == 1 else "Yield (2023)"),
        )
    )

    line_of_best_fit = (
        scatter.transform_regression(
            f"{feature}",
            "yield_present",
            method="poly",
        )
        .transform_calculate(ann_avg_temp_squared=f"datum.{feature} * datum.{feature}")
        .mark_line(color="red")
    )

    chart = scatter + line_of_best_fit
    charts.append(chart)

final_chart = alt.hconcat(*charts).properties(
    title=alt.TitleParams(
        "2023: Yield x Climate Features", anchor="middle", fontSize=16
    )
)

final_chart.show()
file_name = "08_SCATTER_2023"
final_chart.save(f"{output_path}{file_name}.png")


## Yield Heat Maps

In [18]:
change_climate_data_gdf = gpd.GeoDataFrame(
    pd.merge(climate_data_1980, midwest_counties_gdf, on="id", how="left")
)

merged = gpd.GeoDataFrame(
    pd.merge(yield_change, change_climate_data_gdf, on="id", how="inner")
)
merged.set_geometry("geometry", inplace=True)

corn_merged = merged[merged["commodity_desc"] == "CORN"]
corn_merged = corn_merged[
    (corn_merged["ann_avg_temp"] >= 36) & (corn_merged["ann_avg_temp"] <= 58)
]
heatmap_df = corn_merged[["ann_avg_temp", "ann_avg_precip", "yield_past"]]
heatmap = (
    alt.Chart(heatmap_df)
    .mark_rect()
    .encode(
        x=alt.X(
            "ann_avg_temp:Q", bin=alt.Bin(maxbins=12), title="Temperature (in deg. F)"
        ),
        y=alt.Y(
            "ann_avg_precip:Q",
            bin=alt.Bin(maxbins=12),
            title="Precipitation (in inches)",
        ),
        color=alt.Color(
            "mean(yield_past):Q",
            scale=alt.Scale(range=["yellow", "green"], domain=[20, 80]),
            title="Avg Yield",
        ),
    )
    .properties(
        width=600,
        height=400,
        title=alt.TitleParams(
            text="Heat Map of Average Corn Yield (1975-1980)",
            subtitle="Corn Yield (bsh/acre) for Counties Grouped by Climate Averages in 1980",
            anchor="middle",
        ),
    )
)

heatmap.display()
file_name = "09_HEATMAP_1980"
heatmap.save(f"{output_path}{file_name}.png")


change_climate_data_gdf = gpd.GeoDataFrame(
    pd.merge(climate_data_2023, midwest_counties_gdf, on="id", how="left")
)

merged = gpd.GeoDataFrame(
    pd.merge(yield_change, change_climate_data_gdf, on="id", how="inner")
)
merged.set_geometry("geometry", inplace=True)

corn_merged = merged[merged["commodity_desc"] == "CORN"]
corn_merged = corn_merged[
    (corn_merged["ann_avg_temp"] >= 36) & (corn_merged["ann_avg_temp"] <= 58)
]
heatmap_df = corn_merged[["ann_avg_temp", "ann_avg_precip", "yield_present"]]
heatmap = (
    alt.Chart(heatmap_df)
    .mark_rect()
    .encode(
        x=alt.X(
            "ann_avg_temp:Q", bin=alt.Bin(maxbins=12), title="Temperature (in deg. F)"
        ),
        y=alt.Y(
            "ann_avg_precip:Q",
            bin=alt.Bin(maxbins=12),
            title="Precipitation (in inches)",
        ),
        color=alt.Color(
            "mean(yield_present):Q",
            scale=alt.Scale(range=["yellow", "green"], domain=[50, 150]),
            title="Avg Yield",
        ),
    )
    .properties(
        width=600,
        height=400,
        title=alt.TitleParams(
            text="Heat Map of Average Corn Yield (2018-2023)",
            subtitle="Corn Yield (bsh/acre) for Counties Grouped by Climate Averages in 2023 ",
            anchor="middle",
        ),
    )
)


heatmap.display()
file_name = "10_HEATMAP_2023"
heatmap.save(f"{output_path}{file_name}.png")


## Temperature Map Relative to Optimal Range

In [19]:
climate_data_1980 = rolling_avg_30yr_climate_data_df[
    rolling_avg_30yr_climate_data_df["Year"].isin([1980])
]
climate_data_1980.rename(columns={"County_Code": "id"}, inplace=True)


def temp_distance_category(temp):
    if 48 <= temp <= 52:
        return "Optimal Range (48 - 52)"
    elif 44 <= temp < 48:
        return "0-4 degrees below range"
    elif 40 <= temp < 44:
        return "4-8 degrees below range"
    elif temp < 40:
        return "More than 8 degrees below range"
    elif 52 < temp <= 56:
        return "0-4 degrees above range"
    elif 56 < temp <= 60:
        return "4-8 degrees above range"
    else:
        return "More than 8 degrees above range"


climate_data_1980["temp_distance_category"] = climate_data_1980["ann_avg_temp"].apply(
    temp_distance_category
)
climate_data_1980 = climate_data_1980.drop(["ann_max_temp", "ann_min_temp"], axis=1)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  climate_data_1980.rename(columns={"County_Code": "id"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  climate_data_1980["temp_distance_category"] = climate_data_1980["ann_avg_temp"].apply(


In [20]:
color_scheme = {
    "More than 8 degrees below range": "#00008B",  # Dark Blue
    "4-8 degrees below range": "#4682B4",  # Steel Blue
    "0-4 degrees below range": "#87CEEB",  # Sky Blue
    "Optimal Range (48 - 52)": "#3CB371",  # Lime Green
    "0-4 degrees above range": "#FFD700",  # Gold
    "4-8 degrees above range": "#FFA500",  # Orange
    "More than 8 degrees above range": "#FF0000",  # Red
}

change_climate_data_gdf = gpd.GeoDataFrame(
    pd.merge(climate_data_1980, midwest_counties_gdf, on="id", how="left")
)
change_climate_data_gdf.set_geometry("geometry", inplace=True)

county_map_background, state_map_background = make_background_maps(
    midwest_counties_gdf, states_gdf
)

county_map_filled = (
    alt.Chart(change_climate_data_gdf)
    .mark_geoshape(stroke="black", strokeWidth=0.5)
    .encode(
        color=alt.Color(
            "temp_distance_category:N",
            scale=alt.Scale(
                domain=list(color_scheme.keys()), range=list(color_scheme.values())
            ),
            legend=alt.Legend(title="Temperature Range"),
        )
    )
    .properties(
        title="Average Temperature Categories Relative to Optimal Temperature Range in 1980",
        width=800,
        height=500,
    )
    .project("albersUsa")
)

layered_map = county_map_background + county_map_filled + state_map_background
layered_map.show()

file_name = "11_TEMP_CAT_MAP_1980"
layered_map.save(f"{output_path}{file_name}.png")


In [21]:
climate_data_2023 = rolling_avg_30yr_climate_data_df[
    rolling_avg_30yr_climate_data_df["Year"].isin([2023])
]
climate_data_2023.rename(columns={"County_Code": "id"}, inplace=True)
climate_data_2023["temp_distance_category"] = climate_data_2023["ann_avg_temp"].apply(
    temp_distance_category
)
climate_data_2023 = climate_data_2023.drop(["ann_max_temp", "ann_min_temp"], axis=1)

change_climate_data_gdf = gpd.GeoDataFrame(
    pd.merge(climate_data_2023, midwest_counties_gdf, on="id", how="left")
)
change_climate_data_gdf.set_geometry("geometry", inplace=True)

county_map_background, state_map_background = make_background_maps(
    midwest_counties_gdf, states_gdf
)

county_map_filled = (
    alt.Chart(change_climate_data_gdf)
    .mark_geoshape(stroke="black", strokeWidth=0.5)
    .encode(
        color=alt.Color(
            "temp_distance_category:N",
            scale=alt.Scale(
                domain=list(color_scheme.keys()), range=list(color_scheme.values())
            ),
            legend=alt.Legend(title="Temperature Range"),
        )
    )
    .properties(
        title="Average Temperature Categories Relative to Optimal Temperature Range in 2023",
        width=800,
        height=500,
    )
    .project("albersUsa")
)

layered_map = county_map_background + county_map_filled + state_map_background
layered_map.show()
file_name = "12_TEMP_CAT_MAP_2023"
layered_map.save(f"{output_path}{file_name}.png")


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  climate_data_2023.rename(columns={"County_Code": "id"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  climate_data_2023["temp_distance_category"] = climate_data_2023["ann_avg_temp"].apply(
