In [1]:
import pandas as pd
import sqlite3
import altair as alt
import geopandas as gpd

# Regional Analysis

## Total Production

In [None]:
# Regional view: total production per commodity and year, drawn as an area chart.
db_name = 'field_crops.db'
table = 'midwest_key_field_crops_cleaned'

# The corn-silage series is excluded by its short_desc; its description says it is
# measured in tons — presumably so it is not summed with other units (TODO confirm).
query = f"""
Select 
    commodity_desc,
    year, 
    sum(value) as total_prod
from {table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
group by 
    commodity_desc, year
"""

# Close the connection explicitly: a sqlite3 connection stays open for the life
# of the kernel otherwise, and every cell in this notebook opens its own.
conn = sqlite3.connect(db_name)
try:
    result = pd.read_sql(query, conn)
finally:
    conn.close()

# Create the Altair area chart (one coloured band per commodity)
chart = alt.Chart(result).mark_area().encode(
    x='year:O',  # Treat 'year' as an ordinal value (categorical)
    y='total_prod:Q',
    color='commodity_desc:N'
).properties(
    title='Total Production in Crop Over Time',
    width=600,
    height=400
)
chart


# State Analysis


## Production

### Aggregated by State

In [None]:
# State view: total production per state and year, drawn as one line per state.
db_name = 'field_crops.db'
table = 'midwest_key_field_crops_cleaned'

# The corn-silage series is excluded by its short_desc; its description says it is
# measured in tons — presumably so it is not summed with other units (TODO confirm).
query = f"""
Select 
    state_alpha,
    year, 
    sum(value) as total_prod
from {table} 
where short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
group by 
    state_alpha, year
"""

# Close the connection explicitly so the database handle is not left open
# for the life of the kernel.
conn = sqlite3.connect(db_name)
try:
    result = pd.read_sql(query, conn)
finally:
    conn.close()

# Create the Altair line chart (one line per state)
chart = alt.Chart(result).mark_line().encode(
    x='year:O',  # Treat 'year' as an ordinal value (categorical)
    y='total_prod:Q',
    color='state_alpha:N'
).properties(
    title='Total Production in Crop Over Time',
    width=600,
    height=400
)
chart

## Area Planted

### Total Area Planted Aggregated by State

### Land Usage Change by State

In [None]:
# Percentage change in average area planted per state and commodity between
# the 1975-1980 window and the 2015-2020 window.
db_name = 'field_crops.db'
table = 'midwest_area_planted_cleaned'


def _avg_area_planted(conn, first_year, last_year, value_col):
    """Average `value` per state and commodity over an inclusive year range.

    Args:
        conn: Open sqlite3 connection to the database.
        first_year: First year of the window (inclusive, SQL BETWEEN).
        last_year: Last year of the window (inclusive, SQL BETWEEN).
        value_col: Alias given to the averaged column in the result.

    Returns:
        DataFrame with the columns `value_col`, commodity_desc and state_alpha.
    """
    # The empty string is written as '' (single quotes): "" is an identifier in
    # standard SQL and is only accepted as a string by a legacy SQLite quirk.
    window_query = f"""
Select 
    avg(value) AS {value_col},
    commodity_desc,
    state_alpha
from {table} 
where asd_code != 99
and county_ansi != ''
and year between {int(first_year)} and {int(last_year)}
group by state_alpha, commodity_desc
"""
    return pd.read_sql(window_query, conn)


# One connection serves both windows and is closed as soon as they are loaded.
conn = sqlite3.connect(db_name)
try:
    avg_area_2015_2020 = _avg_area_planted(conn, 2015, 2020, 'Value_20')
    avg_area_1975_1980 = _avg_area_planted(conn, 1975, 1980, 'Value_70')
finally:
    conn.close()

# The baseline was previously stored under a name that said 1965-1970 although
# the query has always read 1975-1980; the old name is kept as an alias in case
# other cells still refer to it.
avg_area_1965_1970 = avg_area_1975_1980

# Inner join: only state/commodity pairs present in both windows are compared.
prod_change = pd.merge(avg_area_2015_2020, avg_area_1975_1980, on=["commodity_desc", "state_alpha"])
prod_change["perc_change_in_area_planted"] = (
    (prod_change['Value_20'] - prod_change['Value_70']) / prod_change['Value_70']
) * 100

# Grouped bar chart: states along the x-axis, one bar per commodity within each state.
# (An earlier column-faceted variant was built and immediately overwritten; it is dropped.)
chart = alt.Chart(prod_change).mark_bar().encode(
    x=alt.X('state_alpha:O', title='Category'),  # States on the x-axis
    y=alt.Y('perc_change_in_area_planted:Q', title='Percentage Change in Area Planted'),  # Values on y-axis
    color='commodity_desc:N',  # Color by commodity
    xOffset='commodity_desc:N'  # Offset bars by commodity to group them within each state
).properties(
    title='Grouped Bar Chart by State'
).configure_title(
    fontSize=20,
    anchor='start'
)

chart

# County Level


In [None]:
from vega_datasets import data

# Both names hold the URL exposed by data.us_10m.url; the same attribute is
# used for the states variable and the counties variable.
states = counties = data.us_10m.url
print(states, counties, sep='\n')

In [None]:
# Read each source into its own GeoDataFrame, states first and counties second.
# NOTE(review): `states` and `counties` are assigned from the same attribute in
# the preceding cell, so both reads target the same file — confirm which layer
# read_file returns and whether a `layer=` argument is needed.
states_gdf, counties_gdf = [gpd.read_file(source) for source in (states, counties)]

## Total Land Usage 

### Point in Time (5 year average from 2015 - 2020) map

In [None]:
# County choropleths of area planted, one map per crop, layered over a grey
# outline of every county in the states present in the table.
db_name = 'field_crops.db'
table = 'midwest_area_planted_cleaned'

# NOTE(review): the section heading mentions a 2015-2020 average, but this query
# reads the single year 2020 — confirm which one is intended.
# The value column is aliased explicitly: the charts below encode 'Value', and
# SQLite does not specify the name of an un-aliased result column.
query = f"""
Select 
    value AS Value,
    commodity_desc,
    state_ansi|| county_ansi as id
from {table} 
where year = 2020
and asd_code != 99

"""
conn = sqlite3.connect(db_name)
try:
    result = pd.read_sql(query, conn)

    # Distinct state codes in the table, used to restrict the outline layer.
    query = f"""
Select 
    distinct
    state_ansi
from {table} 
"""
    check = pd.read_sql(query, conn)
finally:
    # One connection serves both queries and is always released.
    conn.close()

# Left join keeps every data row, including ids with no matching shape
# (those rows carry no geometry). Passing geometry= avoids mutating in place.
merged = gpd.GeoDataFrame(
    pd.merge(result, counties_gdf, on='id', how='left'),
    geometry='geometry',
)

state_ansi_list = check.iloc[:, 0].to_list()
midwest_counties_gdf = counties_gdf[
    counties_gdf['id'].str[:2].isin(state_ansi_list) &  # Check if first 2 characters are in the state ANSI list
    (counties_gdf['id'].str.len() == 5)  # Ensure 'id' has exactly 5 characters
]

crop_list = ['CORN', 'SOYBEANS', 'WHEAT']

# The background layer does not depend on the crop, so it is built once.
county_map_background = alt.Chart(midwest_counties_gdf).mark_geoshape(
    fill='lightgray',  # Background color
    stroke='black',    # Outline color for counties
    strokeWidth=0.5    # Thickness of county borders
).properties(
    width=800,
    height=500
).project('albersUsa')  # Use Albers USA projection

for crop in crop_list:
    crop_df = merged[merged['commodity_desc'] == crop]

    # Define the filled map chart
    county_map_filled = alt.Chart(crop_df).mark_geoshape(
        stroke='black',   # Outline color for counties
        strokeWidth=0.5   # Thickness of county borders
    ).encode(
        color=alt.Color('Value:Q', scale=alt.Scale(scheme='blues')),  # Sequential color scale for the 'Value' column
        tooltip=['id:N', 'Value:Q']  # Tooltip with county ID and value
    ).properties(
        title=f'Map of Area Planted for {crop}',
        width=800,
        height=500
    ).project('albersUsa')  # Use Albers USA projection

    # Layer the filled map on top of the gray background
    layered_map = county_map_background + county_map_filled

    # Render inline; display() matches the heat-map cell at the end of the notebook.
    layered_map.display()


In [None]:
# Binned longitude/latitude heat map of county-level average production (2015-2020),
# positioned by county centroid.
db_name = 'field_crops.db'
table = 'midwest_key_field_crops_cleaned'

# The silage-in-tons series is excluded here as well, matching the other cells
# that aggregate this table. NOTE(review): confirm this exclusion is wanted for
# the county view too.
query = f"""
Select 
    avg(value) AS Value,
    commodity_desc,
    state_ansi|| county_ansi as id
from {table} 
where asd_code != 99
and short_desc != 'CORN, SILAGE - PRODUCTION, MEASURED IN TONS'
and year between 2015 and 2020
group by state_ansi|| county_ansi, commodity_desc


"""
conn = sqlite3.connect(db_name)
try:
    result = pd.read_sql(query, conn)

    # Distinct state codes in the table, used to restrict the county shapes.
    query = f"""
Select 
    distinct
    state_ansi
from {table} 
"""
    check = pd.read_sql(query, conn)
finally:
    # One connection serves both queries and is always released.
    conn.close()

counties_gdf = gpd.read_file(counties)
state_ansi_list = check.iloc[:, 0].to_list()
counties_gdf = counties_gdf[counties_gdf['id'].str.len() == 5]  # keep 5-character ids only

# .copy() gives an independent frame, so adding columns below does not write
# into a slice of counties_gdf (which raises SettingWithCopyWarning).
midwest_counties_gdf = counties_gdf[counties_gdf['id'].str[:2].isin(state_ansi_list)].copy()

# Get the centroid of each polygon
# NOTE(review): centroids are computed in whatever CRS the file was read with —
# confirm that is acceptable for binning.
midwest_counties_gdf['centroid'] = midwest_counties_gdf['geometry'].centroid

# Extract longitude (x) and latitude (y) from the centroid
midwest_counties_gdf['longitude'] = midwest_counties_gdf['centroid'].x
midwest_counties_gdf['latitude'] = midwest_counties_gdf['centroid'].y

# Inner join: only counties that have both data and a shape are kept.
merged = gpd.GeoDataFrame(
    pd.merge(result, midwest_counties_gdf, on='id', how='inner'),
    geometry='geometry',
)

# Every commodity row of a county lands in the same cell, so sum(Value) adds
# the commodities together within a bin.
heatmap_df = merged[['longitude', 'latitude', 'Value']]
heatmap = (
    alt.Chart(heatmap_df)
    .mark_rect()
    .encode(
        x=alt.X('longitude:Q', bin=alt.Bin(maxbins=30), title='Longitude'),
        y=alt.Y('latitude:Q', bin=alt.Bin(maxbins=30), title='Latitude'),
        color=alt.Color('sum(Value):Q', scale=alt.Scale(scheme='viridis'), title='Value'),
        # Tooltip fields use the same bins as x/y. An unaggregated, unbinned
        # field in any channel becomes an extra group-by key, which would split
        # each cell into one mark per distinct coordinate and break the sum.
        tooltip=[
            alt.Tooltip('longitude:Q', bin=alt.Bin(maxbins=30), title='Longitude'),
            alt.Tooltip('latitude:Q', bin=alt.Bin(maxbins=30), title='Latitude'),
            alt.Tooltip('sum(Value):Q', title='Value'),
        ]
    )
    .properties(
        width=600,
        height=400,
        title='Heat Map of Values by Longitude and Latitude'
    )
)

heatmap.display()
