In [None]:
import pandas as pd

# NOAA state code (as it appears in the climdiv files parsed below) -> state FIPS code
fips_mapping = {
    "11": "17",  # Illinois
    "12": "18",  # Indiana
    "13": "19",  # Iowa
    "14": "20",  # Kansas
    "20": "26",  # Michigan
    "21": "27",  # Minnesota
    "23": "29",  # Missouri
    "25": "31",  # Nebraska
    "32": "38",  # North Dakota
    "33": "39",  # Ohio
    "39": "46",  # South Dakota
    "47": "55",  # Wisconsin
}

# Filters: the NOAA state codes to keep are exactly the keys of the mapping above
# (dicts preserve insertion order, so the list order matches the table).
noaa_midwest_codes = list(fips_mapping)

# Identifier columns carried into every parsed climate DataFrame
final_df_cols = ['Year', 'County_Code', 'state_fips']

def parse_climdiv_data(file_path, yearly_avg_column_name, midwest_codes=noaa_midwest_codes,
                       final_df_cols=final_df_cols, missing_values=(-99.99, -99.90, -9.99)):
    """Parse a fixed-width climdiv file into one yearly-average row per county and year.

    Parameters
    ----------
    file_path : str or path-like
        Fixed-width climdiv file: state code (cols 1-2), division number
        (cols 3-5), element code (cols 6-7), year (cols 8-11), then twelve
        7-character monthly values (cols 12-95).
    yearly_avg_column_name : str
        Name of the output column holding the mean of the twelve monthly values.
    midwest_codes : list of str
        State codes (as they appear in the file) to keep.
    final_df_cols : list of str
        Identifier columns to return alongside the yearly average.
    missing_values : iterable of float
        Sentinel values treated as missing in the monthly columns. The default
        keeps the two sentinels the original code handled (-99.99, -9.99) and
        adds -99.90, which NOAA's nClimDiv readme lists for temperature files
        (NOTE(review): confirm against the readme shipped with the data).

    Returns
    -------
    pandas.DataFrame
        Rows whose State_Code is in ``midwest_codes`` and whose Year is greater
        than 1950, restricted to ``final_df_cols`` plus ``yearly_avg_column_name``.
    """
    month_labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    numeric_columns = [f"{label}_Value" for label in month_labels]
    column_names = ["State_Code", "Division_Number", "Element_Code", "Year"] + numeric_columns

    # Zero-based, half-open column spans: four header fields followed by twelve
    # 7-character monthly fields, i.e. (11, 18), (18, 25), ..., (88, 95).
    column_specs = [(0, 2), (2, 5), (5, 7), (7, 11)]
    column_specs += [(11 + 7 * i, 18 + 7 * i) for i in range(len(month_labels))]

    # State_Code and Division_Number are read as strings so leading zeros survive
    df = pd.read_fwf(file_path, colspecs=column_specs, names=column_names,
                     dtype={"State_Code": str, "Division_Number": str})

    # Codes missing from fips_mapping map to NaN; those rows are dropped by the
    # state filter below as long as midwest_codes is a subset of the mapping keys.
    df['state_fips'] = df['State_Code'].map(fips_mapping)
    df['County_Code'] = df['state_fips'] + df['Division_Number']

    # Coerce the monthly fields to floats, then blank the sentinels out as NaN.
    # (Replacing with None, as before, can turn the columns into object dtype.)
    monthly = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
    df[numeric_columns] = monthly.mask(monthly.isin(list(missing_values)))

    # Row-wise mean skips NaN, so a partially missing year averages what is present
    df[yearly_avg_column_name] = df[numeric_columns].mean(axis=1)

    in_selected_states = df['State_Code'].isin(midwest_codes)
    after_1950 = df['Year'] > 1950  # keeps 1951 onward
    output_columns = list(final_df_cols) + [yearly_avg_column_name]

    return df.loc[in_selected_states & after_1950, output_columns]


In [None]:
# One climdiv county file per climate variable
precipitation_path = 'data/climate_data/climdiv-pcpncy-v1.0.0-20241021.txt'
avg_temp_path = 'data/climate_data/climdiv-tmpccy-v1.0.0-20241021.txt'
max_temp_path = 'data/climate_data/climdiv-tmaxcy-v1.0.0-20241021.txt'
min_temp_path = 'data/climate_data/climdiv-tmincy-v1.0.0-20241021.txt'

# Each call yields Year / County_Code / state_fips plus one yearly-average column
precip_df = parse_climdiv_data(precipitation_path, "ann_avg_precip")
avg_temp_df = parse_climdiv_data(avg_temp_path, "ann_avg_temp")
max_temp_df = parse_climdiv_data(max_temp_path, "ann_max_temp")
min_temp_df = parse_climdiv_data(min_temp_path, "ann_min_temp")

# Inner-join the four tables on their shared identifier columns, then order the
# rows by county and year so the rolling window below walks each county in time order.
merge_cols = ['Year', 'County_Code', 'state_fips']
annual_climate_data_df = (
    precip_df
    .merge(avg_temp_df, on=merge_cols)
    .merge(max_temp_df, on=merge_cols)
    .merge(min_temp_df, on=merge_cols)
    .sort_values(by=['County_Code', 'Year'])
)

# Per-county trailing mean over a 30-row window. With one row per consecutive
# year this is a 30-year mean; the first 29 rows of each county come out as NaN.
rolling_avg_30yr_climate_data_df = (
    annual_climate_data_df
    .groupby('County_Code')[['Year', 'ann_avg_precip', 'ann_avg_temp', 'ann_max_temp', 'ann_min_temp']]
    .apply(lambda county_rows: county_rows.set_index('Year').rolling(window=30).mean())
    .reset_index()
)


In [None]:

# The sort and the 30-year rolling means are already computed in the previous
# cell; this cell used to repeat that code verbatim. It now only inspects the
# two years (1980 and 2023) that the maps and change calculations below rely on.
rolling_avg_30yr_climate_data_df[rolling_avg_30yr_climate_data_df['Year'].isin([1980, 2023])]


## Point in Time Maps

In [None]:
from vega_datasets import data
# geopandas was first imported in a later cell, so on a fresh kernel this cell
# raised NameError at gpd.read_file; import it where it is first used.
import geopandas as gpd

# Load U.S. states and counties
states = data.us_10m.url  # URL for U.S. states
counties = data.us_10m.url  # URL for U.S. counties
print(states)
print(counties)
# NOTE(review): both names hold the same us_10m URL and neither read selects a
# layer, so states_gdf and counties_gdf presumably contain the same features —
# confirm whether states_gdf is needed or should read the 'states' layer.
states_gdf = gpd.read_file(states)
counties_gdf = gpd.read_file(counties)

### 1950-1980


In [None]:
from vega_datasets import data
import pandas as pd
import sqlite3
import geopandas as gpd
import altair as alt


# Keep the us_10m URL under a module-level name: a later cell re-reads the
# file through `counties`.
counties = data.us_10m.url  # URL for U.S. counties
# Read the file at that URL into a GeoDataFrame; load_midwest_counties below
# filters it by its 'id' column.
counties_gdf = gpd.read_file(counties)

def load_midwest_counties(db_name, table, counties_gdf):
    """Return the rows of ``counties_gdf`` that belong to states listed in a SQLite table.

    Parameters
    ----------
    db_name : str
        Path of the SQLite database file.
    table : str
        Table with a ``state_ansi`` column. The name is interpolated into the
        SQL text (identifiers cannot be bound as parameters), so pass only
        trusted, hard-coded table names.
    counties_gdf : GeoDataFrame (or DataFrame)
        Must have a string ``id`` column.

    Returns
    -------
    The subset of ``counties_gdf`` whose ``id`` is exactly five characters long
    and whose first two characters match a distinct ``state_ansi`` value.
    """
    from contextlib import closing

    # SQL query to get distinct state ANSI codes
    query = f"""
    SELECT 
        DISTINCT state_ansi
    FROM {table} 
    """

    # sqlite3's own context manager only commits/rolls back; it never closes
    # the connection. closing() guarantees the handle is released.
    with closing(sqlite3.connect(db_name)) as conn:
        state_codes = pd.read_sql(query, conn)

    # First (only) result column as a plain list
    state_ansi_list = state_codes.iloc[:, 0].to_list()

    # NOTE(review): the prefix comparison is string-based, so this assumes
    # state_ansi is stored as two-character text — confirm against the table.
    county_ids = counties_gdf['id']
    in_listed_state = county_ids.str[:2].isin(state_ansi_list)
    is_five_char_id = county_ids.str.len() == 5

    return counties_gdf[in_listed_state & is_five_char_id]

# Example usage: build the county subset for the states present in the crops table
db_name = 'field_crops.db'
table = 'midwest_key_field_crops_cleaned'
midwest_counties = load_midwest_counties(
    db_name=db_name,
    table=table,
    counties_gdf=counties_gdf,
)


def create_climate_maps(merged_1980, midwest_counties_gdf, climate_metrics):
    """Show one layered county choropleth per climate metric.

    Each map stacks three layers: a light-gray county background, the counties
    colored by the metric, and thick outlines for the listed Midwestern states.

    Parameters
    ----------
    merged_1980 : GeoDataFrame
        Must contain ``id``, ``geometry`` and every column named in
        ``climate_metrics``. Despite the name, other cells in this notebook
        also pass change tables, not only the 1980 values.
    midwest_counties_gdf : GeoDataFrame
        County shapes drawn as the gray background.
    climate_metrics : iterable of str
        Column names to map, one chart each.

    Returns
    -------
    None. Each chart is displayed with ``.show()``.
    """
    # Shared by all three layers so they overlay exactly
    map_size = {'width': 800, 'height': 500}

    # Gray county background with thin black borders
    county_map_background = alt.Chart(midwest_counties_gdf).mark_geoshape(
        fill='lightgray',
        stroke='black',
        strokeWidth=0.5
    ).properties(**map_size).project('albersUsa')

    # State outlines come from the 'states' feature of the us_10m TopoJSON
    states = alt.topo_feature(data.us_10m.url, 'states')

    # State ids to outline (same values as the FIPS codes in fips_mapping, as ints)
    midwestern_state_ids = [17, 18, 19, 20, 26, 27, 29, 31, 38, 39, 46, 55]

    # Unfilled, thicker black borders for just those states
    state_map_background = alt.Chart(states).mark_geoshape(
        fill=None,
        stroke='black',
        strokeWidth=1.5
    ).transform_filter(
        alt.FieldOneOfPredicate(field='id', oneOf=midwestern_state_ids)
    ).properties(**map_size).project('albersUsa')

    for metric in climate_metrics:
        # Pass Altair only the columns this chart needs
        metric_df = merged_1980[['id', 'geometry', metric]]

        county_map_filled = alt.Chart(metric_df).mark_geoshape(
            stroke='black',
            strokeWidth=0.5
        ).encode(
            color=alt.Color(f'{metric}:Q', scale=alt.Scale(scheme='blues')),
            tooltip=['id:N', f'{metric}:Q']
        ).properties(
            # Was "Map of Production for ...", which mislabeled climate metrics
            title=f'Map of {metric}',
            **map_size
        ).project('albersUsa')

        # Bottom to top: gray counties, colored counties, state outlines
        layered_map = county_map_background + county_map_filled + state_map_background
        layered_map.show()

# Example usage: see the next cell, which builds the inputs and calls create_climate_maps.



In [None]:
from vega_datasets import data
import pandas as pd
import sqlite3
import altair as alt
import geopandas as gpd

db_name = 'field_crops.db'
table = 'midwest_key_field_crops_cleaned'
midwest_counties_gdf = load_midwest_counties(db_name, table, counties_gdf)

# Filter to 1980 and rename in one non-in-place chain. The previous
# rename(..., inplace=True) ran on a boolean-filtered slice, which triggers
# pandas' SettingWithCopyWarning.
climate_1980 = (
    rolling_avg_30yr_climate_data_df[rolling_avg_30yr_climate_data_df['Year'] == 1980]
    .rename(columns={'County_Code': 'id'})
)

# Left-join county shapes onto the climate rows (every climate row is kept,
# matched or not) and mark 'geometry' as the active geometry column.
merged_1980 = gpd.GeoDataFrame(
    pd.merge(climate_1980, midwest_counties_gdf, on='id', how='left')
).set_geometry('geometry')
print(len(merged_1980))

climate_metrics = ['ann_avg_precip', 'ann_avg_temp', 'ann_max_temp', 'ann_min_temp']
create_climate_maps(merged_1980, midwest_counties_gdf, climate_metrics)

## Change Maps

In [None]:
import pandas as pd
def calc_differnce(metric):
    """Compute the 2023-minus-1980 change of one metric for every county.

    Reads the module-level ``rolling_avg_30yr_climate_data_df``.

    Returns a tuple ``(frame, column_name)``: ``frame`` has one row per
    County_Code with the 1980 value, the 2023 value and their difference;
    ``column_name`` is ``f'{metric}_change'``.
    """
    change_col = f'{metric}_change'

    # Keep only the two endpoint years, then spread them into columns
    is_endpoint_year = rolling_avg_30yr_climate_data_df['Year'].isin([1980, 2023])
    wide = rolling_avg_30yr_climate_data_df[is_endpoint_year].pivot(
        index='County_Code', columns='Year', values=metric
    )

    wide[change_col] = wide[2023] - wide[1980]

    # County_Code back from index to ordinary column
    wide = wide.reset_index()
    return wide, change_col

climate_metrics = ['ann_avg_precip', 'ann_avg_temp', 'ann_max_temp', 'ann_min_temp']


def gen_change_df(climate_metrics):
    """Return one two-column frame (County_Code, ``<metric>_change``) per metric.

    Each frame comes from ``calc_differnce(metric)``, trimmed to the county
    code and the change column. The frames are returned as a list, in the
    order of ``climate_metrics``; they are not merged.
    """
    per_metric = (calc_differnce(metric) for metric in climate_metrics)
    return [change_df[['County_Code', col_name]] for change_df, col_name in per_metric]
    


In [None]:
# Metrics to compute 1980 -> 2023 changes for
climate_metrics= ['ann_avg_precip', 'ann_avg_temp', 'ann_max_temp', 'ann_min_temp']
# Uses the one-argument gen_change_df defined in the cell above, which returns a
# list of per-metric frames. A later cell redefines gen_change_df with a second
# required argument, so this call only works if that later cell has not run yet.
df_d  = gen_change_df(climate_metrics)


In [None]:
import pandas as pd

def calc_difference(metric, type):
    """Compute the 1980 -> 2023 change of one metric for every county.

    Reads the module-level ``rolling_avg_30yr_climate_data_df``.

    Parameters
    ----------
    metric : str
        Column of the rolling-average frame to compare.
    type : str
        ``"abs_change"`` for ``value_2023 - value_1980`` or ``"pct_change"``
        for that difference divided by the 1980 value, times 100.

    Returns
    -------
    (pandas.DataFrame, str)
        A frame with one row per County_Code holding the 1980 value, the 2023
        value and the change, plus the name of the change column
        (``f'{metric}_{type}'``).

    Raises
    ------
    ValueError
        If ``type`` is not one of the two supported kinds. Previously the
        function returned a column name that had never been created, and the
        failure only surfaced later as a KeyError in the caller.
    """
    supported_types = ("abs_change", "pct_change")
    if type not in supported_types:
        raise ValueError(f"type must be one of {supported_types}, got {type!r}")

    change_col = f'{metric}_{type}'

    # Step 1: keep only the two endpoint years
    filtered_df = rolling_avg_30yr_climate_data_df[rolling_avg_30yr_climate_data_df['Year'].isin([1980, 2023])]

    # Step 2: one row per county, one column per year
    pivot_df = filtered_df.pivot(index='County_Code', columns='Year', values=metric)

    # Step 3: absolute or relative difference between 2023 and 1980
    difference = pivot_df[2023] - pivot_df[1980]
    if type == "abs_change":
        pivot_df[change_col] = difference
    else:
        # A zero 1980 value yields inf/NaN here, following pandas division rules
        pivot_df[change_col] = (difference / pivot_df[1980]) * 100

    # County_Code back from index to ordinary column
    pivot_df.reset_index(inplace=True)

    return pivot_df, change_col

def gen_change_df(climate_metrics, type):
    """Build one table with a change column per metric, keyed by County_Code.

    Parameters
    ----------
    climate_metrics : iterable of str
        Metrics to process; any number of them (the previous version indexed
        exactly four and failed with fewer).
    type : str
        Change kind forwarded to ``calc_difference`` ("abs_change" or "pct_change").

    Returns
    -------
    pandas.DataFrame
        ``County_Code`` plus one ``f'{metric}_{type}'`` column per metric,
        inner-joined on ``County_Code``, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``climate_metrics`` is empty.
    """
    climate_metrics = list(climate_metrics)
    if not climate_metrics:
        raise ValueError("climate_metrics must contain at least one metric")

    merge_cols = ['County_Code']
    change_climate_data_df = None
    for metric in climate_metrics:
        change_df, col_name = calc_difference(metric, type)
        # Only the key and the change column take part in the merge
        metric_change_df = change_df[['County_Code', col_name]]
        if change_climate_data_df is None:
            change_climate_data_df = metric_change_df
        else:
            change_climate_data_df = change_climate_data_df.merge(metric_change_df, on=merge_cols)

    return change_climate_data_df.reset_index(drop=True)

# Example usage
climate_metrics = ['ann_avg_precip', 'ann_avg_temp', 'ann_max_temp', 'ann_min_temp']
abs_change_climate_data_df = gen_change_df(climate_metrics,  "abs_change")
pct_change_climate_data_df = gen_change_df(climate_metrics,  "pct_change")
# A notebook cell renders only its last expression, so the bare
# `abs_change_climate_data_df` line that used to sit here showed nothing.
# display() is the built-in provided by the IPython/Jupyter kernel.
display(abs_change_climate_data_df)
pct_change_climate_data_df



In [None]:
# Summary statistics for the percent-change table, as a quick sanity check on
# the range of each change column before mapping.
pct_change_climate_data_df.describe()


##  Absolute Change Maps

In [None]:
db_name = 'field_crops.db'
table = 'midwest_key_field_crops_cleaned'

# `counties` is the us_10m URL assigned in an earlier cell; re-read the shapes
# and narrow them to the states present in the crops table.
counties_gdf = gpd.read_file(counties)
midwest_counties_gdf = load_midwest_counties(db_name, table, counties_gdf)

# The county shapes are keyed by 'id', so the change table must use the same key
abs_change_climate_data_df.rename(columns={'County_Code': 'id'}, inplace=True)

# Left-join shapes onto the change rows and mark 'geometry' as the active geometry
change_climate_data_gdf = gpd.GeoDataFrame(
    pd.merge(abs_change_climate_data_df, midwest_counties_gdf, on='id', how='left')
).set_geometry('geometry')
print(len(change_climate_data_gdf))

climate_metrics = [
    'ann_avg_precip_abs_change',
    'ann_avg_temp_abs_change',
    'ann_max_temp_abs_change',
    'ann_min_temp_abs_change',
]
create_climate_maps(change_climate_data_gdf, midwest_counties_gdf, climate_metrics)

## Pct Change Maps

In [None]:
db_name = 'field_crops.db'
table = 'midwest_key_field_crops_cleaned'
version = 'pct'

# Reuses the counties_gdf already loaded by an earlier cell
midwest_counties_gdf = load_midwest_counties(db_name, table, counties_gdf)

# The county shapes are keyed by 'id', so the change table must use the same key
pct_change_climate_data_df.rename(columns={'County_Code': 'id'}, inplace=True)

# Left-join shapes onto the change rows and mark 'geometry' as the active geometry
change_climate_data_gdf = gpd.GeoDataFrame(
    pd.merge(pct_change_climate_data_df, midwest_counties_gdf, on='id', how='left')
).set_geometry('geometry')
print(len(change_climate_data_gdf))

# Column names follow the '<metric>_<version>_change' pattern
climate_metrics = [
    f'{base_metric}_{version}_change'
    for base_metric in ('ann_avg_precip', 'ann_avg_temp', 'ann_max_temp', 'ann_min_temp')
]
create_climate_maps(change_climate_data_gdf, midwest_counties_gdf, climate_metrics)