# API Call Function

In [1]:
import pandas as pd
import requests
import json
import sqlite3
import altair as alt

def get_crop_data_by_state(api_key, state_code, decade, group, util, statistic_cat, timeout=60):
    """
    Requests county-level survey records for one state from the USDA NASS Quick Stats API.

    Parameters:
    api_key (str): The Quick Stats API key, sent as 'key'.
    state_code (str): The value sent as 'state_alpha' (e.g. 'IA').
    decade (str): The pattern sent as 'year__LIKE' (e.g. '201%').
    group (str): The value sent as 'group_desc'.
    util (str): The value sent as 'prodn_practice_desc'.
    statistic_cat (str): The value sent as 'statisticcat_desc'.
    timeout (float): Seconds to wait for the server before giving up (default is 60).

    Returns:
    pd.DataFrame or None: One row per entry of the response's 'data' list, or None when the
    request fails, the status code is not 200, or the body has no 'data' entry.
    """
    base_url = "https://quickstats.nass.usda.gov/api/api_GET/"
    params = {
        'key': api_key,
        'source_desc': 'Survey',
        'group_desc': group,
        'statisticcat_desc': statistic_cat,  # statistic category requested by the caller
        'prodn_practice_desc': util,
        'year__LIKE': decade,                # year pattern requested by the caller
        'state_alpha': state_code,
        'agg_level_desc': 'County',          # level of aggregation
        'format': 'json'                     # return the data in JSON format
    }

    # Without a timeout a stalled connection would block the whole multi-state pull forever.
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    # A 200 response is still not guaranteed to be JSON with a 'data' list.
    try:
        records = response.json()['data']
    except (ValueError, KeyError, TypeError):
        print("Error: response did not contain a 'data' list")
        return None

    return pd.DataFrame(records)

In [5]:
def save_field_crops_to_db(df, db_name='field_crops.db', table_name='field_crops'):
    """
    Saves the specified DataFrame to an SQLite database table and prints the number of new records added.

    The 'CV (%)' column is renamed to 'CV', 'Value' is converted to numbers (thousands separators
    removed, anything non-numeric becomes NaN), and only the rows kept by specify_crops() are stored.

    Parameters:
    df (pd.DataFrame): The input DataFrame containing the field crops data.
    db_name (str): The name of the SQLite database file (default is 'field_crops.db').
    table_name (str): The table to append to (default is 'field_crops'). It is interpolated
        directly into the SQL text, so it must be a trusted identifier.
    """
    # Nothing to store: avoid a KeyError on 'Value' and avoid creating an empty database file.
    if df is None or df.empty or 'Value' not in df.columns:
        print(f"0 new records added to the '{table_name}' table.")
        return

    # rename() returns a new frame, so the caller's DataFrame is not modified below.
    df2 = df.rename(columns={'CV (%)': 'CV'})

    # astype(str) keeps the .str accessor usable even if 'Value' is already numeric.
    cleaned_values = df2['Value'].astype(str).str.replace(',', '', regex=False)
    df2['Value'] = pd.to_numeric(cleaned_values, errors='coerce')

    df3 = specify_crops(df2)

    create_table_query = f'''
        CREATE TABLE IF NOT EXISTS {table_name} (
            reference_period_desc TEXT,
            domain_desc TEXT,
            load_time TEXT,
            year INTEGER,
            Value REAL,
            short_desc TEXT,
            util_practice_desc TEXT,
            county_name TEXT,
            end_code TEXT,
            state_ansi TEXT,
            country_name TEXT,
            CV REAL,
            county_ansi TEXT,
            domaincat_desc TEXT,
            congr_district_code TEXT,
            asd_code TEXT,
            freq_desc TEXT,
            asd_desc TEXT,
            agg_level_desc TEXT,
            commodity_desc TEXT,
            county_code TEXT,
            statisticcat_desc TEXT,
            zip_5 TEXT,
            source_desc TEXT,
            state_fips_code TEXT,
            region_desc TEXT,
            watershed_code TEXT,
            prodn_practice_desc TEXT,
            sector_desc TEXT,
            week_ending TEXT,
            watershed_desc TEXT,
            state_alpha TEXT,
            state_name TEXT,
            class_desc TEXT,
            begin_code TEXT,
            unit_desc TEXT,
            country_code TEXT,
            location_desc TEXT,
            group_desc TEXT
        )
        '''

    conn = sqlite3.connect(db_name)
    try:
        cursor = conn.cursor()
        cursor.execute(create_table_query)

        # Count rows before and after the append to report how many were added.
        cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
        initial_count = cursor.fetchone()[0]

        df3.to_sql(table_name, conn, if_exists='append', index=False)

        cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
        new_count = cursor.fetchone()[0]

        conn.commit()
        print(f"{new_count - initial_count} new records added to the '{table_name}' table.")
    finally:
        # Always release the database handle, even when an insert fails part-way.
        conn.close()


def specify_crops(df, crop_list=('CORN', 'SOYBEANS', 'WHEAT')):
    """
    Returns the rows of df whose 'commodity_desc' value is one of crop_list.

    Parameters:
    df (pd.DataFrame): A DataFrame with a 'commodity_desc' column.
    crop_list (list, tuple or set): Commodity names to keep (default is CORN, SOYBEANS, WHEAT).
        The default is an immutable tuple so it cannot be altered between calls.

    Returns:
    pd.DataFrame: The matching rows, with their original index labels.
    """
    keep = df['commodity_desc'].isin(crop_list)
    return df[keep]

def save_cleaned_field_crops_to_db(original_table,
                                   cleaned_table, 
                                   db_name='field_crops.db',
                                   exclude_short_desc=None):
    """
    Reads the original table from the SQLite database, cleans it by excluding records with certain short_desc values, 
    and saves the cleaned data to a new table, replacing that table if it already exists.

    Parameters:
    original_table (str): The name of the table to read data from (required).
    cleaned_table (str): The name of the table that receives the cleaned data (required).
    db_name (str): The name of the SQLite database file (default is 'field_crops.db').
    exclude_short_desc (list or set): A list or set of short_desc values to exclude (default is None,
        which copies every row).

    Both table names are interpolated directly into SQL, so they must be trusted identifiers.
    """
    conn = sqlite3.connect(db_name)
    try:
        df = pd.read_sql_query(f"SELECT * FROM {original_table}", conn)

        # Drop the unwanted short_desc values (if any were provided)
        if exclude_short_desc:
            df = df[~df['short_desc'].isin(exclude_short_desc)]

        # if_exists='replace' drops and recreates the table from the DataFrame, so an explicit
        # CREATE TABLE beforehand would be discarded immediately; none is issued.
        df.to_sql(cleaned_table, conn, if_exists='replace', index=False)
        conn.commit()
    finally:
        # Release the database handle even if the read or write fails.
        conn.close()
    print(f"Cleaned data saved to {db_name} in new table {cleaned_table}.")


In [8]:
def gather_all_states(api_key, state_list, decade_list, group, statistic_cat, util, table_name):
    """
    Pulls data for every state/decade combination and appends each result to an SQLite table.

    Parameters:
    api_key (str): The API key passed to get_crop_data_by_state.
    state_list (list): State codes to pull, one request series per state.
    decade_list (list): Year patterns (e.g. '201%'), one request per pattern per state.
    group (str): Passed through to get_crop_data_by_state as 'group'.
    statistic_cat (str): Passed through to get_crop_data_by_state as 'statistic_cat'.
    util (str): Passed through to get_crop_data_by_state as 'util'.
    table_name (str): The table that save_field_crops_to_db appends to.
    """
    for state in state_list:
        print(f"pulling data for {state}")
        for decade in decade_list:
            print(f"pulling for decade {decade}")
            # util and statistic_cat are passed by name because this function's parameter
            # order differs from get_crop_data_by_state's.
            state_df = get_crop_data_by_state(api_key, state, decade, group,
                                              util=util, statistic_cat=statistic_cat)
            # None means the request failed; an empty frame has no columns to save.
            if state_df is None or state_df.empty:
                print(f"no records for {state} in {decade}")
                continue
            save_field_crops_to_db(state_df, table_name=table_name)

## Midwest Key Crops

In [None]:
import os

# Midwest production table for the key crops. save_field_crops_to_db keeps only the crops
# selected by specify_crops (CORN, SOYBEANS and WHEAT by default).
# The API key is read from the environment so the secret is not stored in the notebook.
api_key = os.environ.get("NASS_API_KEY")
if not api_key:
    raise RuntimeError("Set the NASS_API_KEY environment variable to your Quick Stats API key.")
group = "Field Crops"
static_of_interest = "Production"
midwestern_states = ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI']
util  = 'ALL PRODUCTION PRACTICES'

# One 'year__LIKE' pattern per decade, so each state is pulled in seven separate requests.
decade_list = ['196%', '197%','198%','199%','200%','201%', '202%']
gather_all_states(api_key, midwestern_states, decade_list, group, static_of_interest, util, table_name='midwest_key_field_crops')

# Copy the pulled rows into a cleaned table without the listed wheat short_desc.
save_cleaned_field_crops_to_db(original_table='midwest_key_field_crops', 
                               cleaned_table='midwest_key_field_crops_cleaned',
                               exclude_short_desc=['WHEAT - PRODUCTION, MEASURED IN BU'])

In [None]:
# Source: https://www.census.gov/geographies/reference-files/time-series/geo/gazetteer-files.html
# Load the tab-separated county gazetteer file, then keep the rows whose 'USPS' value
# is one of midwestern_states.
county_areas = pd.read_csv("data/2024_Gaz_counties_national.txt", sep="\t")
county_areas_midwest = county_areas.loc[county_areas['USPS'].isin(midwestern_states)]
county_areas_midwest

### Midwest Area Planted for Key Crops

In [None]:
import os

# Midwest area-planted table for the key crops. save_field_crops_to_db keeps only the crops
# selected by specify_crops (CORN, SOYBEANS and WHEAT by default).
# The API key is read from the environment so the secret is not stored in the notebook.
api_key = os.environ.get("NASS_API_KEY")
if not api_key:
    raise RuntimeError("Set the NASS_API_KEY environment variable to your Quick Stats API key.")
group = "Field Crops"
static_of_interest = "Area Planted"
midwestern_states = ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI']
util  = 'ALL PRODUCTION PRACTICES'

# One 'year__LIKE' pattern per decade, so each state is pulled in seven separate requests.
decade_list = ['196%', '197%','198%','199%','200%','201%', '202%']
gather_all_states(api_key, midwestern_states, decade_list, group, static_of_interest, util, table_name='midwest_area_planted')

In [54]:
# Copy 'midwest_area_planted' into 'midwest_area_planted_cleaned', leaving out the rows whose
# short_desc is 'WHEAT - ACRES PLANTED'. The database file is the function's default.
save_cleaned_field_crops_to_db(original_table='midwest_area_planted', 
                               cleaned_table='midwest_area_planted_cleaned',
                               exclude_short_desc=['WHEAT - ACRES PLANTED'])

Cleaned data saved to field_crops.db in new table midwest_area_planted_cleaned.


In [56]:
# Per commodity / state / short_desc: summed value and row count from the cleaned
# area-planted table. The sum keeps the alias 'total_prod' used by the other cells.
db_name, table = 'field_crops.db', 'midwest_area_planted_cleaned'
conn = sqlite3.connect(db_name)
query = f"Select commodity_desc, state_alpha, short_desc, sum(value) as total_prod, count(*) from {table} group by commodity_desc, state_alpha, short_desc"
result = pd.read_sql(query, conn)
result

Unnamed: 0,commodity_desc,state_alpha,short_desc,total_prod,count(*)
0,CORN,IA,CORN - ACRES PLANTED,802987000.0,6292
1,CORN,IL,CORN - ACRES PLANTED,581870000.0,5195
2,CORN,IN,CORN - ACRES PLANTED,298670000.0,4642
3,CORN,KS,CORN - ACRES PLANTED,177796000.0,6293
4,CORN,MI,CORN - ACRES PLANTED,151461000.0,4346
5,CORN,MN,CORN - ACRES PLANTED,377235800.0,4150
6,CORN,MO,CORN - ACRES PLANTED,185859000.0,6374
7,CORN,ND,CORN - ACRES PLANTED,91768000.0,3182
8,CORN,NE,CORN - ACRES PLANTED,492621000.0,5700
9,CORN,OH,CORN - ACRES PLANTED,186263000.0,4426


In [None]:
# Yearly area-planted totals per commodity / state / short_desc, charted one state at a time.
db_name = 'field_crops.db'
table = 'midwest_area_planted_cleaned'
# NOTE(review): prodn_practice_desc is selected but not grouped, so SQLite returns the value of
# one arbitrary row per group; the sum keeps the alias 'total_prod' although it holds area planted.
query = f"Select commodity_desc, state_alpha, short_desc, prodn_practice_desc, year, sum(value) as total_prod, count(*) from {table} group by commodity_desc, state_alpha, short_desc, year"

conn = sqlite3.connect(db_name)
result = pd.read_sql(query, conn)
pd.set_option('display.max_rows', None)  # Display all rows

midwestern_states = ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI']
good_data_commodities = ['CORN', 'SOYBEANS', 'WHEAT']

for commodity in good_data_commodities:
    # The commodity filter does not depend on the state, so it is applied once per commodity.
    commodity_rows = result[result['commodity_desc'] == commodity]
    for state in midwestern_states:
        df = commodity_rows[commodity_rows['state_alpha'] == state]

        # One line per short_desc; 'year' is treated as an ordinal (categorical) axis.
        chart = alt.Chart(df).mark_line().encode(
            x='year:O',
            y='total_prod:Q',
            color='short_desc:N'
        ).properties(
            # This table holds area planted, so the title says so instead of "Production".
            title=f'Total Area Planted in {state} for {commodity} Over Time',
            width=600,
            height=400
        )

        # Print the 2023 totals per short_desc as a quick sanity check.
        df_2023 = df[df['year'] == 2023]
        print(df_2023.groupby(['short_desc'])['total_prod'].sum().reset_index(name='Sum_Value'))

        # Display the chart
        chart.show()

### Create Cleaned table

# Data Exploration

In [None]:
# Per commodity / state / short_desc / year: summed value and row count from the cleaned
# production table.
db_name, table = 'field_crops.db', 'midwest_key_field_crops_cleaned'
conn = sqlite3.connect(db_name)
query = f"Select commodity_desc, state_alpha, short_desc, year, sum(value) as total_prod, count(*) from {table} group by commodity_desc, state_alpha, short_desc, year"
result = pd.read_sql(query, conn)
result

In [40]:
# Per commodity / state / short_desc: summed value and row count over all years in the
# cleaned production table.
db_name, table = 'field_crops.db', 'midwest_key_field_crops_cleaned'
conn = sqlite3.connect(db_name)
query = f"Select commodity_desc, state_alpha, short_desc, sum(value) as total_prod, count(*) from {table} group by commodity_desc, state_alpha, short_desc"
result = pd.read_sql(query, conn)
result

Unnamed: 0,commodity_desc,state_alpha,short_desc,total_prod,count(*)
0,CORN,IA,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",104610600000.0,6292
1,CORN,IA,"CORN, SILAGE - PRODUCTION, MEASURED IN TONS",366368000.0,4348
2,CORN,IL,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",93258560000.0,6419
3,CORN,IL,"CORN, SILAGE - PRODUCTION, MEASURED IN TONS",95775600.0,330
4,CORN,IN,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",44446030000.0,5746
5,CORN,IN,"CORN, SILAGE - PRODUCTION, MEASURED IN TONS",65626000.0,279
6,CORN,KS,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",19230790000.0,6269
7,CORN,KS,"CORN, SILAGE - PRODUCTION, MEASURED IN TONS",110275000.0,3568
8,CORN,MI,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",14341690000.0,4301
9,CORN,MI,"CORN, SILAGE - PRODUCTION, MEASURED IN TONS",166418000.0,2530


## Region Level

In [32]:
# Region-wide yearly totals per commodity, drawn as one interactive line chart.
# NOTE(review): total_prod sums every short_desc of a commodity; this table's CORN rows
# include both 'MEASURED IN BU' and 'MEASURED IN TONS' series, so the CORN line mixes
# units - confirm that is intended.
db_name, table = 'field_crops.db', 'midwest_key_field_crops_cleaned'
conn = sqlite3.connect(db_name)
query = f"Select commodity_desc, year, sum(value) as total_prod, count(*) from {table} group by commodity_desc, year"
pd.set_option('display.max_rows', None)  # Display all rows
result = pd.read_sql(query, conn)

chart = (
    alt.Chart(result)
    .mark_line()
    .encode(
        x='year:O',                # 'year' as an ordinal (categorical) axis
        y='total_prod:Q',          # quantitative axis for the summed value
        color='commodity_desc:N',  # one line per commodity
        tooltip=['year', 'commodity_desc', 'total_prod'],  # details on hover
    )
    .properties(title='Total Production by Commodity Over Time', width=600, height=400)
    .interactive()                 # zoom and pan
)

# Display the chart
chart.show()

## Crops with Better Data Population

## Key Crops

### 1960 - 2023

In [37]:
# Yearly totals per commodity and state: one chart per commodity, one line per state.
# NOTE(review): the query sums across short_desc, and this table's CORN rows include both
# 'MEASURED IN BU' and 'MEASURED IN TONS' series, so CORN totals mix units - confirm.
# prodn_practice_desc is selected without being grouped, so its value comes from one
# arbitrary row per group.
db_name, table = 'field_crops.db', 'midwest_key_field_crops_cleaned'
conn = sqlite3.connect(db_name)
query = f"Select commodity_desc, state_alpha, prodn_practice_desc, year, sum(value) as total_prod, count(*) from {table} group by commodity_desc, state_alpha, year"
result = pd.read_sql(query, conn)
pd.set_option('display.max_rows', None)  # Display all rows

unique_commodities = result['commodity_desc'].unique()
good_data_commodities = ['CORN', 'SOYBEANS', 'WHEAT']

for commodity in good_data_commodities:
    df = result.loc[result['commodity_desc'].eq(commodity)]

    # 2023 totals per state (the variable name says 2018, the filter is 2023).
    df_2018 = df.loc[df['year'].eq(2023)]
    per_state_totals = df_2018.groupby(['state_alpha'])['total_prod'].sum()

    chart = (
        alt.Chart(df)
        .mark_line()
        .encode(x='year:O', y='total_prod:Q', color='state_alpha:N')
        .properties(title=f'Total Production by {commodity} Over Time', width=600, height=400)
    )

    print(per_state_totals.reset_index(name='Sum_Value'))

    # Display the chart
    chart.show()

   state_alpha     Sum_Value
0           IA  2.530150e+09
1           IL  2.276300e+09
2           IN  1.077930e+09
3           KS  6.128500e+08
4           MI  3.460800e+08
5           MN  1.519775e+09
6           MO  5.615100e+08
7           ND  5.469000e+08
8           NE  1.729000e+09
9           OH  6.732000e+08
10          SD  8.542400e+08
11          WI  5.690200e+08


Empty DataFrame
Columns: [state_alpha, Sum_Value]
Index: []


   state_alpha    Sum_Value
0           IA  573040000.0
1           IL  648900000.0
2           IN  334280000.0
3           KS  104780000.0
4           MI   93380000.0
5           MN  349440000.0
6           MO  264960000.0
7           ND  218680000.0
8           NE  266770000.0
9           OH  274340000.0
10          SD  223080000.0
11          WI  105060000.0


   state_alpha    Sum_Value
0           IL   67860000.0
1           IN   30820000.0
2           KS  201250000.0
3           MI   46480000.0
4           MN   78120000.0
5           MO   42000000.0
6           ND  307845000.0
7           NE   36960000.0
8           OH   53100000.0
9           SD   60850000.0
10          WI   17480000.0


In [38]:
# Region-wide yearly totals per commodity and short_desc: one chart per commodity,
# one line per short_desc.
# NOTE(review): the cell that builds this cleaned table already excludes the same
# short_desc, so the WHERE clause below looks redundant - confirm.
db_name, table = 'field_crops.db', 'midwest_key_field_crops_cleaned'
conn = sqlite3.connect(db_name)
query = f"Select commodity_desc, short_desc, year, sum(value) as total_prod, count(*) from {table} where short_desc != 'WHEAT - PRODUCTION, MEASURED IN BU' group by commodity_desc, short_desc, year"
result = pd.read_sql(query, conn)
pd.set_option('display.max_rows', None)  # Display all rows

unique_commodities = result['commodity_desc'].unique()
good_data_commodities = ['CORN', 'SOYBEANS', 'WHEAT']

for commodity in good_data_commodities:
    df = result.loc[result['commodity_desc'].eq(commodity)]

    # 2023 totals per short_desc (the variable name says 2018, the filter is 2023).
    df_2018 = df.loc[df['year'].eq(2023)]
    per_series_totals = df_2018.groupby(['short_desc'])['total_prod'].sum()

    chart = (
        alt.Chart(df)
        .mark_line()
        .encode(x='year:O', y='total_prod:Q', color='short_desc:N')
        .properties(title=f'Total Production by {commodity} Over Time', width=600, height=400)
    )

    print(per_series_totals.reset_index(name='Sum_Value'))

    # Display the chart
    chart.show()

                                    short_desc     Sum_Value
0     CORN, GRAIN - PRODUCTION, MEASURED IN BU  1.326300e+10
1  CORN, SILAGE - PRODUCTION, MEASURED IN TONS  3.395500e+07


                              short_desc     Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  3.456710e+09


                                          short_desc    Sum_Value
0  WHEAT, SPRING, (EXCL DURUM) - PRODUCTION, MEAS...  373790000.0
1  WHEAT, SPRING, DURUM - PRODUCTION, MEASURED IN BU   32005000.0
2         WHEAT, WINTER - PRODUCTION, MEASURED IN BU  536970000.0


In [39]:
# Yearly totals per commodity / state / short_desc: one chart per (commodity, state) pair,
# one line per short_desc.
# prodn_practice_desc is selected without being grouped, so its value comes from one
# arbitrary row per group.
db_name, table = 'field_crops.db', 'midwest_key_field_crops_cleaned'
conn = sqlite3.connect(db_name)
query = f"Select commodity_desc, state_alpha, short_desc, prodn_practice_desc, year, sum(value) as total_prod, count(*) from {table} group by commodity_desc, state_alpha, short_desc, year"
result = pd.read_sql(query, conn)
pd.set_option('display.max_rows', None)  # Display all rows

unique_commodities = result['commodity_desc'].unique()

midwestern_states = ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI']
good_data_commodities = ['CORN', 'SOYBEANS', 'WHEAT']

for commodity in good_data_commodities:
    for state in midwestern_states:
        # Both filters applied in a single mask.
        wanted = result['commodity_desc'].eq(commodity) & result['state_alpha'].eq(state)
        df = result.loc[wanted]

        # 2023 totals per short_desc (the variable name says 2018, the filter is 2023).
        df_2018 = df.loc[df['year'].eq(2023)]
        per_series_totals = df_2018.groupby(['short_desc'])['total_prod'].sum()

        chart = (
            alt.Chart(df)
            .mark_line()
            .encode(x='year:O', y='total_prod:Q', color='short_desc:N')
            .properties(title=f'Total Production in {state} by {commodity} Over Time', width=600, height=400)
        )

        print(per_series_totals.reset_index(name='Sum_Value'))

        # Display the chart
        chart.show()

                                 short_desc     Sum_Value
0  CORN, GRAIN - PRODUCTION, MEASURED IN BU  2.276300e+09


                                 short_desc     Sum_Value
0  CORN, GRAIN - PRODUCTION, MEASURED IN BU  1.077930e+09


                                    short_desc     Sum_Value
0     CORN, GRAIN - PRODUCTION, MEASURED IN BU  2.522550e+09
1  CORN, SILAGE - PRODUCTION, MEASURED IN TONS  7.600000e+06


                                 short_desc    Sum_Value
0  CORN, GRAIN - PRODUCTION, MEASURED IN BU  612850000.0


                                 short_desc    Sum_Value
0  CORN, GRAIN - PRODUCTION, MEASURED IN BU  346080000.0


                                    short_desc     Sum_Value
0     CORN, GRAIN - PRODUCTION, MEASURED IN BU  1.513300e+09
1  CORN, SILAGE - PRODUCTION, MEASURED IN TONS  6.475000e+06


                                 short_desc    Sum_Value
0  CORN, GRAIN - PRODUCTION, MEASURED IN BU  561510000.0


                                 short_desc     Sum_Value
0  CORN, GRAIN - PRODUCTION, MEASURED IN BU  1.729000e+09


                                    short_desc    Sum_Value
0     CORN, GRAIN - PRODUCTION, MEASURED IN BU  543400000.0
1  CORN, SILAGE - PRODUCTION, MEASURED IN TONS    3500000.0


                                 short_desc    Sum_Value
0  CORN, GRAIN - PRODUCTION, MEASURED IN BU  673200000.0


                                 short_desc    Sum_Value
0  CORN, GRAIN - PRODUCTION, MEASURED IN BU  854240000.0


                                    short_desc    Sum_Value
0     CORN, GRAIN - PRODUCTION, MEASURED IN BU  552640000.0
1  CORN, SILAGE - PRODUCTION, MEASURED IN TONS   16380000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  648900000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  334280000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  573040000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  104780000.0


                              short_desc   Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  93380000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  349440000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  264960000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  266770000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  218680000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  274340000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  223080000.0


                              short_desc    Sum_Value
0  SOYBEANS - PRODUCTION, MEASURED IN BU  105060000.0


                                   short_desc   Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  67860000.0


                                   short_desc   Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  30820000.0


Empty DataFrame
Columns: [short_desc, Sum_Value]
Index: []


                                   short_desc    Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  201250000.0


                                   short_desc   Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  46480000.0


                                          short_desc   Sum_Value
0  WHEAT, SPRING, (EXCL DURUM) - PRODUCTION, MEAS...  78120000.0


                                   short_desc   Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  42000000.0


                                   short_desc   Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  36960000.0


                                          short_desc    Sum_Value
0  WHEAT, SPRING, (EXCL DURUM) - PRODUCTION, MEAS...  267720000.0
1  WHEAT, SPRING, DURUM - PRODUCTION, MEASURED IN BU   32005000.0
2         WHEAT, WINTER - PRODUCTION, MEASURED IN BU    8120000.0


                                   short_desc   Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  53100000.0


                                          short_desc   Sum_Value
0  WHEAT, SPRING, (EXCL DURUM) - PRODUCTION, MEAS...  27950000.0
1         WHEAT, WINTER - PRODUCTION, MEASURED IN BU  32900000.0


                                   short_desc   Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  17480000.0


In [33]:
# Winter-wheat production over time for NE, ND and SD: one chart per state.
# NOTE(review): no visible cell creates the 'midwest_field_crops' table - presumably it
# comes from an earlier version of the pull; confirm it still exists.
db_name, table = 'field_crops.db', 'midwest_field_crops'
conn = sqlite3.connect(db_name)
query = f"Select commodity_desc, state_alpha, short_desc, year, sum(value) as total_prod, count(*) from {table} group by commodity_desc, state_alpha, short_desc, year"
result = pd.read_sql(query, conn)
pd.set_option('display.max_rows', None)  # Display all rows

unique_commodities = result['commodity_desc'].unique()

# This cell narrows both lists to the wheat-belt states and wheat only.
midwestern_states = ['NE', 'ND', 'SD']
good_data_commodities = ['WHEAT']

for commodity in good_data_commodities:
    for state in midwestern_states:
        # Commodity, state and series filters applied in a single mask.
        wanted = (
            result['commodity_desc'].eq(commodity)
            & result['state_alpha'].eq(state)
            & result['short_desc'].eq('WHEAT, WINTER - PRODUCTION, MEASURED IN BU')
        )
        df = result.loc[wanted]

        # 2023 totals per short_desc (the variable name says 2018, the filter is 2023).
        df_2018 = df.loc[df['year'].eq(2023)]
        per_series_totals = df_2018.groupby(['short_desc'])['total_prod'].sum()

        chart = (
            alt.Chart(df)
            .mark_line()
            .encode(x='year:O', y='total_prod:Q', color='short_desc:N')
            .properties(title=f'Total Production in {state} by {commodity} Over Time', width=600, height=400)
        )

        print(per_series_totals.reset_index(name='Sum_Value'))

        # Display the chart
        chart.show()

# Yearly totals per commodity / state / short_desc, limited to rows reported under
# 'ALL PRODUCTION PRACTICES': one chart per (commodity, state) pair.
# NOTE(review): no visible cell creates the 'midwest_field_crops' table - confirm it exists.
# prodn_practice_desc is selected without being grouped, so the value filtered on below
# comes from one arbitrary row per group.
db_name, table = 'field_crops.db', 'midwest_field_crops'
conn = sqlite3.connect(db_name)
query = f"Select commodity_desc, state_alpha, short_desc, prodn_practice_desc, year, sum(value) as total_prod, count(*) from {table} group by commodity_desc, state_alpha, short_desc, year"
result = pd.read_sql(query, conn)
pd.set_option('display.max_rows', None)  # Display all rows

unique_commodities = result['commodity_desc'].unique()

midwestern_states = ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI']
good_data_commodities = ['CORN', 'SOYBEANS', 'WHEAT']

for commodity in good_data_commodities:
    for state in midwestern_states:
        # Commodity, state and practice filters applied in a single mask.
        wanted = (
            result['commodity_desc'].eq(commodity)
            & result['state_alpha'].eq(state)
            & result['prodn_practice_desc'].eq('ALL PRODUCTION PRACTICES')
        )
        df = result.loc[wanted]

        # 2023 totals per short_desc (the variable name says 2018, the filter is 2023).
        df_2018 = df.loc[df['year'].eq(2023)]
        per_series_totals = df_2018.groupby(['short_desc'])['total_prod'].sum()

        chart = (
            alt.Chart(df)
            .mark_line()
            .encode(x='year:O', y='total_prod:Q', color='short_desc:N')
            .properties(title=f'Total Production in {state} by {commodity} Over Time', width=600, height=400)
        )

        print(per_series_totals.reset_index(name='Sum_Value'))

        # Display the chart
        chart.show()

                                   short_desc   Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  36960000.0


                                   short_desc  Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  8120000.0


                                   short_desc   Sum_Value
0  WHEAT, WINTER - PRODUCTION, MEASURED IN BU  32900000.0


In [34]:
def get_unique_options_for_param(api_key, state_code, param, timeout=60):
    """
    Returns the distinct values of one column among a state's county-level crop survey records.

    Parameters:
    api_key (str): The Quick Stats API key, sent as 'key'.
    state_code (str): The value sent as 'state_alpha' (e.g. 'IL').
    param (str): The response column whose unique values are wanted (e.g. 'group_desc').
        It is not sent to the API; it is only looked up in the returned records.
    timeout (float): Seconds to wait for the server before giving up (default is 60).

    Returns:
    numpy.ndarray or None: The unique values of the column, or None when the request fails,
    the status code is not 200, the body has no 'data' entry, or the column is absent.
    """
    # NOTE(review): this downloads every matching record just to read one column, and the
    # recorded run of this cell ended in "Error: 413". Presumably the unfiltered query is too
    # large for the API; a dedicated parameter-values endpoint may fit better - TODO confirm
    # against the Quick Stats API documentation.
    base_url = "https://quickstats.nass.usda.gov/api/api_GET/"
    params = {
        'key': api_key,
        'source_desc': 'Survey',
        'sector_desc': 'CROPS',
        'state_alpha': state_code,
        'agg_level_desc': 'County',  # level of aggregation
        'format': 'json'             # return the data in JSON format
    }

    # Without a timeout a stalled connection would block the cell forever.
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    # A 200 response is still not guaranteed to be JSON with a 'data' list.
    try:
        records = response.json()['data']
    except (ValueError, KeyError, TypeError):
        print("Error: response did not contain a 'data' list")
        return None

    df = pd.DataFrame(records)
    if param not in df.columns:
        return None
    return df[param].unique()

# Example usage: list the distinct 'group_desc' values among Illinois records.
# Relies on `api_key` having been set by an earlier cell.
unique_groups = get_unique_options_for_param(api_key, 'IL', 'group_desc')
print(unique_groups)

Error: 413
None


In [3]:
import pandas as pd
import requests
import json
import sqlite3
import altair as alt

def get_crop_data_by_state(api_key, state_code, decade, group, statistic_cat, timeout=60):
    """
    Requests county-level crop survey records for one state from the USDA NASS Quick Stats API.

    NOTE(review): this cell redefines a function of the same name that takes an extra `util`
    argument elsewhere in the notebook; whichever cell ran last wins, and gather_all_states
    passes `util`. Confirm which variant is meant to be current.

    Parameters:
    api_key (str): The Quick Stats API key, sent as 'key'.
    state_code (str): The value sent as 'state_alpha' (e.g. 'IA').
    decade (str): The pattern sent as 'year__LIKE' (e.g. '201%').
    group (str): The value sent as 'group_desc'.
    statistic_cat (str): The value sent as 'statisticcat_desc'.
    timeout (float): Seconds to wait for the server before giving up (default is 60).

    Returns:
    pd.DataFrame or None: One row per entry of the response's 'data' list, or None when the
    request fails, the status code is not 200, or the body has no 'data' entry.
    """
    base_url = "https://quickstats.nass.usda.gov/api/api_GET/"
    params = {
        'key': api_key,
        'source_desc': 'Survey',
        'sector_desc': 'CROPS',
        'group_desc': group,
        'statisticcat_desc': statistic_cat,  # statistic category requested by the caller
        'year__LIKE': decade,                # year pattern requested by the caller
        'state_alpha': state_code,
        'agg_level_desc': 'County',          # level of aggregation
        'format': 'json'                     # return the data in JSON format
    }

    # Without a timeout a stalled connection would block the caller forever.
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    # A 200 response is still not guaranteed to be JSON with a 'data' list.
    try:
        records = response.json()['data']
    except (ValueError, KeyError, TypeError):
        print("Error: response did not contain a 'data' list")
        return None

    return pd.DataFrame(records)


Unnamed: 0,reference_period_desc,domain_desc,load_time,year,Value,short_desc,util_practice_desc,county_name,end_code,state_ansi,...,week_ending,watershed_desc,state_alpha,state_name,class_desc,begin_code,unit_desc,country_code,location_desc,group_desc
0,YEAR,TOTAL,2012-01-01 00:00:00.000,1969,170000,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",GRAIN,COLBERT,0,1,...,,,AL,ALABAMA,ALL CLASSES,0,BU,9000,"ALABAMA, NORTHERN VALLEY, COLBERT",FIELD CROPS
1,YEAR,TOTAL,2012-01-01 00:00:00.000,1968,175000,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",GRAIN,COLBERT,0,1,...,,,AL,ALABAMA,ALL CLASSES,0,BU,9000,"ALABAMA, NORTHERN VALLEY, COLBERT",FIELD CROPS
2,YEAR,TOTAL,2012-01-01 00:00:00.000,1967,274000,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",GRAIN,COLBERT,0,1,...,,,AL,ALABAMA,ALL CLASSES,0,BU,9000,"ALABAMA, NORTHERN VALLEY, COLBERT",FIELD CROPS
3,YEAR,TOTAL,2012-01-01 00:00:00.000,1966,202000,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",GRAIN,COLBERT,0,1,...,,,AL,ALABAMA,ALL CLASSES,0,BU,9000,"ALABAMA, NORTHERN VALLEY, COLBERT",FIELD CROPS
4,YEAR,TOTAL,2012-01-01 00:00:00.000,1965,365000,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",GRAIN,COLBERT,0,1,...,,,AL,ALABAMA,ALL CLASSES,0,BU,9000,"ALABAMA, NORTHERN VALLEY, COLBERT",FIELD CROPS
5,YEAR,TOTAL,2012-01-01 00:00:00.000,1964,280000,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",GRAIN,COLBERT,0,1,...,,,AL,ALABAMA,ALL CLASSES,0,BU,9000,"ALABAMA, NORTHERN VALLEY, COLBERT",FIELD CROPS
6,YEAR,TOTAL,2012-01-01 00:00:00.000,1963,483000,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",GRAIN,COLBERT,0,1,...,,,AL,ALABAMA,ALL CLASSES,0,BU,9000,"ALABAMA, NORTHERN VALLEY, COLBERT",FIELD CROPS
7,YEAR,TOTAL,2012-01-01 00:00:00.000,1962,293000,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",GRAIN,COLBERT,0,1,...,,,AL,ALABAMA,ALL CLASSES,0,BU,9000,"ALABAMA, NORTHERN VALLEY, COLBERT",FIELD CROPS
8,YEAR,TOTAL,2012-01-01 00:00:00.000,1961,488000,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",GRAIN,COLBERT,0,1,...,,,AL,ALABAMA,ALL CLASSES,0,BU,9000,"ALABAMA, NORTHERN VALLEY, COLBERT",FIELD CROPS
9,YEAR,TOTAL,2012-01-01 00:00:00.000,1960,473000,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",GRAIN,COLBERT,0,1,...,,,AL,ALABAMA,ALL CLASSES,0,BU,9000,"ALABAMA, NORTHERN VALLEY, COLBERT",FIELD CROPS


In [None]:
# 2023 CORN rows of the 'field_crops' table: summed value and row count per state.
# Alternative kept from the original cell (a single overall total instead of per state):
#query = "Select sum(value) as total_prod, count(*) from field_crops where year = 2023 and commodity_desc = 'CORN'"
db_name = 'field_crops.db'
conn = sqlite3.connect(db_name)
pd.set_option('display.max_rows', None)  # Display all rows

query = "Select state_alpha, year, sum(value) as total_prod, count(*) from field_crops where year = 2023 and commodity_desc = 'CORN' group by state_alpha"
result = pd.read_sql(query, conn)
result




In [41]:
# List up to ten distinct util_practice_desc values stored in 'midwest_field_crops'.
db_name = 'field_crops.db'

query = "Select distinct(util_practice_desc) from midwest_field_crops limit 10"
conn = sqlite3.connect(db_name) 
result = pd.read_sql(query, conn)
pd.set_option('display.max_columns', None)  # Display all columns
result

Unnamed: 0,util_practice_desc
0,GRAIN
1,ALL UTILIZATION PRACTICES
2,SILAGE


In [None]:
# Row count per (commodity_desc, class_desc) pair in 'field_crops'.
# Reuses the `conn` connection opened by an earlier cell.
query = "Select commodity_desc, class_desc, count(*) from field_crops  group by commodity_desc, class_desc;"
result = pd.read_sql(query, conn)
pd.set_option('display.max_rows', None)  # Display all rows
result

In [None]:
# Selected columns of the 2023 CORN rows for California in 'field_crops'.
# Reuses the `conn` connection opened by an earlier cell.
query = "Select county_name, state_alpha, commodity_desc, year, value, class_desc from field_crops where commodity_desc = 'CORN' and year = 2023 and state_alpha = 'CA' "
result = pd.read_sql(query, conn)
pd.set_option('display.max_rows', None)  # Display all rows
pd.set_option('display.max_columns', None)  # Display all columns
result

In [None]:
# Every column of the 2023 CORN rows for California in 'field_crops'.
# Reuses the `conn` connection opened by an earlier cell.
query = "Select * from field_crops where commodity_desc = 'CORN' and year = 2023 and state_alpha = 'CA' "
result = pd.read_sql(query, conn)
pd.set_option('display.max_rows', None)  # Display all rows
pd.set_option('display.max_columns', None)  # Display all columns
result

In [None]:
# Show the column dtypes of whichever `result` frame the last executed query cell produced.
result.dtypes

In [None]:
# Scratch cell: count the class_desc values among the WHEAT records of a raw response.
# NOTE(review): `data` is not assigned at notebook level in any visible cell (it only
# exists as a local inside get_crop_data_by_state) - presumably it must hold a parsed API
# response dict before this cell can run; confirm.
data2 = data['data']

test_df = pd.DataFrame(data2)
# Not the last expression of the cell, so the column list is not displayed.
test_df.columns


pd.set_option('display.max_columns', None)
wheat_df = test_df[test_df['commodity_desc'] == 'WHEAT']
wheat_df['class_desc'].value_counts()




In [None]:
# Scratch cell: turn the 'data' list of a raw response into a DataFrame and display it.
# NOTE(review): relies on a notebook-level `data` that no visible cell assigns - confirm.
data2 = data['data']
test_df = pd.DataFrame(data2)
test_df


In [None]:
# Scratch cell: copy the 'CV (%)' column into a new 'CV' column and display the frame.
# NOTE(review): `slim_test_df` is created by a cell further down the notebook, so that cell
# must run first; because it is a column selection of test_df, this assignment may raise a
# pandas SettingWithCopyWarning - confirm.
slim_test_df['CV'] = slim_test_df['CV (%)']
slim_test_df

In [None]:
# Scratch cell: build a narrower view of test_df and count rows per commodity grouping.
# Only the last expression of a cell is displayed, so the bare expressions above the
# final groupby produce no output.
test_df

# Bare list literal with no effect.
['short_desc','unit_desc', 'commodity_desc', 'class_desc']

# Columns kept in the slim frame.
cols = ['year', 'state_alpha', 'county_name', 'class_desc', 'commodity_desc',
       'short_desc', 'CV (%)',
        'unit_desc',
       'Value',
        'sector_desc', 'util_practice_desc',
       'statisticcat_desc', 'region_desc', 'domaincat_desc', 'agg_level_desc',
       'prodn_practice_desc', 'domain_desc', 'group_desc']

slim_test_df = test_df[cols]
slim_test_df[slim_test_df['commodity_desc'] == "CORN"]
slim_test_df.groupby(['commodity_desc', 'class_desc']).size().reset_index(name='Counts')
# Displayed result: row count per (commodity_desc, util_practice_desc) pair.
slim_test_df.groupby(['commodity_desc', 'util_practice_desc']).size().reset_index(name='Counts')
