# API Call Function

In [None]:
# Set your API key via the NASS_API_KEY environment variable so the secret is
# not saved inside the notebook. If the variable is unset the key falls back to
# the empty string; you may still paste a key in place of the "" fallback.
import os

api_key = os.environ.get("NASS_API_KEY", "")

In [None]:
import pandas as pd
import requests
import json
import sqlite3
import altair as alt

def get_crop_data_by_state(api_key, state_code, decade, group, util, statistic_cat, timeout=120):
    """
    Requests county-level survey records from the quickstats.nass.usda.gov API
    and returns them as a DataFrame.

    Parameters:
    api_key (str): API key sent as the 'key' query parameter.
    state_code (str): Value sent as 'state_alpha' (e.g. 'IA').
    decade (str): Pattern sent as 'year__LIKE' (e.g. '201%').
    group (str): Value sent as 'group_desc'.
    util (str): Value sent as 'prodn_practice_desc'.
    statistic_cat (str): Value sent as 'statisticcat_desc'.
    timeout (float or tuple): Seconds passed to requests.get as its timeout
        (default is 120) so a stalled connection cannot hang forever.

    Returns:
    pd.DataFrame or None: One row per entry of the response's 'data' list, or
    None (after printing an "Error: ..." message) when the request fails, the
    status code is not 200, or the body is not JSON containing a 'data' key.
    """
    base_url = "https://quickstats.nass.usda.gov/api/api_GET/"
    params = {
        'key': api_key,
        'source_desc': 'Survey',
        'group_desc': group,                # commodity group filter
        'statisticcat_desc': statistic_cat,  # statistic category filter
        'prodn_practice_desc': util,        # production practice filter
        'year__LIKE': decade,               # WHEN (year pattern supplied by the caller)
        'state_alpha': state_code,          # WHERE (state code supplied by the caller)
        'agg_level_desc': 'County',         # WHERE (level of aggregation)
        'format': 'json'                    # Return the data in JSON format
    }

    # Transport failures (DNS, refused connection, timeout) are reported the
    # same way as HTTP errors so a bulk download can move on to the next query.
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    # A 200 response is still checked: the body must be JSON with a 'data' key.
    try:
        records = response.json()['data']
    except (ValueError, KeyError) as exc:
        print(f"Error: unexpected response body ({exc!r})")
        return None

    return pd.DataFrame(records)

In [None]:
def save_field_crops_to_db(df, db_name='field_crops.db', table_name='field_crops'):
    """
    Saves the specified DataFrame to an SQLite database table and prints the number of new records added.

    Before saving, the 'CV (%)' column is renamed to 'CV', the 'Value' column is
    converted to numbers (thousands separators removed, unparseable entries
    become NaN), and the rows are filtered through specify_crops().

    Parameters:
    df (pd.DataFrame): The input DataFrame containing the field crops data.
    db_name (str): The name of the SQLite database file (default is 'field_crops.db').
    table_name (str): The table to append to; it is created if missing (default is 'field_crops').
    """
    # Nothing to clean or insert; an empty frame has no 'Value' column to convert.
    if df is None or df.empty:
        print(f"0 new records added to the '{table_name}' table.")
        return

    # Step 1: Clean the data before touching the database, so a cleaning
    # failure never leaves a connection open.
    cleaned = df.rename(columns={'CV (%)': 'CV'})
    # astype(str) keeps the .str accessor usable even if 'Value' is not text.
    value_text = cleaned['Value'].astype(str).str.replace(',', '', regex=False)
    cleaned['Value'] = pd.to_numeric(value_text, errors='coerce')
    selected = specify_crops(cleaned)

    # Step 2: Schema used when the table does not exist yet
    create_table_query = f'''
        CREATE TABLE IF NOT EXISTS {table_name} (
            reference_period_desc TEXT,
            domain_desc TEXT,
            load_time TEXT,
            year INTEGER,
            Value REAL,
            short_desc TEXT,
            util_practice_desc TEXT,
            county_name TEXT,
            end_code TEXT,
            state_ansi TEXT,
            country_name TEXT,
            CV REAL,
            county_ansi TEXT,
            domaincat_desc TEXT,
            congr_district_code TEXT,
            asd_code TEXT,
            freq_desc TEXT,
            asd_desc TEXT,
            agg_level_desc TEXT,
            commodity_desc TEXT,
            county_code TEXT,
            statisticcat_desc TEXT,
            zip_5 TEXT,
            source_desc TEXT,
            state_fips_code TEXT,
            region_desc TEXT,
            watershed_code TEXT,
            prodn_practice_desc TEXT,
            sector_desc TEXT,
            week_ending TEXT,
            watershed_desc TEXT,
            state_alpha TEXT,
            state_name TEXT,
            class_desc TEXT,
            begin_code TEXT,
            unit_desc TEXT,
            country_code TEXT,
            location_desc TEXT,
            group_desc TEXT
        )
        '''

    # Step 3: Connect; the finally clause closes the connection even on error
    conn = sqlite3.connect(db_name)
    try:
        cursor = conn.cursor()
        cursor.execute(create_table_query)

        # Step 4: Count records before insertion
        cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
        initial_count = cursor.fetchone()[0]

        # Step 5: Append the cleaned rows
        selected.to_sql(table_name, conn, if_exists='append', index=False)

        # Step 6: Count records after insertion
        cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
        new_count = cursor.fetchone()[0]

        conn.commit()
    finally:
        conn.close()

    # Report how many rows this call added
    num_new_records = new_count - initial_count
    print(f"{num_new_records} new records added to the '{table_name}' table.")


def specify_crops(df, crop_list=('CORN', 'SOYBEANS', 'WHEAT')):
    """
    Returns the rows of df whose 'commodity_desc' value is in crop_list.

    Parameters:
    df (pd.DataFrame): Input data; must contain a 'commodity_desc' column.
    crop_list (list-like): Commodity names to keep (default is CORN, SOYBEANS
        and WHEAT). The default is a tuple so it cannot be mutated between calls.
    """
    return df[df['commodity_desc'].isin(crop_list)]

def save_cleaned_field_crops_to_db(original_table,
                                   cleaned_table,
                                   db_name='field_crops.db',
                                   exclude_short_desc=None):
    """
    Reads the original table from the SQLite database, cleans it by excluding records with certain short_desc values,
    and saves the cleaned data to a new table.

    Any existing table named cleaned_table is replaced on every call.

    Parameters:
    original_table (str): The name of the original table to read data from.
    cleaned_table (str): The name of the table that receives the cleaned data.
    db_name (str): The name of the SQLite database file (default is 'field_crops.db').
    exclude_short_desc (list or set): A list or set of short_desc values to exclude (default is None,
        which keeps every row).
    """
    conn = sqlite3.connect(db_name)
    try:
        # Step 1: Load the full original table
        df = pd.read_sql_query(f"SELECT * FROM {original_table}", conn)

        # Step 2: Apply the filtering logic to exclude specific short_desc values (if provided)
        if exclude_short_desc:
            df = df[~df['short_desc'].isin(exclude_short_desc)]

        # Step 3: Write the cleaned data. if_exists='replace' drops any existing
        # table and lets pandas create it again, so no explicit CREATE TABLE is
        # issued here - it would be discarded immediately.
        df.to_sql(cleaned_table, conn, if_exists='replace', index=False)

        # Step 4: Commit; the finally clause closes the connection even on error
        conn.commit()
    finally:
        conn.close()
    print(f"Cleaned data saved to {db_name} in new table {cleaned_table}.")


In [8]:
def gather_all_states(api_key, state_list, decade_list, group, statistic_cat, util, table_name):
    """
    Downloads data for every state/decade combination and appends each result to table_name.

    Parameters:
    api_key (str): API key forwarded to get_crop_data_by_state.
    state_list (list): State codes to iterate over.
    decade_list (list): Year patterns (e.g. '201%') requested for each state.
    group (str): Forwarded to get_crop_data_by_state as its group argument.
    statistic_cat (str): Forwarded as the statistic_cat argument.
    util (str): Forwarded as the util argument.
    table_name (str): Table that save_field_crops_to_db appends to.
    """
    for state in state_list:
        print(f"pulling data for {state}")
        for decade in decade_list:
            print(f"pulling for decade {decade}")
            state_df = get_crop_data_by_state(api_key, state, decade, group, util, statistic_cat)
            # None signals a failed request. An empty frame has no 'Value'
            # column for the save step to clean, so it is skipped as well.
            if state_df is None or state_df.empty:
                print(f"no records for {state} in {decade}")
                continue
            save_field_crops_to_db(state_df, table_name=table_name)

## Midwest Production for Key Crops

In [None]:


# Query settings for the production pull
midwestern_states = ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI']
decade_list = [f"{prefix}%" for prefix in range(196, 203)]  # '196%' through '202%'
group = "Field Crops"
static_of_interest = "Production"
util = 'ALL PRODUCTION PRACTICES'

# Download every state/decade combination into the raw table
gather_all_states(
    api_key,
    midwestern_states,
    decade_list,
    group=group,
    statistic_cat=static_of_interest,
    util=util,
    table_name='midwest_key_field_crops',
)

# Copy the raw table to the cleaned table, leaving out the listed short_desc
save_cleaned_field_crops_to_db(
    original_table='midwest_key_field_crops',
    cleaned_table='midwest_key_field_crops_cleaned',
    exclude_short_desc=['WHEAT - PRODUCTION, MEASURED IN BU'],
)

### Midwest Area Planted for Key Crops

In [None]:

# Query settings for the area-planted pull
midwestern_states = ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI']
decade_list = [f"{prefix}%" for prefix in range(196, 203)]  # '196%' through '202%'
group = "Field Crops"
static_of_interest = "Area Planted"
util = 'ALL PRODUCTION PRACTICES'

# Download every state/decade combination into the raw table
gather_all_states(
    api_key,
    midwestern_states,
    decade_list,
    group=group,
    statistic_cat=static_of_interest,
    util=util,
    table_name='midwest_area_planted',
)

# Copy the raw table to the cleaned table, leaving out the listed short_desc
save_cleaned_field_crops_to_db(
    original_table='midwest_area_planted',
    cleaned_table='midwest_area_planted_cleaned',
    exclude_short_desc=['WHEAT - ACRES PLANTED'],
)