In [1]:
import pandas as pd
import os

In [None]:
# Columns kept from every raw CSV. NAME and DATE are only used to derive
# STATE, YEAR and MONTH and are dropped afterwards.
_WEATHER_COLUMNS = ["NAME", "DATE", "PRCP", "TAVG", "TMAX", "TMIN"]

# Two-digit month number -> month name (built once instead of once per file).
_MONTH_NAMES = {
    "01": "January",
    "02": "February",
    "03": "March",
    "04": "April",
    "05": "May",
    "06": "June",
    "07": "July",
    "08": "August",
    "09": "September",
    "10": "October",
    "11": "November",
    "12": "December",
}

_REPORT_SEPARATOR = "------------------------------------------------------------------"
_REPORT_BANNER = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"


def _report_nulls_by_state(df, crop, is_last_file):
    """Print, per state in ``df``, the columns holding nulls and their counts."""
    states = df["STATE"].unique()

    for state in states:
        # NaN never compares equal to itself, so rows whose state could not be
        # extracted must be selected with isna(); an equality test would yield
        # an empty group and hide their missing values from the report.
        if pd.isna(state):
            state_df = df[df["STATE"].isna()]
        else:
            state_df = df[df["STATE"] == state]

        print(_REPORT_SEPARATOR)
        print(f"Checking null values for {crop} data in {state}")

        null_counts = state_df.isnull().sum()
        has_nulls = null_counts > 0

        print("Columns containing null values for this state:")
        print(state_df.columns[has_nulls])

        print("Columns with null values and their counts:")
        print(null_counts[has_nulls])

    # Nothing was printed for a file without rows, so no closing line either.
    if len(states) == 0:
        return

    print(_REPORT_SEPARATOR)
    if not is_last_file:
        print(_REPORT_BANNER)
        print("LOADING NEXT .CSV FILE")
        print(_REPORT_BANNER)


def read_crop_data(csv_paths, crop, report_nulls=True):
    """Load the weather CSV files of one crop into a single cleaned DataFrame.

    Each file must contain the columns NAME, DATE, PRCP, TAVG, TMAX and TMIN;
    any other columns are discarded. For every file the function

    * derives STATE from NAME: the two capital letters that follow a comma
      and are followed by whitespace (NaN when NAME has no such pattern),
    * splits DATE on "-" into YEAR (first part, kept as text) and MONTH
      (second part, translated from "01".."12" to the month name),
    * drops NAME and DATE and adds a constant CROP column.

    Parameters
    ----------
    csv_paths : str or iterable of str
        Path(s) of the CSV files to read. A single string is treated as one
        path.
    crop : str
        Label stored in the CROP column and shown in the null report.
    report_nulls : bool, default True
        When True, print a per-state summary of null values for every file.
        Pass False to load silently.

    Returns
    -------
    pandas.DataFrame
        Rows of all files concatenated in the given order with a fresh
        0..n-1 index and the columns PRCP, TAVG, TMAX, TMIN, STATE, YEAR,
        MONTH, CROP.

    Raises
    ------
    ValueError
        If ``csv_paths`` is empty.
    KeyError
        If a file lacks one of the required columns.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(csv_paths, str):
        csv_paths = [csv_paths]
    else:
        csv_paths = list(csv_paths)

    if not csv_paths:
        # pd.concat would raise a ValueError here too, but with a message
        # that does not point at the real cause.
        raise ValueError(f"No CSV paths were provided for crop {crop!r}")

    last_index = len(csv_paths) - 1
    dfs = []

    for index, path in enumerate(csv_paths):
        # .copy() makes the selection an independent frame, so the column
        # assignments below cannot trigger SettingWithCopyWarning.
        df = pd.read_csv(path)[_WEATHER_COLUMNS].copy()

        # Extract state abbreviation
        df["STATE"] = df["NAME"].str.extract(r",\s*([A-Z]{2})\s")[0]

        # Extract year and month; values missing from the mapping are left
        # untouched by replace().
        date_parts = df["DATE"].str.split("-")
        df["YEAR"] = date_parts.str[0]
        df["MONTH"] = date_parts.str[1].replace(_MONTH_NAMES)

        df = df.drop(columns=["NAME", "DATE"])

        # Create column for corresponding crop type
        df["CROP"] = crop

        if report_nulls:
            _report_nulls_by_state(df, crop, is_last_file=(index == last_index))

        dfs.append(df)

    # Combine the dataframes of the same crop
    return pd.concat(dfs, ignore_index=True)


In [None]:
# Input .csv files grouped per crop (relative paths)
corn_paths = [
    "noaa_csv_files/Corn_1.csv",
    "noaa_csv_files/Corn_2.csv",
]
soybean_paths = [
    "noaa_csv_files/Soybean_1.csv",
    "noaa_csv_files/Soybean_2.csv",
]
barley_paths = [
    "noaa_csv_files/Barley_1.csv",
    "noaa_csv_files/Barley_2.csv",
    "noaa_csv_files/Barley_3.csv",
]
oats_paths = [
    "noaa_csv_files/Oats_1.csv",
    "noaa_csv_files/Oats_2.csv",
]

# Crop labels, in the same order as the path groups in `paths`
crops = ["corn", "soybean", "barley", "oats"]

# Path groups, positionally aligned with `crops`
paths = [corn_paths, soybean_paths, barley_paths, oats_paths]

# Build one combined dataframe per crop with read_crop_data(),
# keyed by the crop label
crop_data = {}
for crop, path in zip(crops, paths):
    crop_data[crop] = read_crop_data(path, crop)

# Disabled helper snippets, kept for manual inspection and export.
#
# Preview the first rows of every crop dataframe:
# for crop in crops:
#     print(crop_data[crop].head())
#
# Show the rows that contain at least one null value:
# for crop in crops:
#     print(crop_data[crop][crop_data[crop].isnull().any(axis=1)])
#
# Write each crop dataframe to cleaned_csvs/<crop>.csv:
# for crop in crops:
#     crop_data[crop].to_csv(os.path.join("cleaned_csvs", crop + ".csv"))

------------------------------------------------------------------
Checking null values for corn data in IA
Columns containing null values for this state:
Index(['PRCP', 'TAVG', 'TMAX', 'TMIN'], dtype='object')
Columns with null values and their counts:
PRCP     12
TAVG    427
TMAX    426
TMIN    426
dtype: int64
------------------------------------------------------------------
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
LOADING NEXT .CSV FILE
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
------------------------------------------------------------------
Checking null values for corn data in NE
Columns containing null values for this state:
Index(['PRCP', 'TAVG', 'TMAX', 'TMIN'], dtype='object')
Columns with null values and their counts:
PRCP       9
TAVG    1514
TMAX    1511
TMIN    1514
dtype: int64
------------------------------------------------------------------
Checking null values for corn data in MN
Columns containing null values f

In [14]:
# Months used as the growing season in this summary
growing_months = ['June', 'July', 'August', 'September']

# Keep only the Iowa soybean rows that fall in a growing-season month
soybean_df = crop_data["soybean"]
soybean_df = soybean_df[
    (soybean_df['STATE'] == 'IA') & soybean_df['MONTH'].isin(growing_months)
]

# Mean of each weather column per year, with YEAR restored as a regular column
grouped_df = (
    soybean_df
    .groupby('YEAR')[['PRCP', 'TMIN', 'TMAX', 'TAVG']]
    .mean()
    .reset_index()
)

grouped_df


Unnamed: 0,YEAR,PRCP,TMIN,TMAX,TAVG
0,2013,2.628333,59.175,81.05,70.1
1,2014,5.307333,57.6,78.25,67.925
2,2015,5.379167,58.925,80.575,69.75
3,2016,3.317857,59.0,82.4,70.7
4,2017,3.068333,57.15,82.025,69.575
5,2018,5.53375,59.775,81.275,70.575
6,2019,3.856429,58.95,81.125,70.05
7,2020,1.89,59.325,83.85,71.6
8,2021,2.830769,59.375,84.775,72.1
9,2022,1.98,58.525,84.85,71.7
