In [None]:
import pandas as pd

# Read the raw climate-action CSV into a DataFrame and preview its first five rows
data_frame = pd.read_csv(filepath_or_buffer='climate_action_data.csv')
display(data_frame.head(n=5))

In [None]:
# Count the missing (NaN/None) cells in every column.
# The counts are printed explicitly: Jupyter only renders a bare expression
# when it is the last statement of a cell, so mid-cell the result would be
# computed and silently discarded.
print('\nMissing values per column before cleaning')
print(data_frame.isnull().sum())

# Column dtypes and non-null counts before any cleaning is applied
# (info() writes its report straight to stdout).
print('\nData frame information before cleaning')
data_frame.info()


In [None]:
import pandas as pd
import numpy as np

# Re-read the CSV from disk and, in the same chain, swap the raw headers
# (units in parentheses) for identifier-friendly names. rename() returns a
# new frame, which is bound back to data_frame.
data_frame = pd.read_csv("climate_action_data.csv").rename(
    columns={
        'Soil_Moisture(%)': 'Soil_Moisture_Percent',
        'Soil_pH': 'Soil_pH',  # maps to itself (no-op); listed for completeness
        'Temperature(C)': 'Temperature_Celsius',
        'Humidity(%)': 'Humidity_Percent',
        'Fertilizer_Recommended(kg/ha)': 'Fertilizer_Recommended_kg_per_ha',
        'Irrigation_Recommended(mm)': 'Irrigation_Recommendation_mm',
    }
)

# Measurement columns that should hold numbers but may have been read as text
cols_to_numeric = [
    'Soil_Moisture_Percent',
    'Soil_pH',
    'Temperature_Celsius',
    'Humidity_Percent',
    'Fertilizer_Recommended_kg_per_ha',
    'Irrigation_Recommendation_mm',
]

# Re-type the columns in one step: Date becomes datetime and each measurement
# column becomes numeric. errors='coerce' turns any value that cannot be
# parsed into NaT (dates) or NaN (numbers) instead of raising.
data_frame = data_frame.assign(
    Date=pd.to_datetime(data_frame['Date'], errors='coerce'),
    **{
        name: pd.to_numeric(data_frame[name], errors='coerce')
        for name in cols_to_numeric
    }
)

# Handling missing values:
# discard every row that is missing a value in any required column, i.e.
#   - Date is NaT,
#   - any of the numeric measurement columns is NaN, or
#   - Crop_Type is missing.
# dropna() defaults to how='any', so a single call over the combined subset
# removes a row as soon as one of these columns is empty.
data_frame = data_frame.dropna(subset=['Date', *cols_to_numeric, 'Crop_Type'])

# Column dtypes and non-null counts of the cleaned frame
# (info() writes its report straight to stdout)
print('\nData frame information after cleaning')
data_frame.info()

# Summary statistics of the cleaned frame, printed under their heading
print(
    '\nDataframe Descriptive statistics after cleaning',
    data_frame.describe(),
    sep='\n',
)

# Number of remaining rows for each crop type
print(
    "\nValue counts for Crop_Type after cleaning:",
    data_frame['Crop_Type'].value_counts(),
    sep='\n',
)

# Persist the cleaned frame as a new CSV; index=False leaves the row index out of the file
data_frame.to_csv(path_or_buf="cleaned_climate_action_data.csv", index=False)
print("\nCleaned data saved to 'cleaned_climate_action_data.csv'")