In [None]:
import pandas as pd
import numpy as ny
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Read the raw dataset into `df` and print pandas' structural summary of it.
raw_csv_path = 'climate_action_data.csv'
df = pd.read_csv(raw_csv_path)
df.info()

In [None]:
# Report the frame's shape, then list every column name.
frame_shape = df.shape
print(frame_shape)

column_names = list(df.columns)
print("\nColumn Names: ", column_names)

In [None]:
# Summary statistics for the frame. `describe` has to be *called*: the bare
# attribute only displays the bound-method repr, not the statistics.
df.describe()

In [None]:
# Preview the leading rows of the raw frame.
preview_row_count = 112
df.head(preview_row_count)

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Show the dtype pandas assigned to each column.
column_dtypes = df.dtypes
print(column_dtypes)

In [None]:
# Print every row whose Date value is missing.
missing_date_mask = df['Date'].isna()
print(df.loc[missing_date_mask])


In [None]:
# Data Cleaning

# Parse dates; unparseable entries become NaT and are then imputed with the
# median of the valid dates (Series.median skips missing values by default).
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Date'] = df['Date'].fillna(df['Date'].median())

# Impute missing crop types with the most frequent category. mode() is empty
# when the whole column is missing, so guard instead of indexing blindly.
crop_modes = df['Crop_Type'].mode()
if not crop_modes.empty:
    df['Crop_Type'] = df['Crop_Type'].fillna(crop_modes.iloc[0])

# Coerce the measurement columns to numeric (bad values become NaN) and
# impute the gaps with each column's median. Columns absent from the file
# are skipped rather than raising.
numerical_columns = [
    'Soil_Moisture(%)',
    'Soil_pH',
    'Temperature(C)',
    'Humidity(%)',
    'Fertilizer_Recommended(kg/ha)',
    'Irrigation_Recommended(mm)',
]
for column in numerical_columns:
    if column in df.columns:
        df[column] = pd.to_numeric(df[column], errors='coerce')
        df[column] = df[column].fillna(df[column].median())

# Reassign instead of mutating in place; the original row index is kept.
df = df.drop_duplicates()

# Report a duplicate *count* rather than dumping the full boolean Series.
print("\n Remaining duplicate rows: ", df.duplicated().sum())
print("\n Shape of CSV File: ", df.shape)
print("\n Data Types: \n", df.dtypes)

# Final check: remaining missing values per column.
df.isnull().sum()

In [34]:
# Persist the cleaned data. index=False keeps the DataFrame's row index
# (which may have gaps after de-duplication) from being written as an extra
# unnamed first column that would reappear as 'Unnamed: 0' on reload.
df.to_csv('cleaned_precision_agriculture_data.csv', index=False)

In [None]:
# Data Analysis
# Pairwise correlations between the numeric columns, drawn as an annotated heatmap.
numeric = df.select_dtypes(include='number')
corr = numeric.corr()

heatmap_ax = sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm')
heatmap_ax.set_title('Correlation Heatmap of Numeric Columns')
plt.show()

In [None]:
# Side-by-side distributions of soil moisture and soil pH.
fig, (ax_moisture, ax_ph) = plt.subplots(1, 2, figsize=(15, 10))

sns.histplot(df['Soil_Moisture(%)'], bins=30, kde=True, ax=ax_moisture)
ax_moisture.set_title('Soil Moisture Distribution')

sns.histplot(df['Soil_pH'], bins=30, kde=True, ax=ax_ph)
ax_ph.set_title('Soil PH Distribution')

# Render explicitly so the cell does not echo the last Text(...) repr.
plt.show()


In [None]:
# Side-by-side distributions of temperature and humidity.
fig, (ax_temperature, ax_humidity) = plt.subplots(1, 2, figsize=(15, 10))

sns.histplot(df['Temperature(C)'], bins=30, kde=True, ax=ax_temperature)
ax_temperature.set_title('Temperature Distribution')

sns.histplot(df['Humidity(%)'], bins=30, kde=True, ax=ax_humidity)
ax_humidity.set_title('Humidity Distribution')

# Render explicitly so the cell does not echo the last Text(...) repr.
plt.show()

In [None]:
# Side-by-side distributions of the fertilizer and irrigation recommendations.
fig, (ax_fertilizer, ax_irrigation) = plt.subplots(1, 2, figsize=(15, 10))

sns.histplot(df['Fertilizer_Recommended(kg/ha)'], bins=30, kde=True, ax=ax_fertilizer)
ax_fertilizer.set_title('Fertilizer Recommended Distribution')

sns.histplot(df['Irrigation_Recommended(mm)'], bins=30, kde=True, ax=ax_irrigation)
ax_irrigation.set_title('Irrigation Recommended Distribution')

# Render explicitly so the cell does not echo the last Text(...) repr.
plt.show()

In [None]:
# Crop with highest average soil moisture:
# average the moisture per crop type, then pick the crop with the largest mean.
average_moisture = (
    df
    .groupby('Crop_Type')['Soil_Moisture(%)']
    .mean()
)
highest_crop = average_moisture.idxmax()
highest_value = average_moisture.loc[highest_crop]

print(f"Crop with highest average soil moisture: {highest_crop} {highest_value:.2f}")

In [None]:
# Mean recommended irrigation per crop, restricted to rows whose
# Temperature(C) value is above 30.
is_above_30 = df['Temperature(C)'].gt(30)
crops_above_30 = df.loc[is_above_30]
average_irrigation = (
    crops_above_30
    .groupby('Crop_Type')['Irrigation_Recommended(mm)']
    .mean()
)

print("Irrigation Adjustments: ")
print("\n", average_irrigation)

- The variable that most influences fertilizer recommendations is Soil pH: according to the correlation heatmap it has a weak positive correlation of +0.08 with fertilizer recommendations, the largest among the variables, while the other variables have correlations close to zero, suggesting almost no linear relationship with fertilizer recommendations.

- The crop type with the highest average soil moisture is Wheat.

Irrigation Adjustments: 

Beans       17.857692
Lettuce     16.297297
Maize       16.625455
Tomatoes    18.919565
Wheat       18.645946