In [49]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from datetime import datetime

df = pd.read_csv('climate_action_data.csv')
df.head()

Unnamed: 0,Sensor_ID,Date,Soil_Moisture(%),Soil_pH,Temperature(C),Humidity(%),Crop_Type,Fertilizer_Recommended(kg/ha),Irrigation_Recommended(mm),Drone_Image_ID
0,SEN-1000,2025-01-01,36.22,6.98,21.3,50.4,Tomatoes,67.6,26.6,IMG-2000
1,SEN-1001,2025-01-02,76.55,5.03,23.5,34.3,Wheat,130.4,8.3,IMG-2001
2,SEN-1002,2025-01-03,61.24,6.52,21.9,54.6,Tomatoes,36.0,29.4,IMG-2002
3,SEN-1003,2025-01-04,51.91,6.62,24.0,48.7,Maize,85.8,19.6,IMG-2003
4,SEN-1004,2025-01-05,20.92,5.98,19.2,70.6,Wheat,75.6,29.9,IMG-2004


In [50]:
df.shape

(913, 10)

In [51]:
df.describe()

Unnamed: 0,Sensor_ID,Date,Soil_Moisture(%),Soil_pH,Temperature(C),Humidity(%),Crop_Type,Fertilizer_Recommended(kg/ha),Irrigation_Recommended(mm),Drone_Image_ID
count,913,909,913,913.0,913.0,913.0,909,913.0,913.0,913
unique,800,798,750,313.0,170.0,448.0,5,595.0,243.0,800
top,SEN-1430,2026-09-10,error,7.54,19.1,56.0,Wheat,66.9,8.0,IMG-2430
freq,2,2,5,11.0,13.0,8.0,203,8.0,11.0,2


In [52]:
df.dtypes

Sensor_ID                        object
Date                             object
Soil_Moisture(%)                 object
Soil_pH                          object
Temperature(C)                   object
Humidity(%)                      object
Crop_Type                        object
Fertilizer_Recommended(kg/ha)    object
Irrigation_Recommended(mm)       object
Drone_Image_ID                   object
dtype: object

In [53]:
df.isnull().sum()

Sensor_ID                        0
Date                             4
Soil_Moisture(%)                 0
Soil_pH                          0
Temperature(C)                   0
Humidity(%)                      0
Crop_Type                        4
Fertilizer_Recommended(kg/ha)    0
Irrigation_Recommended(mm)       0
Drone_Image_ID                   0
dtype: int64

In [54]:
numeric_cols = ['Soil_Moisture(%)','Soil_pH','Temperature(C)','Humidity(%)','Fertilizer_Recommended(kg/ha)','Irrigation_Recommended(mm)']
for col in numeric_cols:
    if col in df.columns:
        #change them to numeric
        df[col]= pd.to_numeric(df[col], errors='coerce')
        #Replace NaN with median
        df[col] = df[col].fillna(df[col].median)
        
    else:
        print(f'{col} is not part of the dataset')

print(f'Datatypes after handling numeric values')
df.dtypes

Datatypes after handling numeric values


Sensor_ID                        object
Date                             object
Soil_Moisture(%)                 object
Soil_pH                          object
Temperature(C)                   object
Humidity(%)                      object
Crop_Type                        object
Fertilizer_Recommended(kg/ha)    object
Irrigation_Recommended(mm)       object
Drone_Image_ID                   object
dtype: object

In [55]:
#Non numeric cols
non_numeric_cols = ['Sensor_ID','Crop_Type','Drone_Image_ID']
#Replace null values with the mode
for col in non_numeric_cols:
    if col in df.columns:
        df[col]=df[col].fillna(df[col].mode)
    else:
        print(f'{col} is not a column in the dataset')
print('Null values in the dataset after handling:')
df.isnull().sum()

Null values in the dataset after handling:


Sensor_ID                        0
Date                             4
Soil_Moisture(%)                 0
Soil_pH                          0
Temperature(C)                   0
Humidity(%)                      0
Crop_Type                        0
Fertilizer_Recommended(kg/ha)    0
Irrigation_Recommended(mm)       0
Drone_Image_ID                   0
dtype: int64

In [56]:
#Deal with dates
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df.dtypes

Sensor_ID                                object
Date                             datetime64[ns]
Soil_Moisture(%)                         object
Soil_pH                                  object
Temperature(C)                           object
Humidity(%)                              object
Crop_Type                                object
Fertilizer_Recommended(kg/ha)            object
Irrigation_Recommended(mm)               object
Drone_Image_ID                           object
dtype: object