In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Load the crop irrigation dataset from the current working directory.
# If the file is missing we report it and re-raise: continuing would either
# hit a confusing NameError on `df` below or, worse, silently reuse a stale
# `df` left in the kernel by an earlier run.
try:
    df = pd.read_csv('crop_irrigation_synth_2years.csv')
except FileNotFoundError:
    print("Error: The file 'crop_irrigation_synth_2years.csv' was not found. Please ensure it is in the same directory.")
    raise
else:
    print("Dataset loaded successfully!")

# Sanity check: report the row count and preview the first rows.
print(f"Number of rows in the DataFrame: {len(df)}")
print(df.head())

Dataset loaded successfully!
Number of rows in the DataFrame: 473040
         Date   State   Crop Growth_Stage  Rainfall_mm  Temp_Max_C  \
0  2022-01-01  Punjab  Wheat   Vegetative          0.0        14.9   
1  2022-01-02  Punjab  Wheat   Vegetative          0.0        12.1   
2  2022-01-03  Punjab  Wheat    Flowering          0.0        13.7   
3  2022-01-04  Punjab  Wheat    Flowering          1.0        13.8   
4  2022-01-05  Punjab  Wheat    Flowering          0.0        12.5   

   Temp_Min_C  Humidity_Percent Soil_Type  Soil_Moisture_Percent  \
0         7.2              30.5     Silty                   32.3   
1         7.3              42.0     Silty                   37.4   
2         8.6              46.0     Silty                   41.2   
3         6.9              35.9     Silty                   44.1   
4         6.9              39.2     Silty                   40.0   

   Irrigation_Required_mm  
0                    4.31  
1                    3.97  
2                

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Separate features (X) and target (y). 'Date' is dropped together with the
# target, so it is not used as a model input.
X = df.drop(['Irrigation_Required_mm', 'Date'], axis=1)
y = df['Irrigation_Required_mm']

# Identify categorical and numerical columns
categorical_features = ['State', 'Crop', 'Growth_Stage', 'Soil_Type']
numerical_features = ['Rainfall_mm', 'Temp_Max_C', 'Temp_Min_C', 'Humidity_Percent', 'Soil_Moisture_Percent']

# Fail early, with a readable message, if the frame lacks an expected column.
missing_columns = [
    column
    for column in categorical_features + numerical_features
    if column not in X.columns
]
if missing_columns:
    raise KeyError(f"Missing expected feature columns: {missing_columns}")

# One-hot encode the categorical columns; every other column is passed
# through unchanged. Categories not seen during fitting are ignored at
# transform time instead of raising (handle_unknown='ignore').
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

# Split the raw rows BEFORE fitting the preprocessor so that nothing learned
# from the held-out rows leaks into training (or into the preprocessor that
# is persisted later). The fixed random_state keeps the split reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit on the training rows only, then apply the fitted transform to the test rows.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Kept so that any other cell referencing the fully transformed matrix still works.
X_processed = preprocessor.transform(X)

In [19]:
from sklearn.ensemble import RandomForestRegressor

# Build the random forest regressor (100 trees, fixed seed, n_jobs=-1) and
# fit it on the training split in a single expression; fit() returns the
# fitted estimator, so `model` is ready for prediction afterwards.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)

print("Random Forest model trained successfully!")

Random Forest model trained successfully!


In [21]:
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
import numpy as np

# Predict on the held-out split.
y_pred = model.predict(X_test)

# Score the predictions: MAE, RMSE (square root of the MSE) and R^2.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Report all three metrics, one per line, rounded to two decimals.
print(
    f"Mean Absolute Error (MAE): {mae:.2f} mm",
    f"Root Mean Squared Error (RMSE): {rmse:.2f} mm",
    f"R-squared (R2) Score: {r2:.2f}",
    sep="\n",
)

Mean Absolute Error (MAE): 0.01 mm
Root Mean Squared Error (RMSE): 0.02 mm
R-squared (R2) Score: 1.00


In [24]:
import joblib
import os

# Directory where the fitted artifacts are written. Defaults to the original
# D: drive location, but can be overridden with the IRRIGATION_MODEL_DIR
# environment variable so the notebook also runs on machines without a D: drive.
save_path = os.environ.get('IRRIGATION_MODEL_DIR', 'D:/my_models/')

# Create the directory if needed. exist_ok=True avoids the check-then-create
# race, and makedirs raises a clear error if the path exists but is a file.
directory_already_present = os.path.isdir(save_path)
os.makedirs(save_path, exist_ok=True)
if not directory_already_present:
    print(f"Directory created at: {save_path}")

# Persist the trained model and the fitted preprocessor side by side; both
# are needed to reproduce predictions on new raw rows.
joblib.dump(model, os.path.join(save_path, 'irrigation_model.joblib'))
joblib.dump(preprocessor, os.path.join(save_path, 'data_preprocessor.joblib'))

print(f"Models and preprocessor saved to: {save_path}")

Directory created at: D:/my_models/
Models and preprocessor saved to: D:/my_models/
