In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split


In [2]:
df = pd.read_csv("dataset/irrigation.csv")

# Clean column names by stripping whitespace and renaming
df.columns = df.columns.str.strip()
column_mapping = {
    'District': 'District',
    'Crop': 'Crop_Type',
    'Soil_Type': 'Soil_Type',
    'Day_After_Sowing': 'Day_After_Sowing',
    'Temperature_C': 'Temperature(°C)',
    'Rainfall_mm': 'Rainfall(mm)',
    'Water_Requirement_mm_per_day': 'Water_Requirement(mm/day)',
    'Area_in_Acre': 'Area_in_Acre',
    'Water_Requirement_Liters_per_day': 'Water_Requirement(L/day)'
}
df = df.rename(columns=column_mapping)

print("DataFrame columns after cleaning:", df.columns.tolist())
df.head()

DataFrame columns after cleaning: ['District', 'Crop_Type', 'Soil_Type', 'Day_After_Sowing', 'Temperature(°C)', 'Rainfall(mm)', 'Water_Requirement(mm/day)', 'Area_in_Acre', 'Water_Requirement(L/day)']


Unnamed: 0,District,Crop_Type,Soil_Type,Day_After_Sowing,Temperature(°C),Rainfall(mm),Water_Requirement(mm/day),Area_in_Acre,Water_Requirement(L/day)
0,Hazaribagh,Barley,Red Soil,25,31.8,5.0,5.39,2.5,54531.4
1,Saraikela-Kharsawan,Urad,Red Soil,116,29.7,2.1,8.02,2.0,64911.6
2,Pakur,Jackfruit,Gravelly Loam,122,28.4,13.9,10.29,1.0,41642.2
3,Koderma,Ragi,Alluvial Soil,42,25.1,11.3,4.44,0.5,8984.0
4,Deoghar,Millet,Red Soil,2,33.2,6.9,5.1,3.0,61917.0


In [3]:
# Encode district, soil type, crop type
le_district = LabelEncoder()
df['District_enc'] = le_district.fit_transform(df['District'])

le_soil = LabelEncoder()
df['Soil_Type_enc'] = le_soil.fit_transform(df['Soil_Type'])

le_crop = LabelEncoder()
df['Crop_Type_enc'] = le_crop.fit_transform(df['Crop_Type'])

# Features and target
X = df[['District_enc', 'Soil_Type_enc', 'Crop_Type_enc',
        'Day_After_Sowing', 'Temperature(°C)',
        'Rainfall(mm)', 'Area_in_Acre']].values  # Removed Humidity(%) as it's not in the dataset
y = df['Water_Requirement(L/day)'].values.reshape(-1,1)

# Scale features and target
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(y)

In [4]:
class IrrigationDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)
train_dataset = IrrigationDataset(X_train, y_train)
test_dataset = IrrigationDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)


In [5]:
class IrrigationModel(nn.Module):
    def __init__(self, input_dim):
        super(IrrigationModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    
    def forward(self, x):
        return self.model(x)

input_dim = X_train.shape[1]
model = IrrigationModel(input_dim)


In [6]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 50

for epoch in range(num_epochs):
    model.train()
    running_loss = 0
    for features, target in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {running_loss/len(train_loader):.4f}")


Epoch [1/50] Loss: 0.1082
Epoch [2/50] Loss: 0.0743
Epoch [2/50] Loss: 0.0743
Epoch [3/50] Loss: 0.0709
Epoch [3/50] Loss: 0.0709
Epoch [4/50] Loss: 0.0685
Epoch [4/50] Loss: 0.0685
Epoch [5/50] Loss: 0.0667
Epoch [5/50] Loss: 0.0667
Epoch [6/50] Loss: 0.0599
Epoch [6/50] Loss: 0.0599
Epoch [7/50] Loss: 0.0564
Epoch [7/50] Loss: 0.0564
Epoch [8/50] Loss: 0.0543
Epoch [8/50] Loss: 0.0543
Epoch [9/50] Loss: 0.0533
Epoch [9/50] Loss: 0.0533
Epoch [10/50] Loss: 0.0513
Epoch [10/50] Loss: 0.0513
Epoch [11/50] Loss: 0.0480
Epoch [11/50] Loss: 0.0480
Epoch [12/50] Loss: 0.0447
Epoch [12/50] Loss: 0.0447
Epoch [13/50] Loss: 0.0418
Epoch [13/50] Loss: 0.0418
Epoch [14/50] Loss: 0.0382
Epoch [14/50] Loss: 0.0382
Epoch [15/50] Loss: 0.0344
Epoch [15/50] Loss: 0.0344
Epoch [16/50] Loss: 0.0296
Epoch [16/50] Loss: 0.0296
Epoch [17/50] Loss: 0.0248
Epoch [17/50] Loss: 0.0248
Epoch [18/50] Loss: 0.0218
Epoch [18/50] Loss: 0.0218
Epoch [19/50] Loss: 0.0196
Epoch [19/50] Loss: 0.0196
Epoch [20/50] Loss

In [7]:
model.eval()
with torch.no_grad():
    y_preds = []
    y_true = []
    for features, target in test_loader:
        outputs = model(features)
        y_preds.append(outputs)
        y_true.append(target)
    
    y_preds = torch.cat(y_preds).numpy()
    y_true = torch.cat(y_true).numpy()

    # Inverse scale
    y_preds_inv = scaler_y.inverse_transform(y_preds)
    y_true_inv = scaler_y.inverse_transform(y_true)

    from sklearn.metrics import mean_absolute_error, r2_score
    mae = mean_absolute_error(y_true_inv, y_preds_inv)
    r2 = r2_score(y_true_inv, y_preds_inv)
    print(f"MAE: {mae:.2f}, R2 Score: {r2:.2f}")


MAE: 1774.42, R2 Score: 1.00


In [8]:
torch.save(model.state_dict(), "irrigation_model.pt")
import joblib
joblib.dump(scaler_X, "scaler_X.pkl")
joblib.dump(scaler_y, "scaler_y.pkl")

# Save LabelEncoders
joblib.dump(le_district, "le_district.pkl")
joblib.dump(le_soil, "le_soil.pkl")
joblib.dump(le_crop, "le_crop.pkl")

print("✅ Model, scalers, and encoders saved!")


✅ Model, scalers, and encoders saved!
