### Importing Libraries

In [14]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

In [15]:
# Read the dataset from data.csv (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="data.csv")

# Print (row count, column count), then let the cell render the first five rows.
print((len(df.index), len(df.columns)))
df.head(n=5)

(10000, 6)


Unnamed: 0,moisture,temp,humidity,rainfall,time_of_day,pump
0,29.0,29.3,58.5,1,22,0
1,23.9,30.8,57.8,1,22,0
2,30.2,29.6,45.6,0,1,0
3,37.2,32.4,60.8,0,19,0
4,23.1,36.8,40.1,0,14,0


In [16]:
# Seed NumPy's global generator so the noise mask drawn below is repeatable.
np.random.seed(42)

# Rule-based label: 1 when moisture is below 22 AND rainfall equals 0, else 0.
base_pump = (df["moisture"].lt(22) & df["rainfall"].eq(0)).astype(int)

# Boolean mask, one uniform draw per row; True for draws under 0.10
# (about 10% of rows are expected to be marked as noisy).
noise = np.random.rand(df.shape[0]) < 0.10

# Keep the rule label on clean rows and invert it (0 <-> 1) on noisy rows.
# This overwrites the "pump" column that was loaded with the data.
df["pump"] = np.where(~noise, base_pump, 1 - base_pump)

# Show the resulting class balance.
print(df.loc[:, "pump"].value_counts())

pump
0    6912
1    3088
Name: count, dtype: int64


In [17]:
# Target vector: the pump label column.
y = df["pump"]

# Feature matrix: every column of df except the target and "rainfall".
# NOTE(review): "rainfall" is dropped here although the rule that builds
# df["pump"] reads df["rainfall"] - confirm the exclusion is intentional.
X = df.drop(columns=["pump", "rainfall"])

print(X.columns)

Index(['moisture', 'temp', 'humidity', 'time_of_day'], dtype='object')


In [18]:
# Hold out 25% of the rows for testing. The split is seeded (random_state=42)
# and stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.25, random_state=42
)

# Print the shapes of the training and test feature matrices.
print(*(part.shape for part in (X_train, X_test)))

(7500, 4) (2500, 4)


In [19]:
# Build a decision tree capped at depth 5 with at least 20 samples per leaf
# (presumably to limit overfitting - confirm), seeded for repeatability,
# and fit it on the training split in a single chained call.
model = DecisionTreeClassifier(
    random_state=42, max_depth=5, min_samples_leaf=20
).fit(X_train, y_train)

print("Model trained ✅")

Model trained ✅


In [20]:
# Predict labels for the held-out test rows.
y_pred = model.predict(X_test)

# Compare predictions with the true test labels: overall accuracy,
# the confusion matrix, and the per-class classification report.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)

Accuracy: 0.8236

Confusion Matrix:
 [[1468  260]
 [ 181  591]]

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.85      0.87      1728
           1       0.69      0.77      0.73       772

    accuracy                           0.82      2500
   macro avg       0.79      0.81      0.80      2500
weighted avg       0.83      0.82      0.83      2500



In [21]:
# 5-fold cross-validation of the same estimator configuration over the full
# X and y (not only the training split).
scores = cross_val_score(estimator=model, X=X, y=y, cv=5)

# Per-fold scores followed by their mean.
print("CV scores:", scores)
print("Mean CV accuracy:", np.mean(scores))

CV scores: [0.8245 0.816  0.829  0.82   0.822 ]
Mean CV accuracy: 0.8222999999999999


In [22]:
# Pair each feature column name with the fitted tree's importance score.
importance = pd.DataFrame(
    {"Feature": X.columns, "Importance": model.feature_importances_}
)

# Order the table from most to least important; the original row labels are kept.
importance = importance.sort_values("Importance", ascending=False)

importance

Unnamed: 0,Feature,Importance
0,moisture,0.972064
2,humidity,0.018238
1,temp,0.008146
3,time_of_day,0.001552


In [23]:
# Write the fitted model to smartirrigation.pkl in the working directory.
joblib.dump(value=model, filename="smartirrigation.pkl")

print("Model saved as smartirrigation.pkl ✅")

Model saved as smartirrigation.pkl ✅
