In [1]:
# Install the packages this notebook imports into the kernel's own environment.
# xgboost is required by the training cell; OneHotEncoder(sparse_output=...) needs scikit-learn >= 1.2.
%pip install pandas numpy scikit-learn xgboost joblib


Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
import pandas as pd
import logging
import os
import traceback
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Configure file logging: INFO and above are written to notebooks/training.log
# (path is relative to the current working directory; the folder is created on demand).
log_folder = "notebooks"
log_file = os.path.join(log_folder, "training.log")
os.makedirs(log_folder, exist_ok=True)
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    filename=log_file,
)

def log_exception():
    """Log the exception currently being handled, together with where it was raised.

    Intended to be called from inside an ``except`` block. Emits a single ERROR
    record on the root logger of the form
    ``Exception: <Type>: <message>, Location: <file>:<line> in <function>``,
    where the location is the innermost traceback frame (the line that raised).

    If no exception is active, an ERROR record stating so is logged instead of
    raising, so the helper itself can never mask the original problem.
    """
    import sys  # local import: `sys` is not among the notebook's top-level imports

    exc_type, exc_value, exc_tb = sys.exc_info()
    if exc_type is None:
        logging.error("log_exception() called with no active exception.")
        return

    # Use structured traceback data rather than slicing the formatted text:
    # the number of trailing text lines varies (caret markers, multi-line
    # messages, frames without source), so positional slicing mislabels fields.
    frames = traceback.extract_tb(exc_tb)
    if frames:
        origin = frames[-1]
        location = f"{origin.filename}:{origin.lineno} in {origin.name}"
    else:
        location = "<unknown>"

    logging.error(f"Exception: {exc_type.__name__}: {exc_value}, Location: {location}")

# End-to-end training cell: load the CSV, one-hot encode the categorical
# features, label-encode the target, split, min-max scale the numerical
# features (fitted on the training split only), then train and evaluate XGBoost.
# Leaves `encoder`, `target_encoder`, `scaler` and `xgb` in the namespace for
# the cell that pickles them.
try:
    # Load dataset
    dataset_path = "irrigation_data.csv"
    dataset = pd.read_csv(dataset_path)
    logging.info("Dataset loaded successfully.")

    # Define feature columns
    # NOTE(review): "Temperature " has a trailing space — presumably it mirrors
    # the CSV header exactly; confirm before normalising the name.
    numerical_features = ["Rainfall (mm)", "Soil Moisture (%)", "Temperature ", "Evaporation Rate (mm/day)"]
    categorical_features = ["Crop Type", "Water Availability", "Soil Type"]

    # Step 1: Apply One-Hot Encoding to Categorical Features
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    X_encoded = encoder.fit_transform(dataset[categorical_features])
    # Reuse the dataset's index so the axis=1 concat below aligns row-for-row
    # even if the frame does not carry a default RangeIndex.
    X_encoded_df = pd.DataFrame(
        X_encoded,
        columns=encoder.get_feature_names_out(categorical_features),
        index=dataset.index,
    )
    logging.info("One-hot encoding applied.")

    # Step 2: Label Encode Target Variable
    # (overwrites the "Irrigation Type" column of `dataset` with integer codes)
    target_encoder = LabelEncoder()
    dataset["Irrigation Type"] = target_encoder.fit_transform(dataset["Irrigation Type"])
    y = dataset["Irrigation Type"]
    logging.info("Target variable encoded.")

    # Combine Encoded Features with Numerical Data
    X = pd.concat([X_encoded_df, dataset[numerical_features]], axis=1)

    # Step 3: Split Dataset into Train & Test Sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0, stratify=y)
    # Take explicit copies: the split frames are derived from X, and assigning
    # scaled columns into them can otherwise raise SettingWithCopyWarning.
    X_train = X_train.copy()
    X_test = X_test.copy()
    logging.info("Dataset split into training and testing sets.")

    # Step 4: Normalize Only Numerical Columns
    # The scaler is fitted on the training split only and merely applied to the
    # test split, so no test-set statistics leak into training.
    scaler = MinMaxScaler()
    X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
    X_test[numerical_features] = scaler.transform(X_test[numerical_features])
    logging.info("Feature scaling applied.")

    # XGBoost Model
    print("\n----- XGBoost Training -----\n")
    logging.info("Starting XGBoost training.")

    xgb = XGBClassifier(eval_metric='mlogloss', random_state=0)
    xgb.fit(X_train, y_train)
    y_pred_xgb = xgb.predict(X_test)

    accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
    logging.info(f"XGBoost Accuracy: {accuracy_xgb:.2f}")
    print(f"XGBoost Accuracy: {accuracy_xgb:.2f}")

    # Display Confusion Matrix for XGBoost
    print("XGBoost Confusion Matrix:\n", confusion_matrix(y_test, y_pred_xgb))
    logging.info("XGBoost confusion matrix computed.")

except Exception as e:
    # The failure is logged and reported but not re-raised, so execution
    # continues; names such as `xgb` may then be undefined (or stale from an
    # earlier run) when the cell that pickles them executes.
    log_exception()
    print(f"An error occurred: {e}")



----- XGBoost Training -----

XGBoost Accuracy: 0.64
XGBoost Confusion Matrix:
 [[26  0  0  1  2  2  1  0]
 [ 2  3  1  1  2  1  1  1]
 [ 2  1 26  0  0  3  0  2]
 [ 2  1  2 15  0  0  1  1]
 [ 2  2  0  0  2  0  1  4]
 [ 5  3  1  1  2 20  0  1]
 [ 1  3  1  2  2  1  0  1]
 [ 1  0  2  1  2  2  1 36]]


In [7]:
import pickle

# Persist the trained model and the fitted preprocessing objects as pickle files.
# The destination defaults to the original local path but can be redirected by
# setting the IRRIGATION_MODELS_DIR environment variable, so the notebook is
# not tied to one machine's directory layout.
models_folder = os.environ.get(
    "IRRIGATION_MODELS_DIR",
    r"C:\Users\VINIL\Desktop\irrigation_type\flask_app\models",
)
os.makedirs(models_folder, exist_ok=True)

# Define file paths
model_path = os.path.join(models_folder, "xgboost_irrigation_model.pkl")
encoder_path = os.path.join(models_folder, "onehot_encoder.pkl")
scaler_path = os.path.join(models_folder, "minmax_scaler.pkl")
label_encoder_path = os.path.join(models_folder, "label_encoder.pkl")

# Each (destination, object) pair is written with the same open/dump sequence:
# the XGBoost model, the OneHotEncoder, the MinMaxScaler and the LabelEncoder.
artifacts = [
    (model_path, xgb),
    (encoder_path, encoder),
    (scaler_path, scaler),
    (label_encoder_path, target_encoder),
]
for artifact_path, artifact in artifacts:
    with open(artifact_path, "wb") as artifact_file:
        pickle.dump(artifact, artifact_file)

logging.info("XGBoost model and preprocessing objects saved successfully in the models folder.")
print("Model and encoders saved successfully in:", models_folder)


Model and encoders saved successfully in: C:\Users\VINIL\Desktop\irrigation_type\flask_app\models
