<a href="https://colab.research.google.com/github/Vinikiran006/Week2/blob/main/lighGBM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from lightgbm import LGBMClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# -----------------------------
# Load Data
# -----------------------------
raw_df = pd.read_csv("data_core_updated.csv")

# -----------------------------
# Feature Engineering
# -----------------------------
# Derived nutrient features:
#   NPK_total   - sum of the three nutrient columns
#   NPK_ratio_* - one nutrient divided by (the other two + 1); the +1 keeps
#                 the denominator non-zero when both other nutrients are 0
#                 (assuming nutrient values are non-negative - TODO confirm).
nitrogen = raw_df["Nitrogen"]
phosphorus = raw_df["Phosphorus"]
potassium = raw_df["Potassium"]

df = raw_df.assign(
    NPK_total=nitrogen + phosphorus + potassium,
    NPK_ratio_N=nitrogen / (phosphorus + potassium + 1),
    NPK_ratio_P=phosphorus / (nitrogen + potassium + 1),
    NPK_ratio_K=potassium / (nitrogen + phosphorus + 1),
)

# -----------------------------
# Separate Features & Target
# -----------------------------
# "Fertilizer" is the label; every other column of df stays in X.
y = df["Fertilizer"]
X = df.drop(columns=["Fertilizer"])

# Identify columns
# Numeric inputs: raw measurements followed by the engineered NPK features.
num_features = [
    "Temperature",
    "Humidity",
    "Soil Moisture",
    "Nitrogen",
    "Phosphorus",
    "Potassium",
    "NPK_total",
    "NPK_ratio_N",
    "NPK_ratio_P",
    "NPK_ratio_K",
]
# Categorical inputs that get one-hot encoded.
cat_features = ["Soil Type", "Crop Type"]

# -----------------------------
# Preprocessing
# -----------------------------
# Numeric columns are standardised; categorical columns are one-hot encoded.
# handle_unknown="ignore" lets the encoder accept categories at predict time
# that it did not see during fit instead of raising an error.
numeric_step = ("num", StandardScaler(), num_features)
categorical_step = ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# -----------------------------
# Build Pipeline with LightGBM
# -----------------------------
# Preprocessing and the classifier are chained so that a single fitted object
# can be applied to raw feature frames (fit/predict run both steps).
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LGBMClassifier(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=-1,             # -1 = no explicit depth limit
        class_weight="balanced",  # reweight classes to offset class imbalance
        random_state=42,
        # Silence LightGBM's per-iteration info/warning messages. Without
        # this, training output grew so large that the notebook truncated it
        # ("Streaming output truncated to the last 5000 lines"), pushing the
        # evaluation results out of view. Affects logging only, not the model.
        verbose=-1,
    ))
])

# -----------------------------
# Train-Test Split
# -----------------------------
# Hold out 20% of the rows for testing; stratify on the target so both splits
# keep the same class proportions. Fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# -----------------------------
# Train
# -----------------------------
# Fits the preprocessor and the classifier on the training rows only.
pipeline.fit(X_train, y_train)

# -----------------------------
# Predictions
# -----------------------------
y_pred = pipeline.predict(X_test)

# -----------------------------
# Evaluation
# -----------------------------
# Compute all hold-out metrics first, then print them together.
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred, digits=3)

print("Test Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m




Test Accuracy: 0.8505555555555555

Confusion Matrix:
 [[ 84   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   7   5   0   0   6]
 [  0  14   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   1   0   0   0   0]
 [  0   0  11   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   1   0   0   0]
 [  0   0   0  10   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   2   0   0   0]
 [  0   0   0   0  35   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   2   2   0   0   3]
 [  0   0   0   0   0  23   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   5   0   0   2]
 [  0   0   0   0   1   0  11   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0  14   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   2   0]
 [  0   0   0   0   0   0   0   0  30   0   0   0   0   0   0   0   0   0
    0   0   2   7   0   1   2]
 

In [None]:
import sklearn
# Print the installed scikit-learn version.
# NOTE(review): presumably recorded because the pipeline is pickled below and
# should be reloaded under the same scikit-learn version - confirm.
print(sklearn.__version__)


1.6.1


In [None]:
import joblib

# Persist the fitted pipeline (preprocessor + classifier) to disk as a single
# file so it can be reloaded later without retraining.
joblib.dump(value=pipeline, filename="fertilizer_model.pkl")

print("Model saved successfully as fertilizer_model.pkl")

Model saved successfully as fertilizer_model.pkl


In [None]:

# Reload the saved pipeline from disk.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
model = joblib.load("fertilizer_model.pkl")

# Number of input features
n_input_features = model.n_features_in_
print(n_input_features)

12


In [None]:
# (rows, columns) of the feature matrix the pipeline was fitted on; the column
# count can be compared with the loaded model's n_features_in_.
X_train.shape


(7200, 12)

In [None]:
# Column names the pipeline saw at fit time, in input order. Frames passed to
# model.predict need to provide these columns.
trained_feature_names = model.feature_names_in_
print("Feature names used to train the model:", trained_feature_names)

Feature names used to train the model: ['Temperature' 'Humidity' 'Soil Moisture' 'Soil Type' 'Crop Type'
 'Nitrogen' 'Potassium' 'Phosphorus' 'NPK_total' 'NPK_ratio_N'
 'NPK_ratio_P' 'NPK_ratio_K']
