In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import pickle

# Read the crop-recommendation table (path is relative to the working directory)
DATASET_PATH = '../data/Crop Recommendation using Soil Properties and Weather Prediction.csv'
df = pd.read_csv(DATASET_PATH)

# The website demo uses 7 input columns: three weather readings followed by
# four soil measurements. Column order here is the order the model is trained on.
weather_columns = [
    "T2M_MAX-W",       # temperature
    "QV2M-W",          # humidity
    "PRECTOTCORR-W",   # rainfall
]
soil_columns = [
    "Ph",              # soil pH
    "N",               # nitrogen
    "P",               # phosphorus
    "K",               # potassium
]
features = weather_columns + soil_columns

# Feature matrix, plus the "label" column encoded as integer class ids
X = df[features]
target_labels = df["label"]
le = LabelEncoder()
y = le.fit_transform(target_labels)

# Split BEFORE scaling. The scaler must learn its mean/variance from the
# training rows only; fitting it on the full dataset lets test-set statistics
# leak into the training features and into the scaler that gets saved.
# The same test_size/random_state selects the same rows for each side as a
# split performed on pre-scaled data would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale: fit on the training partition, then apply the same transform to test
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix, kept so code that still refers to X_scaled keeps
# working. It is produced with the train-fitted scaler, not a refit.
X_scaled = scaler.transform(X)

# Train model (fixed seed so the fitted forest is reproducible)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Serialize the three fitted objects (classifier, scaler, label encoder) to
# pickle files in the current working directory, one file per object.
artifacts = {
    "rf_model.pkl": rf_model,
    "scaler.pkl": scaler,
    "label_encoder.pkl": le,
}

for filename, fitted_obj in artifacts.items():
    # Binary mode is required by pickle; the context manager closes the file
    with open(filename, "wb") as f:
        pickle.dump(fitted_obj, f)

