In [6]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import joblib

# 1. Read the raw fertilizer dataset from disk
df = pd.read_csv('Fertilizer Prediction.csv')

# 2. Normalise the header: trim surrounding whitespace, lowercase,
#    and turn inner spaces into underscores
normalized_columns = df.columns.str.strip().str.lower().str.replace(' ', '_')
df.columns = normalized_columns

# 3. Map misspelled / variant column names onto the names used by the
#    rest of this script (e.g. "temparature" -> "temperature")
column_fixes = {
    'temparature': 'temperature',
    'phosphorous': 'phosphorus',  # optional normalization
}
df = df.rename(columns=column_fixes)

print("✅ Cleaned Columns:", list(df.columns))

# 4. One dedicated encoder per categorical column, kept under its own name
#    so each can be persisted separately later on
soil_encoder, crop_encoder, fertilizer_encoder = (
    LabelEncoder(),
    LabelEncoder(),
    LabelEncoder(),
)

# 5. Fit each encoder on its column and overwrite the column with the
#    encoded values it returns
for column_name, column_encoder in (
    ('soil_type', soil_encoder),
    ('crop_type', crop_encoder),
    ('fertilizer_name', fertilizer_encoder),
):
    df[column_name] = column_encoder.fit_transform(df[column_name])

# 6. Pick the model inputs (X) and the prediction target (y).
#    The order of feature_columns is the column order the model is fitted on.
feature_columns = [
    'temperature', 'humidity', 'moisture',
    'soil_type', 'crop_type',
    'nitrogen', 'phosphorus', 'potassium',
]
X = df[feature_columns]
y = df['fertilizer_name']

# 7. Hold out 20% of the rows as a test set; the fixed seed keeps the
#    partition the same from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 8. Train model
# Seed the forest with the same value as the train/test split above so that
# re-running this cell yields the same fitted model (and therefore the same
# saved .pkl). Without random_state the forest's internal randomness differs
# on every run, which makes results impossible to reproduce or compare.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
# NOTE(review): X_test / y_test are not scored anywhere in this cell —
# consider reporting model.score(X_test, y_test) before shipping the model.

# 9. Persist the fitted model together with the three encoders.
#    Each entry maps an output filename to the object written to it;
#    dict insertion order keeps the write order model -> soil -> crop -> fertilizer.
artifacts = {
    'fertilizer_prediction_model.pkl': model,
    'soil_encoder.pkl': soil_encoder,
    'crop_encoder.pkl': crop_encoder,
    'fertilizer_encoder.pkl': fertilizer_encoder,
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

print("✅ Fertilizer model and encoders saved successfully!")


✅ Cleaned Columns: ['temperature', 'humidity', 'moisture', 'soil_type', 'crop_type', 'nitrogen', 'potassium', 'phosphorus', 'fertilizer_name']
✅ Fertilizer model and encoders saved successfully!
