# Crop Recommendation System - Model Building
## Step-by-step ML pipeline

### Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

### Step 2: Load Dataset

In [None]:
# Read the crop dataset CSV (path is relative to this notebook) into a
# DataFrame, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer="../data/crop_recommendation.csv")
df.head(n=5)

In [None]:
# Print the DataFrame summary produced by DataFrame.info().
df.info()

In [None]:
# Show the descriptive statistics table produced by DataFrame.describe().
df.describe()

In [None]:
# Count how many rows carry each distinct value of the 'label' column.
df['label'].value_counts()

### Step 3: Exploratory Data Analysis (EDA)

In [None]:
# Tally the missing entries in each column.
df.isna().sum()

In [None]:
# Feature distributions: draw the histogram grid produced by DataFrame.hist()
# on a 12x8 inch figure.
df.hist(figsize=(12, 8))
# Adjust subplot spacing so the panels do not overlap before rendering.
plt.tight_layout()
plt.show()

In [None]:
# Correlation heatmap
# Only numeric columns can be correlated. Calling corr() on the full frame
# raises ValueError on pandas >= 2.0 when a non-numeric column (such as a
# text 'label' target) is present, so restrict to numeric dtypes first.
numeric_features = df.select_dtypes(include='number')

plt.figure(figsize=(10,6))
sns.heatmap(numeric_features.corr(), annot=True, cmap='coolwarm')
plt.title('Feature Correlation Heatmap')
plt.show()

### Step 4: Split Features & Target

In [None]:
# The 'label' column is the prediction target; every other column is a feature.
y = df['label']
X = df.drop(columns=['label'])

# Hold out 20% of the rows for testing. Stratifying on y asks for the class
# proportions to be preserved in both splits, and the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print(f"Training set size: {X_train.shape}")
print(f"Test set size: {X_test.shape}")

### Step 5: Train Random Forest Model

In [None]:
# Build a 200-tree random forest with a fixed seed and fit it on the training
# split. fit() returns the estimator itself, so construction and training are
# chained into a single assignment.
rf_model = RandomForestClassifier(random_state=42, n_estimators=200).fit(X_train, y_train)
print("Model training complete!")

### Step 6: Evaluate the Model

In [None]:
# Predict labels for the held-out test split.
y_pred = rf_model.predict(X_test)

# Compute the metrics first, then report them.
test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", per_class_report)

In [None]:
# Confusion matrix
# Build the label list explicitly: the sorted union of true and predicted
# labels, which is the same set and order confusion_matrix infers by default.
# Passing it in lets the heatmap ticks show class names instead of bare
# row/column indices, so each cell can be read as a (true, predicted) pair.
class_names = sorted(set(y_test) | set(y_pred))
cm = confusion_matrix(y_test, y_pred, labels=class_names)

plt.figure(figsize=(12, 8))
sns.heatmap(
    cm,
    annot=False,
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title("Confusion Matrix")
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

### Step 7: Test with Sample Input

In [None]:
# One hand-written observation, laid out as a single-row DataFrame whose
# columns are the named feature values passed to the trained model.
sample = pd.DataFrame({
    'N': [90],
    'P': [42],
    'K': [43],
    'temperature': [20.5],
    'humidity': [80],
    'ph': [6.5],
    'rainfall': [200],
})

# predict() returns one label per input row; there is exactly one row here.
prediction = rf_model.predict(sample)
print("Recommended Crop:", prediction[0])

### Step 8: Save the Trained Model

In [None]:
from pathlib import Path

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists first (a no-op when it is already there). Without this
# the cell fails with FileNotFoundError on a fresh checkout.
Path("../models").mkdir(parents=True, exist_ok=True)

# Serialize the fitted random forest to disk for later reuse.
joblib.dump(rf_model, "../models/crop_model.pkl")
print("Model saved successfully!")