# Crop Recommendation Module
Train a random forest model and expose a prediction helper.

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
def find_dataset_root():
    """Return the path of the nearest ``dataset`` directory.

    The current working directory is checked first, then its parent
    directory; the first location containing a ``dataset`` folder wins.

    Returns:
        The path to the ``dataset`` directory that was found.

    Raises:
        FileNotFoundError: If neither location contains a ``dataset``
            directory.
    """
    working_dir = os.getcwd()
    # Order matters: ./dataset takes precedence over ../dataset.
    for base_dir in (working_dir, os.path.dirname(working_dir)):
        dataset_dir = os.path.join(base_dir, "dataset")
        if os.path.isdir(dataset_dir):
            return dataset_dir
    raise FileNotFoundError("Dataset folder not found. Expected ./dataset or ../dataset")

# Resolve the CSV location inside the expected sub-folder layout of the
# dataset root and fail early with a clear message if it is missing.
DATASET_ROOT = find_dataset_root()
DATASET_PATH = os.path.join(
    DATASET_ROOT, "atharvaingle", "crop-recommendation-dataset", "Crop_recommendation.csv"
)
if not os.path.exists(DATASET_PATH):
    raise FileNotFoundError(f"Missing dataset file: {DATASET_PATH}")

# Every column except 'label' is a feature; 'label' is the target.
crop_recommendation_df = pd.read_csv(DATASET_PATH)
y = crop_recommendation_df['label']
X = crop_recommendation_df.drop(columns='label')

# Encode the target labels as integers; the encoder is kept so predictions
# can be decoded back to the original labels later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows for evaluation, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Scaling statistics are learned from the training split only and then
# re-applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a 100-tree random forest on the scaled training features.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
rf_model.fit(X_train_scaled, y_train)

# Report accuracy on the held-out split.
test_accuracy = accuracy_score(y_test, rf_model.predict(X_test_scaled))
print(f"Model trained. Test accuracy: {test_accuracy:.3f}")

In [None]:
def predict_crop_recommendation(N, P, K, temperature, humidity, ph, rainfall):
    """Recommend a crop for one set of feature values.

    The seven values are scaled with the module-level ``scaler``, classified
    with ``rf_model`` and decoded to crop names with ``label_encoder``.

    Args:
        N, P, K, temperature, humidity, ph, rainfall: Numeric feature values,
            fed to the model positionally in exactly this order. They are
            assumed to match the column order and units of the training
            CSV -- TODO confirm against the dataset.

    Returns:
        str: A text report with the recommended crop followed by up to three
        highest-probability crops and their confidence percentages.
    """
    feature_values = [N, P, K, temperature, humidity, ph, rainfall]

    # The scaler was fitted on a DataFrame, so scikit-learn remembers the
    # column names and warns when handed a bare array. Re-attach those names
    # (same positional order as before) to keep the call warning-free.
    fitted_names = getattr(scaler, "feature_names_in_", None)
    if fitted_names is not None and len(fitted_names) == len(feature_values):
        input_data = pd.DataFrame(
            [feature_values], columns=list(fitted_names), dtype=float
        )
    else:
        input_data = np.array([feature_values])

    input_scaled = scaler.transform(input_data)
    prediction = rf_model.predict(input_scaled)
    probabilities = rf_model.predict_proba(input_scaled)[0]
    predicted_crop = label_encoder.inverse_transform(prediction)[0]

    # Stable descending sort: ties resolve to the lowest index, matching the
    # argmax used by predict(), so entry #1 always equals the recommended crop.
    top_3_indices = np.argsort(-probabilities, kind="stable")[:3]
    # predict_proba columns follow rf_model.classes_, not raw positions, so
    # map positions to encoded labels before decoding them to crop names.
    top_3_labels = rf_model.classes_[top_3_indices]
    top_3_crops = label_encoder.inverse_transform(top_3_labels)
    top_3_probs = probabilities[top_3_indices]
    recommendations = "\n".join(
        f"{rank}. {crop}: {prob * 100:.2f}% confidence"
        for rank, (crop, prob) in enumerate(zip(top_3_crops, top_3_probs), start=1)
    )

    return (
        f"Recommended Crop: {predicted_crop}\n\n"
        f"Top 3 Recommendations:\n{recommendations}"
    )

In [None]:
# Smoke-test the helper with one example set of feature values.
sample_output = predict_crop_recommendation(
    N=90,
    P=40,
    K=40,
    temperature=25,
    humidity=70,
    ph=6.5,
    rainfall=120,
)
print(sample_output)