# Installing required modules

In [75]:
# !pip install scikit-learn
# !pip install pandas
# !pip install numpy


# Importing Modules

In [76]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [77]:
# Loading Dataset
# Forward slashes work on every OS and avoid the invalid "\p" / "\c" escape
# sequences that the backslash-separated literal contained.
data = pd.read_csv("data/processed/crop_data.csv")
data.head()


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,crop
0,90,42,43,20.9,82.0,6.5,202.9,rice
1,85,58,41,21.8,80.3,7.0,226.7,rice
2,60,55,44,23.0,82.3,7.8,264.0,rice
3,74,35,40,26.5,80.2,7.0,242.9,rice
4,78,42,42,20.1,81.6,7.6,262.7,rice


In [78]:
# Separate the seven input columns (x) from the crop label column (y).
feature_cols = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
x = data[feature_cols]
y = data['crop']

In [79]:
# Encoding target labels
le = LabelEncoder()
# Fit on the raw crop column rather than on `y` itself, so re-running this cell
# never refits the encoder on already-encoded integers (which would replace
# the crop names in le.classes_ with numbers).
y = le.fit_transform(data['crop'])

In [80]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [81]:
# Train a 100-tree random forest with balanced class weights and a fixed seed.
rf_params = {
    "n_estimators": 100,
    "class_weight": "balanced",
    "random_state": 42,
}
model = RandomForestClassifier(**rf_params)
model.fit(x_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [82]:
# Evaluating Model
y_pred = model.predict(x_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Print the report on its own lines: putting the label on the same line as the
# report shifts the header row out of alignment with the columns below it.
print("Detailed report")
print(classification_report(y_test, y_pred, target_names=le.classes_))


Accuracy: 0.9931818181818182
Detailed report               precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      1.00      1.00        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.92      1.00      0.96        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.92      1.00      0.96        11
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.96      0.98        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00   

In [83]:
# Saving the trained model and the fitted label encoder to disk
# joblib.dump does not create missing directories, so make sure the target exists.
os.makedirs("models", exist_ok=True)
joblib.dump(model, "models/crop_model.pkl")
joblib.dump(le, "models/LabelEncoder.pkl")

['models/LabelEncoder.pkl']

In [84]:
# Predict the crop for one hand-written sample.
# The model was fitted on a DataFrame with named columns, so the sample is built
# with the same column names (x.columns); this documents the value order and
# avoids scikit-learn's "X does not have valid feature names" warning.
sample = pd.DataFrame([[90,42,43,20.9,82.0,6.5,202.9]], columns=x.columns)

pred_num = model.predict(sample)
# Map the encoded class id back to the crop name.
pred_crop = le.inverse_transform(pred_num)
print("Reccomended Crop: ", pred_crop[0])


Reccomended Crop:  rice




In [85]:
# Feature Importance: list each input column with its importance, largest first.
imp = model.feature_importances_
ranked = sorted(
    zip(x.columns, imp),
    key=lambda pair: pair[1],  # sort on the importance value
    reverse=True,
)
for name, score in ranked:
    print(name, round(score, 3))

rainfall 0.228
humidity 0.216
K 0.179
P 0.153
N 0.102
temperature 0.075
ph 0.046


In [86]:
import joblib
import numpy as np

# 1️⃣ Reload the saved model and label encoder from disk.
# joblib.load unpickles the files, so only load artifacts you trust.
model, label_encoder = (
    joblib.load(path)
    for path in ("models/crop_model.pkl", "models/LabelEncoder.pkl")
)

# 2️⃣ Function to get top 3 crops with confidence
def recommend_top3_with_confidence(features):
    """
    Return the three most probable crops for one set of measurements.

    features: list of seven values in the column order the model was trained
        on: [N, P, K, temperature, humidity, ph, rainfall]
    returns: list of tuples (crop_name, confidence%), most confident first,
        with the confidence rounded to two decimals
    """
    # When the model recorded its training column names, rebuild a one-row
    # frame with them; scikit-learn warns if a model fitted on named columns
    # is handed a bare list.
    names = getattr(model, "feature_names_in_", None)
    row = [features] if names is None else pd.DataFrame([features], columns=names)

    # Predict probabilities for all crops
    proba = model.predict_proba(row)[0]

    # Positions of the 3 largest probabilities, highest first
    top3_idx = np.argsort(proba)[-3:][::-1]

    # The columns of predict_proba follow model.classes_, so translate positions
    # to encoded labels through it instead of assuming position == label.
    top3_crops = label_encoder.inverse_transform(model.classes_[top3_idx])

    # Pair crop names with confidence percentages, reusing the positions already
    # in hand rather than re-encoding each crop name to find its probability.
    return [
        (crop, round(float(proba[idx]) * 100, 2))
        for crop, idx in zip(top3_crops, top3_idx)
    ]

# 3️⃣ Example input
# Values must follow the column order the model was trained on:
# [N, P, K, temperature, humidity, ph, rainfall]
sample_input = [89,45,36,21.3,80.5,6.4,185.5]

# 4️⃣ Get top 3 crops with confidence
top3_with_confidence = recommend_top3_with_confidence(sample_input)

# 5️⃣ Display results
print("Top 3 Recommended Crops with Confidence:")
for crop, confidence in top3_with_confidence:
    print(f"{crop}: {confidence}%")


Top 3 Recommended Crops with Confidence:
rice: 85.0%
jute: 15.0%
pomegranate: 0.0%


