In [182]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import cross_val_score

In [183]:
df = pd.read_csv('cr.csv')
# df = pd.get_dummies(df, columns=['Soil Type'], drop_first=True)

In [184]:
le_crop = LabelEncoder()
df['Crop Type'] = le_crop.fit_transform(df['label'])

In [185]:
# 3. Define features and target
X = df.drop(columns=['label'])
y = df['label']

In [186]:
# 4. Train/test split (stratify helps if classes are unbalanced)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [187]:
# 5. Scale numeric features (KNN depends on distance)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [188]:
# 6. Train KNN
knn = KNeighborsClassifier(n_neighbors=5, weights='distance', metric='minkowski')
knn.fit(X_train_scaled, y_train)

In [189]:
for k in range(1, 21):
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train_scaled, y_train, cv=5)
    print(f"k={k}, accuracy={scores.mean():.3f}")

k=1, accuracy=0.998
k=2, accuracy=0.996
k=3, accuracy=0.998
k=4, accuracy=0.997
k=5, accuracy=0.997
k=6, accuracy=0.995
k=7, accuracy=0.994
k=8, accuracy=0.994
k=9, accuracy=0.994
k=10, accuracy=0.994
k=11, accuracy=0.990
k=12, accuracy=0.991
k=13, accuracy=0.988
k=14, accuracy=0.989
k=15, accuracy=0.986
k=16, accuracy=0.988
k=17, accuracy=0.986
k=18, accuracy=0.985
k=19, accuracy=0.983
k=20, accuracy=0.982


In [190]:
# 6. Train KNN
knn = KNeighborsClassifier(n_neighbors=3, weights='distance', metric='minkowski')
knn.fit(X_train_scaled, y_train)

In [191]:
# 7. Evaluate
y_pred = knn.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9954545454545455
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        20
   blackgram       1.00      1.00      1.00        20
    chickpea       1.00      1.00      1.00        20
     coconut       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       1.00      1.00      1.00        20
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.95      1.00      0.98        20
       maize       1.00      1.00      1.00        20
       mango       0.95      1.00      0.98        20
   mothbeans       1.00      0.90      0.95        20
    mungbean       1.00      1.00      1.00        20
   muskmelon       1.00      1.00      1.00        20
      orange       1.00      1.00      1.00        2

In [220]:
sample = [[40, 20, 10, 10, 82.00274423, 1, 202.9355362, 0]]
sample_scaled = scaler.transform(sample)

# 9. Get top-N crops (e.g., top 3)
probs = knn.predict_proba(sample_scaled)[0]
top_n = 4
top_indices = np.argsort(probs)[::-1][:top_n]
top_crops = le_crop.inverse_transform(top_indices)
top_probs = probs[top_indices]
print(top_crops)
print(top_probs)

['mothbeans' 'pigeonpeas' 'rice' 'pomegranate']
[0.66606963 0.33393037 0.         0.        ]




In [221]:
print("Top crop recommendations:")
for crop, prob in zip(top_crops, top_probs):
    if prob > 0:
      print(f"{crop}: {prob:.2%}")

Top crop recommendations:
mothbeans: 66.61%
pigeonpeas: 33.39%
