In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib
from sklearn.preprocessing import StandardScaler

### 1.Data Preprocessing

In [32]:
crop = pd.read_csv('Crop_Recommendation.csv')

In [37]:
crop.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label,crop_encoded
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice,20
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice,20
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice,20
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice,20
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice,20


In [36]:
crop.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,crop_encoded
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655,10.5
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389,6.345731
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267,0.0
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686,5.0
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624,10.5
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508,16.0
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117,21.0


In [5]:
crop.isnull().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [15]:
print(type(crop))

<class 'pandas.core.frame.DataFrame'>


#### Label Encoding

In [None]:
le = LabelEncoder()

le.fit(crop['label'])

crop['crop_encoded'] = le.fit_transform(crop['label'])

crop_label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
#le.classes = contains the original crop names
#le.transform = contains the unique labels in the order they were transformed
#zip = combines the two lists into a dictionary


print("Crop to Label Mapping:")
for crop_name, label in crop_label_mapping.items():
    print(f"{crop_name} → {label}")


Crop to Label Mapping:
apple → 0
banana → 1
blackgram → 2
chickpea → 3
coconut → 4
coffee → 5
cotton → 6
grapes → 7
jute → 8
kidneybeans → 9
lentil → 10
maize → 11
mango → 12
mothbeans → 13
mungbean → 14
muskmelon → 15
orange → 16
papaya → 17
pigeonpeas → 18
pomegranate → 19
rice → 20
watermelon → 21


In [73]:
crop
print(type(crop))

<class 'pandas.core.frame.DataFrame'>


## Training

#### 1. Train the random forest model
#### 2. Save the model
#### 3. Test the accuracy of the model

In [72]:

X = crop.drop(columns=['label', 'crop_encoded'])
y = crop['crop_encoded']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)


Decison = DecisionTreeClassifier(random_state=42)
Decison.fit(X_train, y_train)
y_pred = Decison.predict(X_test)
print("Decision Tree Classifier Accuracy: ", accuracy_score(y_test, y_pred) * 100)

XGBoost = RandomForestClassifier(random_state=42)
XGBoost.fit(X_train, y_train)  
y_pred = XGBoost.predict(X_test)
print("XGBoost Classifier Accuracy: ", accuracy_score(y_test, y_pred) * 100)

joblib.dump(model, 'rf_model.pkl')
joblib.dump(le, 'label_encoder.pkl')
joblib.dump(Decison, 'Decision_model.pkl')
joblib.dump(XGBoost, 'XGBoost_model.pkl')

accuracy = model.score(X_test, y_test)
print("Random forest models accuracy - " ,accuracy *100)
print("CLASSIFICATION REPORT")
print(classification_report(y_test, y_pred))
print("CONFUSION MATRIX")
print(confusion_matrix(y_test, y_pred))


Decision Tree Classifier Accuracy:  98.63636363636363
XGBoost Classifier Accuracy:  99.31818181818181
Random forest models accuracy -  99.31818181818181
CLASSIFICATION REPORT
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        20
           3       1.00      1.00      1.00        26
           4       1.00      1.00      1.00        27
           5       1.00      1.00      1.00        17
           6       1.00      1.00      1.00        17
           7       1.00      1.00      1.00        14
           8       0.92      1.00      0.96        23
           9       1.00      1.00      1.00        20
          10       0.92      1.00      0.96        11
          11       1.00      1.00      1.00        21
          12       1.00      1.00      1.00        19
          13       1.00      0.96      0.98        24
          14  

Decision Tree Classifier Accuracy:  98.63636363636363<br>
XGBoost Classifier Accuracy:  99.31818181818181<br>
Random forest models accuracy -  99.31818181818181<br>

Random forest and XGBOOSt have more accuracy then the Decision tree

In [71]:
print("enter the N, P, K, temperature, humidity, ph, rainfall")
N = float(input("Enter N: "))
P = float(input("Enter P: "))
K = float(input("Enter K: "))
temperature = float(input("Enter temperature: "))
humidity = float(input("Enter humidity: "))
ph = float(input("Enter ph: "))
rainfall = float(input("Enter rainfall: "))
predict_data = pd.DataFrame(
    [[N, P, K, temperature, humidity, ph, rainfall]],
    columns=X.columns  # Use the same feature names as training
)
print(predict_data)


predict_crop = model.predict(predict_data)
predict_crop_label = le.inverse_transform(predict_crop)[0]
result_df = pd.DataFrame({
    "Predicted Crop": [predict_crop_label]
})
print(result_df)


enter the N, P, K, temperature, humidity, ph, rainfall
       N     P     K  temperature  humidity   ph  rainfall
0  116.0  38.0  33.0         24.0      51.0  6.2     184.0
  Predicted Crop
0         coffee


## XAI METHODS
#### 1. ELI 5
#### 2. SHAP

In [74]:
import eli5
from eli5.sklearn import PermutationImportance

# Get permutation importance on the test data
perm = PermutationImportance(model, random_state=42).fit(X_test, y_test)

# Show feature importance
eli5.show_weights(perm, feature_names=X.columns.tolist())


Weight,Feature
0.3209  ± 0.0392,humidity
0.1905  ± 0.0236,K
0.1786  ± 0.0159,N
0.1745  ± 0.0166,rainfall
0.1150  ± 0.0306,P
0.0136  ± 0.0057,ph
0.0059  ± 0.0068,temperature


In [78]:
print(X_test.shape) 
print(X_test.columns)

# Check the shape of the test data


(440, 7)
Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall'], dtype='object')
