In [44]:
# importing the necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

In [45]:
# loading the dataset
df = pd.read_csv('/content/Crop.csv')
df.sample(5)

Unnamed: 0,N,P,K,ph,EC,S,Cu,Fe,Mn,Zn,B,label
374,175,20,76,6.3,0.72,0.264,18.91,259.43,208.4,20.67,22.94,mulberry
116,86,80,195,6.8,1.22,0.21,19.53,62.65,55.71,22.56,64.69,mango
480,149,96,316,7.7,1.38,0.0188,21.02,212.21,62.38,20.62,9.5,ragi
26,163,61,208,6.6,0.43,0.28,7.43,74.29,35.88,70.0,17.96,pomegranate
368,139,21,103,6.7,0.84,0.226,27.51,164.9,1264.7,21.71,21.09,mulberry


In [46]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 620 entries, 0 to 619
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   N       620 non-null    int64  
 1   P       620 non-null    int64  
 2   K       620 non-null    int64  
 3   ph      620 non-null    float64
 4   EC      620 non-null    float64
 5   S       620 non-null    float64
 6   Cu      620 non-null    float64
 7   Fe      620 non-null    float64
 8   Mn      620 non-null    float64
 9   Zn      620 non-null    float64
 10  B       620 non-null    float64
 11  label   620 non-null    object 
dtypes: float64(8), int64(3), object(1)
memory usage: 58.3+ KB


In [47]:
# Separating the target data
X = df.drop('label', axis=1)
y = df['label']

In [48]:
# Min_Max scaling
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

In [49]:
# spliting into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [50]:
# Getting the model and assigning search space
model = RandomForestClassifier(random_state=42)
search_space = {
    'n_estimators': [100, 200, 300],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [4, 6, 8],
    'max_leaf_nodes': [4, 6, 8]
}

In [51]:
# GridSearch and Fit the model
GS = GridSearchCV(RandomForestClassifier(),
                  param_grid=search_space)
GS.fit(X_train, y_train)

135 fits failed out of a total of 405.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
135 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.11/dist-packages/sklearn/base.py", line 1382, in wrapper
    estimator._validate_params()
  File "/usr/local/lib/python3.11/dist-packages/sklearn/base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.11/dist-packages/sklearn/utils/_param_validation.py", line 98, in validate_parameter_constraints
    raise InvalidParameterError(
sk

In [52]:
# Getting best estimator and score
print(GS.best_estimator_)
print(GS.best_score_)

RandomForestClassifier(max_depth=4, max_leaf_nodes=4)
0.9656767676767677


In [53]:
# Precision, Recall, F1 Score and Confusion Matrix
y_pred = GS.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[23  0  0  0  0  1]
 [ 1 17  0  0  0  0]
 [ 0  0 21  0  0  0]
 [ 0  1  0 21  0  0]
 [ 0  0  0  0 23  0]
 [ 0  0  0  0  0 16]]
              precision    recall  f1-score   support

      grapes       0.96      0.96      0.96        24
       mango       0.94      0.94      0.94        18
    mulberry       1.00      1.00      1.00        21
 pomegranate       1.00      0.95      0.98        22
      potato       1.00      1.00      1.00        23
        ragi       0.94      1.00      0.97        16

    accuracy                           0.98       124
   macro avg       0.97      0.98      0.97       124
weighted avg       0.98      0.98      0.98       124

