<a href="https://colab.research.google.com/github/anuva04/ML_Beginners/blob/main/crops.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Classification of crops
### Random forest classifier and XGBoost

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the crop data: every column except the last is a feature,
# and the last column is the class label.
dataset = pd.read_csv('dataset.csv')
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [4]:
# Preview the unscaled training features (NumPy elides the middle rows of large arrays).
print(X_train)

[[6.   0.55 0.72 0.17]
 [6.19 0.66 0.6  0.09]
 [7.12 0.72 0.57 0.09]
 ...
 [5.18 0.58 0.75 0.13]
 [6.54 0.58 0.21 0.16]
 [5.74 0.44 0.55 0.64]]


In [5]:
# Preview the unscaled held-out test features.
print(X_test)

[[5.26 0.79 0.88 0.23]
 [6.95 0.89 0.66 0.65]
 [6.15 0.77 0.59 0.27]
 ...
 [7.49 0.84 0.77 0.12]
 [6.41 0.26 0.61 0.1 ]
 [6.74 0.47 0.64 0.19]]


In [6]:
# Preview the training labels (crop names stored as strings).
print(y_train)

['coffee' 'dahlia' 'dahlia' ... 'coffee' 'paddy' 'cauliflower']


In [7]:
# Preview the held-out test labels (crop names stored as strings).
print(y_test)

['strawberry' 'cabbage' 'marigold' ... 'rose' 'Onion' 'lily']


### Feature Scaling

In [8]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test-set information is used.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [9]:
# Preview the training features after standardization.
print(X_train)

[[-0.31121463 -0.6374737   0.53241419 -0.3960485 ]
 [-0.04407651  0.03438901 -0.25595825 -0.92771129]
 [ 1.26349426  0.40085957 -0.45305136 -0.92771129]
 ...
 [-1.46412649 -0.45423842  0.7295073  -0.6618799 ]
 [ 0.44802002 -0.45423842 -2.81816868 -0.46250635]
 [-0.67677205 -1.30933641 -0.58444677  2.7274704 ]]


In [10]:
# Preview the test features after applying the scaler fitted on the training split.
print(X_test)

[[-1.35164728e+00  8.28408568e-01  1.58357744e+00  2.69859145e-03]
 [ 1.02447595e+00  1.43919285e+00  1.38227968e-01  2.79392825e+00]
 [-1.00316114e-01  7.06251712e-01 -3.21655955e-01  2.68529988e-01]
 ...
 [ 1.78371059e+00  1.13380071e+00  8.60902704e-01 -7.28337748e-01]
 [ 2.65241306e-01 -2.40874811e+00 -1.90260549e-01 -8.61253446e-01]
 [ 7.29218031e-01 -1.12610112e+00  6.83256114e-03 -2.63132805e-01]]


### Random Forest Classifier

In [11]:
from sklearn.ensemble import RandomForestClassifier

# 13 trees split on information gain (entropy); the seed makes the forest reproducible.
rf_classifier = RandomForestClassifier(
    n_estimators=13,
    criterion='entropy',
    random_state=0,
)
# Left as the last expression so the fitted estimator's repr is displayed.
rf_classifier.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=13,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [12]:
# Classify one hand-entered sample. It is passed through the fitted scaler
# first because the forest was trained on standardized features.
sample_features = [[7.37, 0.86, 0.8, 0.26]]
print(rf_classifier.predict(sc.transform(sample_features)))

['rose']


In [15]:
# Predict the test split and print predictions next to the true labels
# as a two-column array: [predicted, actual].
y_pred_rf = rf_classifier.predict(X_test)
print(np.column_stack((y_pred_rf, y_test)))

[['strawberry' 'strawberry']
 ['cabbage' 'cabbage']
 ['marigold' 'marigold']
 ...
 ['rose' 'rose']
 ['Onion' 'Onion']
 ['lily' 'lily']]


In [19]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Rows of the confusion matrix are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_rf)
print(cm)

# Overall test accuracy of the random forest (shown as the cell's result).
accuracy_score(y_true=y_test, y_pred=y_pred_rf)

[[134   0   0   0   0   0   0   0   0   0   2   0   0   0   0   0   0   0
    0   0   0]
 [  0  99  22   0   0   0   1   0   3   0   0   0   0   0   0   0   0   0
    0   0   0]
 [  0  12 115   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0]
 [  0   0   0 134   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0]
 [  0   0   0   0 126   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0]
 [  0   0   0   0   0  86   0   0   0   0   0   0   0   0   0   0   0   0
    0  44   0]
 [  0   3   0   0   0   0  89   0  19   0   0   0   1   0   0   0   0   0
    0   0   0]
 [  0   0   0   0   0   0   0 132   0   0   0   0   2   0   0   0   0   0
    0   0   0]
 [  0   3   0   0   0   0  16   0  90   0   0   0   0   0   0   0   0   3
    0   0   0]
 [  0   0   0   0   0   0   0   0   0  45   0  41   0   0  41   0   0   2
    0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 127   0   0   0   0   0   0   0
    0   0   0]
 [  0   0   0   0   0

0.8610909090909091

In [20]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the random forest on the training split;
# report the mean and spread of the per-fold scores as percentages.
accuracies = cross_val_score(rf_classifier, X_train, y_train, cv=10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 84.59 %
Standard Deviation: 1.07 %


### XGBoost

In [21]:
from xgboost import XGBClassifier

# Train a gradient-boosted tree classifier with the library's default settings.
# NOTE(review): y_train holds string labels here; newer XGBoost releases may
# require integer-encoded classes. Confirm against the installed version.
xgbclassifer = XGBClassifier().fit(X_train, y_train)
# Bare name as the last expression so the fitted estimator's repr is displayed.
xgbclassifer

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [22]:
# Evaluate the XGBoost model on the held-out test split.
y_pred_xgb = xgbclassifer.predict(X_test)

# Rows of the confusion matrix are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_xgb)
print(cm)

# Overall test accuracy of the XGBoost model (shown as the cell's result).
accuracy_score(y_true=y_test, y_pred=y_pred_xgb)

[[134   0   0   0   0   0   0   0   0   0   2   0   0   0   0   0   0   0
    0   0   0]
 [  0  99  24   0   0   0   0   0   2   0   0   0   0   0   0   0   0   0
    0   0   0]
 [  0  13 114   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0]
 [  0   0   0 134   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0]
 [  0   0   0   0 126   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0]
 [  0   0   0   0   0  88   0   0   0   0   0   0   0   0   0   0   0   0
    0  42   0]
 [  0   3   0   0   0   0  93   0  15   0   0   0   1   0   0   0   0   0
    0   0   0]
 [  0   0   0   0   0   0   0 134   0   0   0   0   0   0   0   0   0   0
    0   0   0]
 [  0   2   0   0   0   0  25   0  85   0   0   0   0   0   0   0   0   0
    0   0   0]
 [  0   0   0   0   0   0   0   0   0  24   0  55   0   0  50   0   0   0
    0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 127   0   0   0   0   0   0   0
    0   0   0]
 [  0   0   0   0   0

0.866909090909091

In [23]:
# 10-fold cross-validation of the XGBoost model on the training split;
# report the mean and spread of the per-fold scores as percentages.
accuracies = cross_val_score(xgbclassifer, X_train, y_train, cv=10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 86.35 %
Standard Deviation: 1.14 %
