In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import sklearn.model_selection as model_selection
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, f1_score, precision_score, recall_score
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.svm import SVC

In [2]:
# Load Training data
crops_df = pd.read_csv('crops_dataset.csv')
crops_df

Unnamed: 0,nitrogen,phosphorus,potassium,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [3]:
features = ['nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph', 'rainfall']
X = crops_df[features].to_numpy()

In [4]:
#Determine the list of unique crops
y = crops_df['label'].to_numpy()
labels = crops_df["label"].unique()
print("No. of unique crop labels: ", labels.size)
print(labels)

No. of unique crop labels:  22
['rice' 'maize' 'chickpea' 'kidneybeans' 'pigeonpeas' 'mothbeans'
 'mungbean' 'blackgram' 'lentil' 'pomegranate' 'banana' 'mango' 'grapes'
 'watermelon' 'muskmelon' 'apple' 'orange' 'papaya' 'coconut' 'cotton'
 'jute' 'coffee']


In [5]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=.2, random_state=1)

### 2.1 K-Nearest Neighbor (KNN) Classifier

#### 2.1.2 Load and 'Train' K Neighbors Classifier Model

In [6]:
knn = KNeighborsClassifier(n_neighbors=3) #Uses number of labels
knn.fit(X,y)

#### 2.1.3 Test Predictions

In [7]:
y_pred = knn.predict(X_test)

#### 2.1.4 Check Predictions

In [8]:
knn_accuracy = round(accuracy_score(y_test, y_pred),4)
knn_accuracy

0.9841

In [9]:
knn_cr = classification_report(y_test, y_pred, 4)

### 2.2 Decision Trees Classifier

#### 2.2.2 Load and 'Train' Decision Tree Classifier Model

In [10]:
dtc = DecisionTreeClassifier()
dtc.fit(X,y)

#### 2.2.3 Test Predictions

In [11]:
y_pred = dtc.predict(X_test)

#### 2.2.5 Check Predictions

In [12]:
dtc_accuracy = round(accuracy_score(y_test, y_pred),4)
dtc_accuracy

1.0

In [13]:
dtc_cr = classification_report(y_test, y_pred, digits=4)

### 2.3 Logistic Regression Classifier

#### 2.3.1 Load and 'Train' Logistic Regression Classifier Model

In [14]:
lr_model = LogisticRegression(penalty=None, verbose=True, max_iter=10000, n_jobs=os.cpu_count())
lr_model.fit(X, y)

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   1 out of   1 | elapsed:   34.0s finished


#### 2.3.2 Test Predictions 

In [15]:
y_pred = lr_model.predict(X_test)

#### 2.3.3 Check Predictions

In [16]:
lr_accuracy = round(accuracy_score(y_test, y_pred),4)
lr_accuracy

0.9932

#### Via Classification Report

In [18]:
lr_cr = classification_report(y_test, y_pred, digits=4)

# 3. Comparison of Machine Learning Models

#### Accuracy, Precision, Recall, and F1 Scores:

In [19]:
print("KNN")
print(knn_cr)
print("DTC")
print(dtc_cr)
print("LR")
print(lr_cr)

KNN
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        22
      banana       1.00      1.00      1.00        18
   blackgram       0.96      1.00      0.98        22
    chickpea       1.00      1.00      1.00        15
     coconut       1.00      1.00      1.00        18
      coffee       1.00      1.00      1.00        17
      cotton       0.96      1.00      0.98        22
      grapes       1.00      1.00      1.00        29
        jute       0.92      0.92      0.92        25
 kidneybeans       0.95      1.00      0.98        20
      lentil       1.00      1.00      1.00        18
       maize       1.00      0.95      0.97        20
       mango       1.00      1.00      1.00        17
   mothbeans       1.00      0.96      0.98        24
    mungbean       1.00      1.00      1.00        24
   muskmelon       1.00      1.00      1.00        26
      orange       1.00      1.00      1.00        15
      papaya       1.00