import warnings
# Install a blanket 'ignore' filter so warnings raised by the cells below are not displayed.
# NOTE(review): this also hides library warnings (e.g. convergence or deprecation notices)
# that could be relevant to the models trained later -- confirm that is intended.
warnings.filterwarnings('ignore')

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import seaborn as sns

In [2]:
# Relative locations of the input CSV and of the directory the pickled models are written to.
PATH = './datasets/Crop_recommendation.csv'
MODELS = './recommender-models/'
# Load the dataset into a DataFrame. The recorded output below shows this call raising
# FileNotFoundError when the CSV is not present at PATH relative to the working directory.
data = pd.read_csv(PATH)

FileNotFoundError: [Errno 2] No such file or directory: './datasets/Crop_recommendation.csv'

## Data Analysis

In [None]:
# Preview the first rows of the loaded DataFrame.
data.head()

This dataset consists of **2200** rows in total.

**Each row has 8 columns representing Nitrogen, Phosphorus, Potassium, Temperature, Humidity, pH, Rainfall and Label.**

**NPK (Nitrogen, Phosphorus and Potassium)** values represent the nitrogen, phosphorus and potassium content of the soil.
**Temperature**, **humidity** and **rainfall** are the average values for the surrounding environment.
**pH** is the pH value of the soil.
**Label** is the type of crop that is suitable to grow under the conditions given by the other columns. **Label is the value we will be predicting.**


In [None]:
# Print pandas' summary of the DataFrame.
data.info()

In [None]:
# Show the column labels of the DataFrame.
data.columns

The different types of labels are given below

In [None]:
# List the distinct values found in the 'label' column.
data['label'].unique()

In [None]:
# Count how many rows carry each distinct 'label' value.
data['label'].value_counts()

## Separating features and output labels and creating training and test data

In [None]:
# Model inputs: the seven numeric columns, selected in this fixed order.
features = data[[
    'N',
    'P',
    'K',
    'temperature',
    'humidity',
    'ph',
    'rainfall',
]]

# Prediction target: the 'label' column.
labels = data['label']

In [None]:
# Hold out 20% of the rows for testing (train:test = 4:1).
# random_state is pinned so the same split is produced on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Parallel result lists filled in by the model cells below:
# models_list[i] is the display name of the model whose test accuracy is acc[i].
acc, models_list = [], []

### Logistic Regression

In [None]:
# Build the logistic-regression classifier, then fit it on the training split.
LogReg = LogisticRegression(random_state=42)
LogReg.fit(X_train, Y_train)

# Score the fitted model on the held-out test split.
predicted_values = LogReg.predict(X_test)
accuracy = metrics.accuracy_score(Y_test, predicted_values)

# Record the result for the comparison at the end of the notebook.
models_list.append("Logistic Regression")
acc.append(accuracy)

In [None]:
# Report the current value of `accuracy` (set by the logistic-regression cell when run in order).
print("Logistic Regression accuracy: ", accuracy)

In [None]:
# Print scikit-learn's classification report for the current predictions on the test split.
print(metrics.classification_report(Y_test, predicted_values))

In [None]:
# Persist the fitted logistic-regression model into the MODELS directory.
# NOTE(review): the file name is spelled 'LogisticRegresion.pkl' (single "s"). It is kept
# unchanged because code outside this notebook may load the model by this exact name.
filename = 'LogisticRegresion.pkl'
# The context manager closes the file handle even if pickling raises.
with open(MODELS + filename, 'wb') as model_file:
    pickle.dump(LogReg, model_file)

### Decision Tree

In [None]:
# Build the entropy-based decision tree, then fit it on the training split.
DT = DecisionTreeClassifier(
    criterion="entropy",
    random_state=2,
    max_depth=1000,
)
DT.fit(X_train, Y_train)

# Score the fitted tree on the held-out test split.
predicted_values = DT.predict(X_test)
accuracy = metrics.accuracy_score(Y_test, predicted_values)

# Record the result for the comparison at the end of the notebook.
models_list.append("Decision Tree")
acc.append(accuracy)

In [None]:
# Report the current value of `accuracy` (set by the decision-tree cell when run in order).
print("Decision Tree accuracy: ", accuracy)

In [None]:
# Print scikit-learn's classification report for the current predictions on the test split.
print(metrics.classification_report(Y_test, predicted_values))

In [None]:
# Persist the fitted decision tree into the MODELS directory.
# The object written is DT, so the pickle matches its file name.
filename = 'DecisionTree.pkl'
# The context manager closes the file handle even if pickling raises.
with open(MODELS + filename, 'wb') as model_file:
    pickle.dump(DT, model_file)

### Support Vector Machine (SVM)

In [None]:
# Build the support-vector classifier, then fit it on the training split.
SVM = SVC(gamma='auto')
SVM.fit(X_train, Y_train)

# Score the fitted model on the held-out test split.
predicted_values = SVM.predict(X_test)
accuracy = metrics.accuracy_score(Y_test, predicted_values)

# Record the result for the comparison at the end of the notebook.
models_list.append("Support Vector Machine (SVM)")
acc.append(accuracy)

In [None]:
# Report the current value of `accuracy` (set by the SVM cell when run in order).
print("SVM accuracy: ", accuracy)

In [None]:
# Print scikit-learn's classification report for the current predictions on the test split.
print(metrics.classification_report(Y_test, predicted_values))

In [None]:
# Persist the fitted support-vector classifier into the MODELS directory.
# The object written is SVM, so the pickle matches its file name.
filename = 'SVM.pkl'
# The context manager closes the file handle even if pickling raises.
with open(MODELS + filename, 'wb') as model_file:
    pickle.dump(SVM, model_file)

### Multi-Layer Perceptron

In [None]:
# Build the six-hidden-layer perceptron and fit it on the training split.
# fit() returns the estimator itself, so the construction and fit can be chained.
clf = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(1000, 600, 400, 200, 100, 50),
    random_state=1,
).fit(X_train, Y_train)

# Score the fitted network on the held-out test split.
predicted_values = clf.predict(X_test)
accuracy = metrics.accuracy_score(Y_test, predicted_values)

In [None]:
# Record the perceptron's result for the comparison at the end of the notebook.
models_list.append("Multi-Layer Perceptron")
acc.append(accuracy)

In [None]:
# Report the perceptron's test accuracy. The label names the MLP, since `accuracy`
# holds the multi-layer perceptron's score at this point when cells run in order.
print("Multi-Layer Perceptron accuracy: ", accuracy)

In [None]:
# Print scikit-learn's classification report for the current predictions on the test split.
print(metrics.classification_report(Y_test, predicted_values))

In [None]:
# Persist the fitted multi-layer perceptron into the MODELS directory.
# The object written is clf (the MLP), so the pickle matches its file name.
filename = 'MLP.pkl'
# The context manager closes the file handle even if pickling raises.
with open(MODELS + filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

### Random Forest

In [None]:
# Build the 20-tree random forest and fit it on the training split.
# fit() returns the estimator itself, so the construction and fit can be chained.
RF = RandomForestClassifier(
    n_estimators=20,
    random_state=0,
).fit(X_train, Y_train)

# Score the fitted forest on the held-out test split.
predicted_values = RF.predict(X_test)
accuracy = metrics.accuracy_score(Y_test, predicted_values)

# Record the result for the comparison at the end of the notebook.
models_list.append('Random Forest')
acc.append(accuracy)

In [None]:
# Report the current value of `accuracy` (set by the random-forest cell when run in order).
print("RF's Accuracy is: ", accuracy)

In [None]:
# Print scikit-learn's classification report for the current predictions on the test split.
print(metrics.classification_report(Y_test,predicted_values))

In [None]:
# Persist the fitted random forest into the MODELS directory.
# The object written is RF, so the pickle matches its file name.
filename = 'RF.pkl'
# The context manager closes the file handle even if pickling raises.
with open(MODELS + filename, 'wb') as model_file:
    pickle.dump(RF, model_file)

In [None]:
# Horizontal bar chart comparing the recorded accuracies: one bar per entry of models_list,
# with bar length taken from the matching entry of acc.
plt.figure(figsize=[10,5],dpi = 100)
plt.title('Accuracy Comparison')
plt.xlabel('Accuracy')
plt.ylabel('Algorithm')
# NOTE(review): acc and models_list grow every time a model cell is re-run, so duplicate
# bars will appear unless the lists are reset first -- confirm cells are run once, in order.
sns.barplot(x = acc,y = models_list,palette='twilight_shifted_r')

In [None]:
# Map each model name to its recorded accuracy. If a name occurs more than once,
# the latest accuracy wins, as with dict(zip(...)).
accuracy_models = {name: score for name, score in zip(models_list, acc)}

# Print one "name --> accuracy" line per model, in insertion order.
for model_name in accuracy_models:
    print (model_name, '-->', accuracy_models[model_name])