In [24]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [25]:
#loading the csv file and filling missing values with empty string
dataset = pd.read_csv('../data/Crop_recommendation.csv')
dataset.fillna(value='', inplace=True)
dataset

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [26]:
#selecting specific columns according to our requirement
X = dataset.iloc[:,:].values
X

array([[90, 42, 43, ..., 6.502985292000001, 202.9355362, 'rice'],
       [85, 58, 41, ..., 7.038096361, 226.6555374, 'rice'],
       [60, 55, 44, ..., 7.840207144, 263.96424759999996, 'rice'],
       ...,
       [118, 33, 30, ..., 6.362607851, 173.32283859999995, 'coffee'],
       [117, 32, 34, ..., 6.758792552, 127.17529280000001, 'coffee'],
       [104, 18, 30, ..., 6.7798326110000025, 140.9370415, 'coffee']],
      dtype=object)

In [27]:
mapping = X[:,-1].copy()
mappingdf = pd.DataFrame(mapping)
mappingdf.insert(1,1,"0")
maparr=mappingdf.iloc[:,:].values
maparr

array([['rice', '0'],
       ['rice', '0'],
       ['rice', '0'],
       ...,
       ['coffee', '0'],
       ['coffee', '0'],
       ['coffee', '0']], dtype=object)

In [28]:
#applying one hot encoder on closely related categorical data
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, -1] = le.fit_transform(X[:, -1])
X

array([[90, 42, 43, ..., 6.502985292000001, 202.9355362, 20],
       [85, 58, 41, ..., 7.038096361, 226.6555374, 20],
       [60, 55, 44, ..., 7.840207144, 263.96424759999996, 20],
       ...,
       [118, 33, 30, ..., 6.362607851, 173.32283859999995, 5],
       [117, 32, 34, ..., 6.758792552, 127.17529280000001, 5],
       [104, 18, 30, ..., 6.7798326110000025, 140.9370415, 5]],
      dtype=object)

In [29]:
#array to track encoded labels
maparr[:, -1] = le.fit_transform(maparr[:, 0])
maparrdf=pd.DataFrame(maparr)
maparrdf = maparrdf.drop_duplicates(0)
maparrdf

Unnamed: 0,0,1
0,rice,20
100,maize,11
200,chickpea,3
300,kidneybeans,9
400,pigeonpeas,18
500,mothbeans,13
600,mungbean,14
700,blackgram,2
800,lentil,10
900,pomegranate,19


In [30]:
#seperating the dependant attributes and the independent(result) attribute
y=X[:,-1]
X=np.delete(X,7,1)
y

array([20, 20, 20, ..., 5, 5, 5], dtype=object)

In [31]:
#changing the datatype of array to float32
X = np.asarray(X).astype(np.float32)
y = np.asarray(y).astype(np.float32)

In [32]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [33]:
#applying feature scaling to the arrays
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)

# Naive Bayes

In [34]:
from sklearn.naive_bayes import GaussianNB
classifier1 = GaussianNB()
classifier1.fit(X_train, y_train)

GaussianNB()

In [35]:
y_pred1 = classifier1.predict(X_test)

In [36]:
from sklearn import metrics
print("Accuracy:",metrics.accuracy_score(y_test, y_pred1))

Accuracy: 0.9945454545454545


In [37]:
new_input=[[90,13,45,27.7,81.9,6.82,197.34]]
new_input = np.asarray(new_input).astype(np.float32)

In [38]:
new_output=classifier1.predict(new_input)
crop=maparrdf.loc[maparrdf[1] == new_output[0]].iloc[0,0]
crop

'rice'

# KNN

In [39]:
from sklearn.neighbors import KNeighborsClassifier
classifier2 = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier2.fit(X_train, y_train)

KNeighborsClassifier()

In [40]:
y_pred2 = classifier2.predict(X_test)

In [41]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred2))

Accuracy: 0.9781818181818182


In [42]:
new_output=classifier2.predict(new_input)
crop=maparrdf.loc[maparrdf[1] == new_output[0]].iloc[0,0]
crop

'jute'

# SVM

In [43]:
from sklearn.svm import SVC
classifier3 = SVC(kernel = 'linear', random_state = 0)
classifier3.fit(X_train, y_train)

SVC(kernel='linear', random_state=0)

In [44]:
y_pred3 = classifier3.predict(X_test)

In [45]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred3))

Accuracy: 0.9872727272727273


In [46]:
new_output=classifier3.predict(new_input)
crop=maparrdf.loc[maparrdf[1] == new_output[0]].iloc[0,0]
crop

'jute'

# Random Forrest

In [47]:
from sklearn.ensemble import RandomForestClassifier
classifier4 = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
classifier4.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)

In [48]:
y_pred4 = classifier4.predict(X_test)

In [49]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred4))

Accuracy: 0.9927272727272727


In [50]:
new_output=classifier4.predict(new_input)
crop=maparrdf.loc[maparrdf[1] == new_output[0]].iloc[0,0]
crop

'jute'

# Majority Voting

In [51]:
from sklearn.ensemble import VotingClassifier

In [52]:
final_model = VotingClassifier(
    estimators=[('nb', classifier1), ('knn', classifier2), ('svc', classifier3), ('rf', classifier4)], voting='hard')

In [53]:
final_model.fit(X_train, y_train)

VotingClassifier(estimators=[('nb', GaussianNB()),
                             ('knn', KNeighborsClassifier()),
                             ('svc', SVC(kernel='linear', random_state=0)),
                             ('rf',
                              RandomForestClassifier(criterion='entropy',
                                                     n_estimators=10,
                                                     random_state=0))])

In [54]:
pred_final = final_model.predict(X_test)

In [55]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred4))

Accuracy: 0.9927272727272727


In [56]:
new_output=final_model.predict(new_input)
crop=maparrdf.loc[maparrdf[1] == new_output[0]].iloc[0,0]
crop

'jute'

In [57]:
import pickle
with open('model.pkl', 'wb') as file:
    pickle.dump(final_model, file)