In [62]:
# Importing libraries

from __future__ import print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn import metrics
from sklearn import tree
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import cross_val_score

In [63]:
# Read the crop recommendation CSV into a DataFrame (relative path, resolved against the working directory).
df = pd.read_csv(filepath_or_buffer='crop_recommendation.csv')

In [64]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,93,35,44,20.879744,82.002744,6.502985,202.935536,rice
1,73,37,35,21.770462,80.319644,7.038096,226.655537,rice
2,68,45,35,23.004459,82.320763,7.840207,263.964248,rice
3,88,56,44,26.491096,80.158363,6.980401,242.864034,rice
4,100,54,45,20.130175,81.604873,7.628473,262.71734,rice


In [65]:
# Preview the last five rows of the loaded frame.
df.tail(n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
2195,85,35,31,26.774637,66.413269,6.780064,177.774507,coffee
2196,98,39,28,27.417112,56.636362,6.086922,127.92461,coffee
2197,104,35,34,24.131797,67.225123,6.362608,173.322839,coffee
2198,97,36,33,26.272418,52.127394,6.758793,127.175293,coffee
2199,87,31,33,23.603016,60.396475,6.779833,140.937041,coffee


In [66]:
# Total number of cells in the frame (rows x columns).
df.size

17600

In [67]:
# Dimensions of the frame as (rows, columns).
df.shape

(2200, 8)

In [68]:
# Column labels: the numeric input columns plus the 'label' target column.
df.columns

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')

In [69]:
# Distinct crop names present in the target column, in order of first appearance.
df.loc[:, 'label'].unique()

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [70]:
# Per-column dtypes, to confirm which columns are numeric and which hold the label strings.
df.dtypes

N                int64
P                int64
K                int64
temperature    float64
humidity       float64
ph             float64
rainfall       float64
label           object
dtype: object

In [71]:
# Number of rows per crop label, to check class balance.
df.loc[:, 'label'].value_counts()

label
rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: count, dtype: int64

### Separating features and target label

In [73]:
# Model inputs: the seven numeric columns of the dataset.
features = df[
    ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
]
# Prediction target: the crop name.
target = df['label']
# Same label column, also exposed under the name `labels`.
labels = df['label']

In [74]:
# Parallel lists filled in as each model is evaluated:
# `model` collects the model names, `acc` the corresponding accuracy scores.
acc, model = [], []

In [75]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the split repeatable.
# NOTE(review): the split is not stratified, so per-class test counts vary — consider stratify=target.
from sklearn.model_selection import train_test_split

Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=2,
)

In [76]:
# Inspect the training labels produced by the split.
Ytrain

1936         cotton
610        mungbean
372     kidneybeans
1559          apple
1500          apple
           ...     
1071         banana
433      pigeonpeas
674        mungbean
1099         banana
1608         orange
Name: label, Length: 1760, dtype: object

# Random Forest

In [78]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 20-tree random forest on the training split; fit() returns the estimator itself.
RF = RandomForestClassifier(n_estimators=20, random_state=0).fit(Xtrain, Ytrain)

# Score the held-out split and record the result for the later accuracy comparison.
predicted_values = RF.predict(Xtest)
x = metrics.accuracy_score(Ytest, predicted_values)
model.append('RF')
acc.append(x)
print("RF's Accuracy is: ", x)

# Per-class precision, recall and F1 on the test split.
print(classification_report(Ytest, predicted_values))

RF's Accuracy is:  0.9886363636363636
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        13
      banana       1.00      1.00      1.00        17
   blackgram       1.00      1.00      1.00        16
    chickpea       1.00      1.00      1.00        21
     coconut       1.00      1.00      1.00        21
      coffee       1.00      1.00      1.00        22
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        18
        jute       0.90      0.93      0.91        28
 kidneybeans       1.00      1.00      1.00        14
      lentil       1.00      1.00      1.00        23
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        26
   mothbeans       1.00      1.00      1.00        19
    mungbean       1.00      1.00      1.00        24
   muskmelon       1.00      1.00      1.00        23
      orange       1.00      1.00      1.00

In [79]:
# Training labels again (same display as right after the split).
Ytrain

1936         cotton
610        mungbean
372     kidneybeans
1559          apple
1500          apple
           ...     
1071         banana
433      pigeonpeas
674        mungbean
1099         banana
1608         orange
Name: label, Length: 1760, dtype: object

In [80]:
# Inspect the training feature rows produced by the split.
Xtrain

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
1936,140,60,19,22.000851,79.472710,7.388266,90.422242
610,27,54,22,29.530376,86.733460,7.156563,59.872321
372,5,63,24,18.623288,23.024103,5.532101,135.337803
1559,17,127,197,23.641424,93.744615,6.155939,116.691218
1500,16,145,204,22.750888,90.694892,5.521467,110.431786
...,...,...,...,...,...,...,...
1071,99,74,49,25.787498,84.511942,6.020445,114.200546
433,22,66,24,23.453790,46.487148,7.109598,150.871220
674,27,42,19,29.256493,81.979522,6.864839,42.024833
1099,95,94,49,29.507046,78.205856,5.507642,98.125658


In [81]:
# 5-fold cross-validation of the Random Forest over the full feature/target set;
# the array of per-fold scores is displayed as the cell output.
score = cross_val_score(estimator=RF, X=features, y=target, cv=5)
score

array([0.99772727, 0.99545455, 0.99090909, 0.99318182, 0.99318182])

### Saving trained Random Forest model

In [83]:
import pickle

# Persist the fitted Random Forest classifier to disk with pickle.
RF_pkl_filename = 'rf_crop_reccomend_Nov24.pkl'
# The context manager closes the file even if pickling raises,
# so a failed dump cannot leave an open handle behind.
with open(RF_pkl_filename, 'wb') as RF_Model_pkl:
    pickle.dump(RF, RF_Model_pkl)

## Accuracy Comparison

In [85]:
# Map each model name to its recorded accuracy, then print one "name --> accuracy" line per model.
accuracy_models = {name: value for name, value in zip(model, acc)}
for k, v in accuracy_models.items():
    print(k, '-->', v)

RF --> 0.9886363636363636


## Making a prediction

In [87]:
# Predict the crop for one hand-entered sample.
# Values follow the training column order: N, P, K, temperature, humidity, ph, rainfall.
data = np.array([[104, 18, 30, 23.603016, 60.3, 6.7, 140.91]])
# RF was fitted on a DataFrame with named columns. A bare array is matched by
# position only and makes sklearn (>= 1.0) emit a feature-name warning, which is
# hidden here because warnings are globally suppressed. Labelling the columns
# with the training column names makes the mapping explicit.
prediction = RF.predict(pd.DataFrame(data, columns=features.columns))
print(prediction)

['coffee']


In [88]:
# Predict the crop for a second hand-entered sample.
# Values follow the training column order: N, P, K, temperature, humidity, ph, rainfall.
data = np.array([[83, 45, 60, 28, 70.3, 7.0, 150.9]])
# Wrap the sample in a DataFrame carrying the training column names so the
# features are matched by name, not just by position (RF was fitted on a DataFrame).
prediction = RF.predict(pd.DataFrame(data, columns=features.columns))
print(prediction)

['papaya']
