In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
#Importing Decision Tree classifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor

In [2]:
df = pd.read_csv('dataset/dataset.csv')
df.head()

Unnamed: 0,temperature,humidity,ph,soil_moisture,label
0,20.879744,82.002744,6.502985,202.935536,rice
1,21.770462,80.319644,7.038096,226.655537,rice
2,23.004459,82.320763,7.840207,263.964248,rice
3,26.491096,80.158363,6.980401,242.864034,rice
4,20.130175,81.604873,7.628473,262.71734,rice


## Analysing Data

In [3]:
df.shape

(3100, 5)

#### Dataset Info

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3100 entries, 0 to 3099
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   temperature    3100 non-null   float64
 1   humidity       3100 non-null   float64
 2   ph             3100 non-null   float64
 3   soil_moisture  3100 non-null   float64
 4   label          3100 non-null   object 
dtypes: float64(4), object(1)
memory usage: 121.2+ KB


#### Checking for Missing Values
Since we can't do statical operations with missing or NaN values, we have to concious about that.

In [5]:
df.isnull().sum()

temperature      0
humidity         0
ph               0
soil_moisture    0
label            0
dtype: int64

## Dataset Preprocessing

In [6]:
# Keeping every column headers
columns = list(df.columns.values)
print("Columns = ", columns)

Columns =  ['temperature', 'humidity', 'ph', 'soil_moisture', 'label']


In [7]:
# Creating dummy variable for patient_gender
# For one hot encoding
one_hot = pd.get_dummies(df.label).iloc[:, :]
one_hot.head()

Unnamed: 0,Adzuki Beans,Black gram,Chickpea,Coconut,Coffee,Cotton,Ground Nut,Jute,Kidney Beans,Lentil,...,maize,mango,millet,muskmelon,orange,papaya,pomegranate,rice,watermelon,wheat
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [8]:
df = pd.concat([df, one_hot], axis=1)
df.head(4)

Unnamed: 0,temperature,humidity,ph,soil_moisture,label,Adzuki Beans,Black gram,Chickpea,Coconut,Coffee,...,maize,mango,millet,muskmelon,orange,papaya,pomegranate,rice,watermelon,wheat
0,20.879744,82.002744,6.502985,202.935536,rice,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,21.770462,80.319644,7.038096,226.655537,rice,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,23.004459,82.320763,7.840207,263.964248,rice,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,26.491096,80.158363,6.980401,242.864034,rice,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [9]:
# Since we one hot encoded we do not want that column anymore
df.drop('label', axis=1, inplace=True)
df.head(4)
# Keeping every column headers
new_columns = list(df.columns.values)
print("Columns = ", new_columns)
# df.to_csv("test.csv")

Columns =  ['temperature', 'humidity', 'ph', 'soil_moisture', 'Adzuki Beans', 'Black gram', 'Chickpea', 'Coconut', 'Coffee', 'Cotton', 'Ground Nut', 'Jute', 'Kidney Beans', 'Lentil', 'Moth Beans', 'Mung Bean', 'Peas', 'Pigeon Peas', 'Rubber', 'Sugarcane', 'Tea', 'Tobacco', 'apple', 'banana', 'grapes', 'maize', 'mango', 'millet', 'muskmelon', 'orange', 'papaya', 'pomegranate', 'rice', 'watermelon', 'wheat']


In [10]:
# Training data do not contains any headers
# During training we are performing statistical operations on data
# Statistical operations only can be performed on numbers
# So lets convert our data to numpy arrays
train=df.iloc[:, 0:4].values
test=df.iloc[: ,4:].values
#Dividing the data into training and test set
X_train,X_test,y_train,y_test=train_test_split(train,test,test_size=0.3)

In [11]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [12]:
clf=DecisionTreeClassifier(random_state=0)
reg=DecisionTreeRegressor()

In [13]:
#Fitting the classifier into training set
clf.fit(X_train,y_train)
pred=clf.predict(X_test)

reg.fit(X_train,y_train)
pred_reg=reg.predict(X_test)

In [14]:
# Finding the accuracy of the model
a=accuracy_score(y_test,pred)
print("The accuracy of this model is: ", a*100)

# Finding the accuracy of the model
reg_accu=accuracy_score(y_test,pred_reg)
print("The accuracy of this model is: ", reg_accu*100)

The accuracy of this model is:  91.29032258064517
The accuracy of this model is:  90.53763440860216


In [15]:
# predict_crop = [[air_humidity],[air_temp],[soil_pH],[soil_moisture]]
predict_crop = [[28.00510977,65.91544318,4.725085187,366.3950408]]

In [16]:
# Putting the names of crop in a single list
crops= ['Adzuki Beans', 'Black gram', 'Chickpea', 'Coconut', 'Coffee', 'Cotton', 'Ground Nut', 'Jute', 'Kidney Beans', 'Lentil', 'Moth Beans', 'Mung Bean', 'Peas', 'Pigeon Peas', 'Rubber', 'Sugarcane', 'Tea', 'Tobacco', 'apple', 'banana', 'grapes', 'maize', 'mango', 'millet', 'muskmelon', 'orange', 'papaya', 'pomegranate', 'rice', 'watermelon', 'wheat']

In [17]:
predict_crop = sc.transform(predict_crop)
predictions = clf.predict(predict_crop)
a = np.argmax(predictions)
print(crops[a])
test = clf.predict_proba(predict_crop)
print(test)

predict_crop = sc.transform(predict_crop)
predictions = reg.predict(predict_crop)
print(predictions)
a = np.argmax(predictions)
print(crops[a])
test = clf.predict_proba(predict_crop)
test

maize
[array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[0., 1.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]]), array([[1., 0.]])]
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 0. 0.]]
muskmelon


[array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[0., 1.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]]),
 array([[1., 0.]])]