In [22]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.utils import shuffle
from keras.utils import to_categorical
from sklearn.neighbors import KNeighborsClassifier
from keras.models import Sequential
from sklearn.preprocessing import scale
import tensorflow as tf
from keras.layers import Dense
import pickle
from sklearn.metrics import accuracy_score
import random

In [38]:
df=pd.read_csv("crop_data.csv")

In [39]:
df.columns

Index(['STATE', 'SOIL_TYPE', 'N_SOIL', 'P_SOIL', 'K_SOIL', 'TEMPERATURE',
       'HUMIDITY', 'ph', 'RAINFALL', 'CROP_PRICE', 'CROP'],
      dtype='object')

In [41]:
df.drop(columns=['STATE'],inplace=True)

In [42]:
df.head()

Unnamed: 0,SOIL_TYPE,N_SOIL,P_SOIL,K_SOIL,TEMPERATURE,HUMIDITY,ph,RAINFALL,CROP_PRICE,CROP
0,Sandy soil,90,42,43,20.879744,82.002744,6.502985,202.935536,7000,Amaranthus
1,Sandy soil,85,58,41,21.770462,80.319644,7.038096,226.655537,5000,Green Banana
2,Sandy soil,60,55,44,23.004459,82.320763,7.840207,263.964248,7000,Ladies Finger
3,Sandy soil,74,35,40,26.491096,80.158363,6.980401,242.864034,7000,Bitter Gourd
4,Sandy soil,78,42,42,20.130175,81.604873,7.628473,262.71734,120000,Black pepper


In [43]:
df.nunique()

SOIL_TYPE        15
N_SOIL          137
P_SOIL          117
K_SOIL           73
TEMPERATURE    2200
HUMIDITY       2200
ph             2200
RAINFALL       2200
CROP_PRICE      502
CROP            149
dtype: int64

In [44]:
df['CROP'].unique()

array(['Amaranthus', 'Green Banana', 'Ladies Finger', 'Bitter Gourd',
       'Black pepper', 'Bottle Gourd', 'Brinjal', 'Cabbage', 'Carrot',
       'Cauliflower', 'Cluster beans', 'Coconut', 'Colacasia', 'Onion',
       'Potato', 'Tomato', 'Bengal Gram', 'Jowar', 'Paddy', 'Lentil',
       'Rice', 'Cucumber', 'Field Pea', 'French Beans', 'Green Chilli',
       'Knool Khol', 'Pumpkin', 'Raddish', 'Black Gram', 'Green Gram',
       'Jute', 'Maida Atta', 'Mustard', 'Wheat Atta', 'Garlic',
       'Masur Dal', 'Ridge Gourd', 'Arecanut', 'Red Gram', 'Maize',
       'Dry Chillies', 'Groundnut', 'Capsicum', 'Guar', 'Lemon', 'Bajra',
       'Castor Seed', 'Coriander', 'Cowpea', 'Drumstick', 'Elephat Yam',
       'Ginger', 'Seam', 'Methi Leaves', 'Green Onion', 'Peas cod',
       'Pigeon Pea', 'Sponge Gourd', 'Surat Beans', 'Sweet Potato',
       'Tinda', 'Cluster Beans', 'Cotton', 'Wheat', 'Chholia',
       'Little Gourd', 'Round gourd', 'Leafy Vegetable', 'Mint', 'Papaya',
       'Spinach', 'Pa

In [45]:
df.shape

(2200, 10)

In [46]:
df['CROP']=df['CROP'].astype("category")
df['CROP_codes']=df['CROP'].cat.codes

df['SOIL_TYPE']=df['SOIL_TYPE'].astype("category")
df['SOIL_TYPE_codes']=df['SOIL_TYPE'].cat.codes

In [None]:
df.drop(columns=['CROP','SOIL_TYPE']).corr()

In [70]:
X=df.drop(columns=['CROP','SOIL_TYPE','CROP_codes'],axis=1)
X=np.array(X)
Y=np.array(df['CROP_codes'])

Unnamed: 0,N_SOIL,P_SOIL,K_SOIL,TEMPERATURE,HUMIDITY,ph,RAINFALL,CROP_PRICE,CROP_codes,SOIL_TYPE_codes
N_SOIL,1.0,-0.23146,-0.140512,0.026504,0.190688,0.096683,0.05902,-0.001302,0.108717,-0.217312
P_SOIL,-0.23146,1.0,0.736232,-0.127541,-0.118734,-0.138019,-0.063839,-0.023683,-0.064485,0.015944
K_SOIL,-0.140512,0.736232,1.0,-0.160387,0.190859,-0.169503,-0.053461,-0.021923,-0.010712,-0.256741
TEMPERATURE,0.026504,-0.127541,-0.160387,1.0,0.20532,-0.017795,-0.030084,-0.029044,0.000355,-0.028574
HUMIDITY,0.190688,-0.118734,0.190859,0.20532,1.0,-0.008483,0.094423,-0.041414,0.034028,-0.41089
ph,0.096683,-0.138019,-0.169503,-0.017795,-0.008483,1.0,-0.109069,-0.033548,0.033992,-0.021919
RAINFALL,0.05902,-0.063839,-0.053461,-0.030084,0.094423,-0.109069,1.0,0.058847,0.03066,-0.281774
CROP_PRICE,-0.001302,-0.023683,-0.021923,-0.029044,-0.041414,-0.033548,0.058847,1.0,-0.118927,0.182653
CROP_codes,0.108717,-0.064485,-0.010712,0.000355,0.034028,0.033992,0.03066,-0.118927,1.0,-0.162522
SOIL_TYPE_codes,-0.217312,0.015944,-0.256741,-0.028574,-0.41089,-0.021919,-0.281774,0.182653,-0.162522,1.0


In [71]:
x_train,x_test, y_train, y_test = train_test_split(X,Y, test_size = 0.2, random_state = 1,shuffle=True)

In [58]:
best_dct_model=None
best=0

Decision Tree Classifier

In [72]:
for i in range (1,100):
    dct_model=DecisionTreeClassifier(splitter='random',criterion="entropy",random_state=random.randint(5,20),max_depth=random.randint(5,20))
    dct_model.fit(x_train,y_train)
    dct_y_pred=dct_model.predict(x_test)
    score =accuracy_score(y_test,dct_y_pred)
    if score>best:
        best_dct_model=dct_model
        best=score

best

0.17954545454545454

In [63]:
def try_rf(n):
    random_forest_model=RandomForestClassifier(n_estimators=n,random_state=42)
    random_forest_model.fit(x_train,y_train)
    random_forest_y_pred=random_forest_model.predict(x_test)
    score =accuracy_score(y_test,random_forest_y_pred)
    return random_forest_model,random_forest_y_pred,score

best_score = n = 0
random_forest_best_model = knn_y_pred = None
for i in range(200, 300,20):
    random_forest_model, y_pred, score = try_rf(i)
    if score > best_score:
        best_score = score
        random_forest_best_model = random_forest_model
        random_forest_y_pred = y_pred
        n = i
print("Best Score", best_score, n)

Best Score 0.05909090909090909 240


In [53]:
def try_knn(n_neighbors):
    knn_model = KNeighborsClassifier(n_neighbors=n_neighbors,p=5,n_jobs=20,algorithm="kd_tree")
    knn_model.fit(x_train, y_train)
    knn_y_pred = knn_model.predict(x_test)
    score = accuracy_score(y_test, knn_y_pred)
    return knn_model, knn_y_pred, score


best_score = n = 0
knn_best_model = knn_y_pred = None
for i in range(1, 100):
    knn_model, y_pred, score = try_knn(i)
    if score > best_score:
        best_score = score
        knn_best_model = knn_model
        knn_y_pred = y_pred
        n = i
print("Best Score", best_score, n)

Best Score 0.17727272727272728 10


In [65]:
X.shape

(2200, 5)

In [66]:
in_dim = X.shape[1]
model = Sequential()
model.add(Dense(64, input_dim = in_dim, activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(10, activation = 'relu'))
model.add(Dense(1))

model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

model.fit(x_train, y_train, epochs = 100, batch_size = 3,verbose=False)
scores = model.evaluate(x_test, y_test)

for i, m in enumerate(model.metrics_names):
    print("\n%s: %.3f"% (m, scores[i]))

  return dispatch_target(*args, **kwargs)



loss: 0.000

accuracy: 0.005
