In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [8]:
import seaborn as sns

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [9]:
# Read 'croprec.csv' (path is relative to the notebook's working directory) into a DataFrame.
df = pd.read_csv('croprec.csv')

In [10]:
# Preview the first five rows to check the columns and value formats.
df.head(n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [12]:
# Per-column flag: True if the column contains at least one missing value.
df.isna().any()

N              False
P              False
K              False
temperature    False
humidity       False
ph             False
rainfall       False
label          False
dtype: bool

In [13]:
# Enumerate the distinct crops present in the 'label' column.
crop_labels = df['label'].unique()
print('Crop count: ', len(crop_labels))
print('Crop labels: ', crop_labels)

Crop count:  22
Crop labels:  ['rice' 'maize' 'chickpea' 'kidneybeans' 'pigeonpeas' 'mothbeans'
 'mungbean' 'blackgram' 'lentil' 'pomegranate' 'banana' 'mango' 'grapes'
 'watermelon' 'muskmelon' 'apple' 'orange' 'papaya' 'coconut' 'cotton'
 'jute' 'coffee']


In [14]:
# Mean of every feature per crop: one row per label, one column per feature.
crop_sum = df.pivot_table(
    index='label',
    aggfunc='mean',
)
crop_sum.head(n=5)

Unnamed: 0_level_0,K,N,P,humidity,ph,rainfall,temperature
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
apple,199.89,20.8,134.22,92.333383,5.929663,112.654779,22.630942
banana,50.05,100.23,82.01,80.358123,5.983893,104.62698,27.376798
blackgram,19.24,40.02,67.47,65.118426,7.133952,67.884151,29.97334
chickpea,79.92,40.09,67.79,16.860439,7.336957,80.058977,18.872847
coconut,30.59,21.98,16.93,94.844272,5.976562,175.686646,27.409892


In [15]:
# Nitrogen Analysis
# Side-by-side horizontal bar charts of the ten crops with the highest mean N
# and the ten crops with the lowest mean N (means taken from `crop_sum`).

crop_sum_nitrogen = crop_sum.sort_values(by='N', ascending=False)

# The figure has two panels, so supply one title per panel (row-major order);
# a single title would leave the right-hand panel unlabelled.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('Nitrogen: most required', 'Nitrogen: least required'),
)

# Re-sort each slice ascending so the largest bar is drawn at the top of a
# horizontal bar chart. Each slice is sorted once and reused for x, y and text.
most_nitrogen = crop_sum_nitrogen['N'].head(10).sort_values()
least_nitrogen = crop_sum_nitrogen['N'].tail(10).sort_values()

top = {
    'y': most_nitrogen.index,
    'x': most_nitrogen,
}

last = {
    'y': least_nitrogen.index,
    'x': least_nitrogen,
}

fig.add_trace(go.Bar(top, name='Most Nitrogen Required', orientation='h', text=top['x'], textposition='auto'), row=1, col=1)
fig.add_trace(go.Bar(last, name='Least Nitrogen Required', orientation='h', text=last['x'], textposition='auto'), row=1, col=2)
fig.show()


In [16]:
# Modeling
# Features are every column except the target; the target is the crop name in 'label'.

Y = df['label']
X = df.drop(columns='label')

In [17]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the shuffled split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
    shuffle=True,
)

In [19]:
%pip install lightgbm

Collecting lightgbm
  Downloading lightgbm-3.3.1-py3-none-win_amd64.whl (1.0 MB)
Installing collected packages: lightgbm
Successfully installed lightgbm-3.3.1
Note: you may need to restart the kernel to use updated packages.


In [20]:
import lightgbm as lgb

In [21]:
# LightGBM classifier with library-default hyperparameters (none are overridden here).
model = lgb.LGBMClassifier()

In [22]:
# Train on the training split only; as the cell's last expression, the fitted estimator is displayed.
model.fit(X_train, Y_train)

LGBMClassifier()

In [23]:
# Predicted labels for the held-out test features.
y_pred = model.predict(X_test)

In [26]:
from sklearn.metrics import accuracy_score

# Fraction of test rows whose predicted label equals the true label.
# scikit-learn's argument order is (y_true, y_pred); compute the score once
# and reuse it for the report instead of recomputing it inside print().
accuracy = accuracy_score(Y_test, y_pred)
print('LightGBM Model accuracy score: {0:0.4f}'.format(accuracy))

LightGBM Model accuracy score: 0.9758


In [27]:
import pickle

In [29]:
# Serialise the fitted classifier to 'model.pkl' in the working directory.
with open('model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

In [36]:
# Round-trip check: read the classifier back from 'model.pkl' into `model2`.
# The file was written by this notebook; pickle.load must not be used on untrusted files.
with open('model.pkl', mode='rb') as model_file:
    model2 = pickle.load(model_file)
# Display the first dataset row as a reference sample.
df.iloc[0]

N                   90
P                   42
K                   43
temperature    20.8797
humidity       82.0027
ph             6.50299
rainfall       202.936
label             rice
Name: 0, dtype: object

In [37]:
# Hand-entered sample in the feature column order: N, P, K, temperature, humidity, ph, rainfall.
pdata = [90, 42, 43, 20, 82, 6, 202]

# predict() expects a 2-D input, so wrap the single sample in a list.
model2.predict([pdata])

array(['rice'], dtype=object)

In [41]:
# Per-class probabilities for the same single sample (one row, one column per class).
# Columns presumably follow the order of model.classes_ — confirm before indexing by position.
# NOTE(review): this uses the in-memory `model`, whereas the preceding prediction used the
# reloaded `model2` — confirm which one is intended here.
model.predict_proba([pdata])

array([[3.01299347e-07, 4.06088098e-07, 1.05455933e-07, 2.84886999e-07,
        3.09031348e-07, 3.86031080e-07, 2.64032406e-07, 3.06862606e-07,
        1.34866129e-06, 3.46682202e-07, 1.03522017e-07, 3.27615877e-07,
        1.75267842e-07, 9.10153199e-08, 2.41820072e-07, 2.85477398e-07,
        2.94896225e-07, 2.04551004e-07, 2.29112991e-07, 2.60254711e-07,
        9.99993392e-01, 3.35242156e-07]])