<a href="https://colab.research.google.com/github/jacobbstephen/ML_Programs/blob/main/SmartCropManagement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection  import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder,  MinMaxScaler
import matplotlib.pyplot as plt

In [None]:
# Path of the crop recommendation dataset (absolute path under /content).
DATASET_PATH = '/content/Crop_recommendation_dataset.csv'

# Load the whole CSV into a DataFrame; later cells read and modify `crop_data`.
crop_data = pd.read_csv(DATASET_PATH)

In [None]:
# Preview the first five rows to sanity-check the loaded columns and values.
crop_data.head(n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


DATA PREPROCESSING

In [None]:
# Count how many rows each crop label has, to check the class balance.
label_counts = crop_data['label'].value_counts()
label_counts

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
rice,100
maize,100
jute,100
cotton,100
coconut,100
papaya,100
orange,100
apple,100
muskmelon,100
watermelon,100


In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
crop_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB


In [None]:
# Show the column index so the feature names and the target column are visible.
column_index = crop_data.columns
print(column_index)

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')


In [None]:
# Learn the mapping from crop names to integer class ids. The fitted encoder is
# kept so predictions can later be mapped back with `inverse_transform`.
label_encoder = LabelEncoder().fit(crop_data['label'])

# Replace the text labels in the frame with their integer ids.
# NOTE: this overwrites the column this cell reads from, so the cell should be
# run only once per freshly loaded `crop_data`.
labels = label_encoder.transform(crop_data['label'])
crop_data['label'] = labels
print(labels)

[20 20 20 ...  5  5  5]


In [None]:
# Same per-class count as before, now keyed by the integer ids written into
# the 'label' column by the encoding step.
encoded_label_counts = crop_data['label'].value_counts()
encoded_label_counts

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
20,100
11,100
8,100
6,100
4,100
17,100
16,100
0,100
15,100
21,100


Since normalisation is applied only to the input features, we first separate the inputs (X) from the output label (Y).

In [None]:
# Target: the encoded crop label column.
Y = crop_data['label']

# Inputs: every remaining column of the frame (the 'label' column is dropped).
X = crop_data.drop(columns='label')

print(Y)

0       20
1       20
2       20
3       20
4       20
        ..
2195     5
2196     5
2197     5
2198     5
2199     5
Name: label, Length: 2200, dtype: int64


In [None]:
# NORMALIZE THE DATA
# Always fit the scaler on the raw feature columns taken from `crop_data`
# instead of on `X` itself. The previous form read and overwrote `X`, so
# re-running the cell refitted the scaler on already-normalised values; the
# scaler then no longer mapped raw measurements into the range the model was
# trained on. Reading from `crop_data` makes the cell safe to re-run.
raw_features = crop_data.drop('label', axis=1)

# MinMaxScaler rescales each feature column to the [0, 1] range.
# NOTE: the scaler is fitted on the full dataset, before the train/test split.
scaler = MinMaxScaler()
X = pd.DataFrame(scaler.fit_transform(raw_features), columns=raw_features.columns)
X.head()



Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
0,0.642857,0.264286,0.19,0.345886,0.790267,0.466264,0.656458
1,0.607143,0.378571,0.18,0.371445,0.770633,0.54948,0.741675
2,0.428571,0.357143,0.195,0.406854,0.793977,0.674219,0.87571
3,0.528571,0.214286,0.175,0.506901,0.768751,0.540508,0.799905
4,0.557143,0.264286,0.185,0.324378,0.785626,0.641291,0.871231


In [None]:
# Show the target again: the normalisation step only reassigns X, so Y still
# holds the integer-encoded labels.
print(Y)

0       20
1       20
2       20
3       20
4       20
        ..
2195     5
2196     5
2197     5
2198     5
2199     5
Name: label, Length: 2200, dtype: int64


Split and train the model

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_options = dict(test_size=0.2, random_state=2)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, **split_options)

# Random forest of 100 trees, with max_features='sqrt' and a fixed seed for
# repeatable training.
forest_options = dict(n_estimators=100, max_features='sqrt', random_state=42)
model = RandomForestClassifier(**forest_options)

# Fit on the training split; the fitted estimator is the cell's displayed value.
model.fit(X_train, Y_train)

Checking the accuracy

In [None]:
# Evaluate on the training split.
# This is a classification task, so report accuracy (the fraction of samples
# whose predicted class equals the true class). R^2 is a regression metric: on
# class ids its value depends on the arbitrary integers assigned by the label
# encoder, so it does not measure how often the classifier is right.
train_data_prediction = model.predict(X_train)
train_accuracy = metrics.accuracy_score(Y_train, train_data_prediction)
print("Training accuracy : ", train_accuracy)

R squared error :  1.0


In [None]:
# Evaluate on the held-out test split.
# Accuracy (fraction of correctly predicted classes) is used instead of R^2,
# which is a regression metric and is not meaningful on integer class ids.
test_data_prediction = model.predict(X_test)
test_accuracy = metrics.accuracy_score(Y_test, test_data_prediction)
print("Test accuracy : ", test_accuracy)

R squared error :  0.9815972730504611


Predictive System

In [None]:

# Feature order must match the columns the scaler and the model were fitted on.
feature_names = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']

# One raw (un-normalised) sample, wrapped in a single-row DataFrame carrying
# the same feature names as the training data.
input_data = (90, 42, 43, 20.879744, 82.002744, 6.502985, 202.935536)
input_data_df = pd.DataFrame([input_data], columns=feature_names)

# Normalise the sample with the ALREADY-FITTED scaler. Use `transform`, not
# `fit_transform`: the previous code called `scaler.fit_transform(X)`, which
# threw away the sample above, refitted the scaler, and ran the prediction on
# the whole training matrix instead of on the requested input.
input_data_scaled = pd.DataFrame(
    scaler.transform(input_data_df), columns=feature_names
)

# Predict the class id and map it back to the crop name.
prediction = model.predict(input_data_scaled)
prediction = label_encoder.inverse_transform(prediction)
print(prediction[0])


rice


SAVING THE MODEL

In [None]:
import pickle

In [None]:
# Serialise the trained classifier to disk.
# The `with` block guarantees the file handle is flushed and closed even if
# pickling fails; the previous bare `open(...)` was never closed.
# NOTE: only `model` is written here. The fitted `scaler` and `label_encoder`
# used by the predictive-system cell are not persisted by this cell.
filename = 'crop_recommendation.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)