In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

data = pd.read_csv('crops.csv')

In [2]:
data.head()

Unnamed: 0,Soil_color,Nitrogen,Phosphorus,Potassium,Rainfall,Temperature,Crop,Fertilizer
0,Black,75,50,100,1000,20,Sugarcane,Urea
1,Black,80,50,100,1000,20,Sugarcane,Urea
2,Black,85,50,100,1000,20,Sugarcane,Urea
3,Black,90,50,100,1000,20,Sugarcane,Urea
4,Black,95,50,100,1000,20,Sugarcane,Urea


In [3]:
data.isnull().sum()

Soil_color     0
Nitrogen       0
Phosphorus     0
Potassium      0
Rainfall       0
Temperature    0
Crop           0
Fertilizer     0
dtype: int64

In [5]:
data.shape

(4513, 8)

In [6]:
mappings = {}

for column in data.select_dtypes(include=['object']).columns:
    data[column] = data[column].astype('category')
    mappings[column] = dict(enumerate(data[column].cat.categories))
    data[column] = data[column].cat.codes

print(data.head())
print("\nMappings:\n")
for column, mapping in mappings.items():
    print(f"{column}: {mapping}")

   Soil_color  Nitrogen  Phosphorus  Potassium  Rainfall  Temperature  Crop  \
0           1        75          50        100      1000           20    11   
1           1        80          50        100      1000           20    11   
2           1        85          50        100      1000           20    11   
3           1        90          50        100      1000           20    11   
4           1        95          50        100      1000           20    11   

   Fertilizer  
0          18  
1          18  
2          18  
3          18  
4          18  

Mappings:

Soil_color: {0: 'Alluvial', 1: 'Black', 2: 'Clay soil', 3: 'Red', 4: 'Red ', 5: 'Reddish Brown'}
Crop: {0: 'Cotton', 1: 'Ginger', 2: 'Gram', 3: 'Grapes', 4: 'Groundnut', 5: 'Jowar', 6: 'Maize', 7: 'Masoor', 8: 'Moong', 9: 'Rice', 10: 'Soybean', 11: 'Sugarcane', 12: 'Tur', 13: 'Turmeric', 14: 'Urad', 15: 'Wheat'}
Fertilizer: {0: ' NPK', 1: '10:10:10 NPK', 2: '10:26:26 NPK', 3: '12:32:16 NPK', 4: '13:32:26 NPK', 5: 

(3610, 7) (903, 7) (3610,) (903,)


In [11]:
x = data[['Soil_color', 'Rainfall', 'Temperature']]
y_nitro = data['Nitrogen']
y_phos = data['Phosphorus']
y_pot = data['Potassium']
y_fert = data['Fertilizer']

In [None]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(data.drop('Crop', axis=1), data['Crop'], test_size=0.2, random_state=42)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import *


model_nitro = [
    make_pipeline(StandardScaler(), LinearRegression()),
    make_pipeline(StandardScaler(), Ridge()),
    make_pipeline(StandardScaler(), Lasso()),
    make_pipeline(StandardScaler(), DecisionTreeRegressor()),
    make_pipeline(StandardScaler(), RandomForestRegressor()),
    make_pipeline(StandardScaler(), GradientBoostingRegressor()),
    make_pipeline(StandardScaler(), SVR())
]


best_mse = float('inf')
best_r2 = 0
best_regressor = None
for model_nitro in model_nitro:
    model_nitro.fit(x, y_nitro)
    predictions = model_nitro.predict(x)
    mse = mean_squared_error(y_nitro, predictions)
    r2 = r2_score(y_nitro, predictions)
    if mse < best_mse:
        best_mse = mse
        best_regressor = model_nitro
        best_r2 = r2
        print(best_regressor, ":",best_r2)

print("___________________________________________________________________________________________________________________")
print(f"Best Regressor: {best_regressor}")
print(f"Best MSE: {best_mse}")
print(f"Best R2: {best_r2}")

import pickle
with open('model_nitro.pkl', 'wb') as file:
    pickle.dump(best_regressor, file)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression())]) : 0.114768383278681
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('decisiontreeregressor', DecisionTreeRegressor())]) : 0.5837574460095003
___________________________________________________________________________________________________________________
Best Regressor: Pipeline(steps=[('standardscaler', StandardScaler()),
                ('decisiontreeregressor', DecisionTreeRegressor())])
Best MSE: 602.8407461801172
Best R2: 0.5837574460095003


In [16]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import *


model_phos = [
    make_pipeline(StandardScaler(), LinearRegression()),
    make_pipeline(StandardScaler(), Ridge()),
    make_pipeline(StandardScaler(), Lasso()),
    make_pipeline(StandardScaler(), DecisionTreeRegressor()),
    make_pipeline(StandardScaler(), RandomForestRegressor()),
    make_pipeline(StandardScaler(), GradientBoostingRegressor()),
    make_pipeline(StandardScaler(), SVR())
]


best_mse = float('inf')
best_r2 = 0
best_regressor = None
for model_phos in model_phos:
    model_phos.fit(x, y_phos)
    predictions = model_phos.predict(x)
    mse = mean_squared_error(y_phos, predictions)
    r2 = r2_score(y_phos, predictions)
    if mse < best_mse:
        best_mse = mse
        best_regressor = model_phos
        best_r2 = r2
        print(best_regressor, ":",best_r2)

print("___________________________________________________________________________________________________________________")
print(f"Best Regressor: {best_regressor}")
print(f"Best MSE: {best_mse}")
print(f"Best R2: {best_r2}")

import pickle
with open('model_phos.pkl', 'wb') as file:
    pickle.dump(best_regressor, file)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression())]) : 0.07537444728521481
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('decisiontreeregressor', DecisionTreeRegressor())]) : 0.5185595797008713
___________________________________________________________________________________________________________________
Best Regressor: Pipeline(steps=[('standardscaler', StandardScaler()),
                ('decisiontreeregressor', DecisionTreeRegressor())])
Best MSE: 131.87023782583648
Best R2: 0.5185595797008713


In [17]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import *


model_pot = [
    make_pipeline(StandardScaler(), LinearRegression()),
    make_pipeline(StandardScaler(), Ridge()),
    make_pipeline(StandardScaler(), Lasso()),
    make_pipeline(StandardScaler(), DecisionTreeRegressor()),
    make_pipeline(StandardScaler(), RandomForestRegressor()),
    make_pipeline(StandardScaler(), GradientBoostingRegressor()),
    make_pipeline(StandardScaler(), SVR())
]


best_mse = float('inf')
best_r2 = 0
best_regressor = None
for model_pot in model_pot:
    model_pot.fit(x, y_pot)
    predictions = model_pot.predict(x)
    mse = mean_squared_error(y_pot, predictions)
    r2 = r2_score(y_pot, predictions)
    if mse < best_mse:
        best_mse = mse
        best_regressor = model_pot
        best_r2 = r2
        print(best_regressor, ":",best_r2)

print("___________________________________________________________________________________________________________________")
print(f"Best Regressor: {best_regressor}")
print(f"Best MSE: {best_mse}")
print(f"Best R2: {best_r2}")

import pickle
with open('model_pot.pkl', 'wb') as file:
    pickle.dump(best_regressor, file)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression())]) : 0.21008154414824887
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('decisiontreeregressor', DecisionTreeRegressor())]) : 0.7068824962799305
___________________________________________________________________________________________________________________
Best Regressor: Pipeline(steps=[('standardscaler', StandardScaler()),
                ('decisiontreeregressor', DecisionTreeRegressor())])
Best MSE: 373.3233232084687
Best R2: 0.7068824962799305


In [18]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import *


model_fert = [
    make_pipeline(StandardScaler(), LinearRegression()),
    make_pipeline(StandardScaler(), Ridge()),
    make_pipeline(StandardScaler(), Lasso()),
    make_pipeline(StandardScaler(), DecisionTreeRegressor()),
    make_pipeline(StandardScaler(), RandomForestRegressor()),
    make_pipeline(StandardScaler(), GradientBoostingRegressor()),
    make_pipeline(StandardScaler(), SVR())
]


best_mse = float('inf')
best_r2 = 0
best_regressor = None
for model_fert in model_fert:
    model_fert.fit(x, y_fert)
    predictions = model_fert.predict(x)
    mse = mean_squared_error(y_fert, predictions)
    r2 = r2_score(y_fert, predictions)
    if mse < best_mse:
        best_mse = mse
        best_regressor = model_fert
        best_r2 = r2
        print(best_regressor, ":",best_r2)

print("___________________________________________________________________________________________________________________")
print(f"Best Regressor: {best_regressor}")
print(f"Best MSE: {best_mse}")
print(f"Best R2: {best_r2}")

import pickle
with open('model_fert.pkl', 'wb') as file:
    pickle.dump(best_regressor, file)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression())]) : 0.022914936507134498
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('decisiontreeregressor', DecisionTreeRegressor())]) : 0.19623865301571597
___________________________________________________________________________________________________________________
Best Regressor: Pipeline(steps=[('standardscaler', StandardScaler()),
                ('decisiontreeregressor', DecisionTreeRegressor())])
Best MSE: 20.310789622019723
Best R2: 0.19623865301571597
