In [122]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,r2_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.impute import SimpleImputer
import pickle

In [55]:
!pip install xgboost



In [56]:
import pandas as pd
import numpy as np

# Load your dataset
df = pd.read_csv("Crop_Recommendation.csv")

# Define realistic soil moisture ranges for each crop
crop_soil_moisture_ranges = {
    "rice": (700, 900),
    "maize": (400, 600),
    "chickpea": (300, 500),
    "kidneybeans": (350, 500),
    "pigeonpeas": (350, 500),
    "mothbeans": (300, 450),
    "mungbean": (300, 450),
    "blackgram": (300, 450),
    "lentil": (300, 450),
    "pomegranate": (250, 400),
    "banana": (600, 800),
    "mango": (250, 400),
    "grapes": (300, 500),
    "watermelon": (350, 550),
    "muskmelon": (350, 550),
    "apple": (250, 450),
    "orange": (250, 450),
    "papaya": (500, 700),
    "coconut": (60, 800),
    "cotton": (400, 600),
    "jute": (450, 650),
    "coffee": (400, 600)
}

# Function to assign realistic soil moisture values
def generate_soil_moisture(crop):
    low, high = crop_soil_moisture_ranges.get(crop.lower(), (350, 600))
    return np.random.randint(low, high + 1)

# Apply function to Crop column
df["SoilMoisture"] = df["Crop"].apply(generate_soil_moisture)

# Save the updated dataset
df.to_csv("Crop_Recommendation_with_SoilMoisture.csv", index=True)

# Generate random growth stages between 1 and 4 for 2200 samples
np.random.seed(42)
growth_stages = np.random.randint(1, 5, size=2200)

# Add to DataFrame
df['GrowthStage'] = growth_stages


In [57]:
df

Unnamed: 0,Nitrogen,Phosphorus,Potassium,Temperature,Humidity,pH_Value,Rainfall,Crop,SoilMoisture,GrowthStage
0,90,42,43,20.879744,82.002744,6.502985,202.935536,Rice,704,3
1,85,58,41,21.770462,80.319644,7.038096,226.655537,Rice,753,4
2,60,55,44,23.004459,82.320763,7.840207,263.964248,Rice,746,1
3,74,35,40,26.491096,80.158363,6.980401,242.864034,Rice,876,3
4,78,42,42,20.130175,81.604873,7.628473,262.717340,Rice,708,3
...,...,...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,Coffee,483,2
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,Coffee,539,3
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,Coffee,436,4
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,Coffee,441,2


In [58]:
df.isnull().sum()

Nitrogen        0
Phosphorus      0
Potassium       0
Temperature     0
Humidity        0
pH_Value        0
Rainfall        0
Crop            0
SoilMoisture    0
GrowthStage     0
dtype: int64

In [59]:
df.duplicated().sum()

0

In [60]:
X=df[['Temperature','Humidity','SoilMoisture','GrowthStage']]
Y=df[['Nitrogen','Phosphorus','Potassium']]

In [61]:
X

Unnamed: 0,Temperature,Humidity,SoilMoisture,GrowthStage
0,20.879744,82.002744,704,3
1,21.770462,80.319644,753,4
2,23.004459,82.320763,746,1
3,26.491096,80.158363,876,3
4,20.130175,81.604873,708,3
...,...,...,...,...
2195,26.774637,66.413269,483,2
2196,27.417112,56.636362,539,3
2197,24.131797,67.225123,436,4
2198,26.272418,52.127394,441,2


In [62]:
Y

Unnamed: 0,Nitrogen,Phosphorus,Potassium
0,90,42,43
1,85,58,41
2,60,55,44
3,74,35,40
4,78,42,42
...,...,...,...
2195,107,34,32
2196,99,15,27
2197,118,33,30
2198,117,32,34


In [63]:
imputer_X = SimpleImputer(strategy='mean')
X_imputed = imputer_X.fit_transform(X)

imputer_y = SimpleImputer(strategy='mean')
Y_imputed = imputer_y.fit_transform(Y)

In [64]:
optimal_nutrients = {
    1: {'Nitrogen': 10, 'Phosphorus': 5, 'Potassium': 8},
    2: {'Nitrogen': 15, 'Phosphorus': 7, 'Potassium': 12},
    3: {'Nitrogen': 20, 'Phosphorus': 10, 'Potassium': 15},
    4: {'Nitrogen': 12, 'Phosphorus': 8, 'Potassium': 18},
}


In [65]:
x_train,x_test,y_train,y_test=train_test_split(X_imputed,Y_imputed,test_size=0.2,random_state=42)

In [66]:
x_train

array([[ 16.39624284,  92.18151927, 311.        ,   2.        ],
       [ 27.54384835,  69.3478631 , 384.        ,   2.        ],
       [ 27.52185591,  63.13215259, 396.        ,   1.        ],
       ...,
       [ 27.92063282,  51.77965917, 373.        ,   2.        ],
       [ 13.42988625,  80.06633966, 391.        ,   4.        ],
       [ 23.97081395,  62.35557553, 378.        ,   4.        ]])

In [67]:
x_test

array([[ 29.49401389,  94.72981338, 473.        ,   1.        ],
       [ 26.1793464 ,  86.52258079, 492.        ,   4.        ],
       [ 43.36051537,  93.35191636, 554.        ,   3.        ],
       ...,
       [ 23.60564038,  79.29573149, 418.        ,   2.        ],
       [ 22.94276687,  75.37170612, 491.        ,   1.        ],
       [ 19.04380471,  33.10695144, 468.        ,   1.        ]])

In [68]:
scaler=StandardScaler()
x_train_scaled=scaler.fit_transform(x_train)
x_test_scaled=scaler.fit_transform(x_test)

In [69]:
x_train_scaled

array([[-1.80469186,  0.93658618, -1.06438295, -0.42225092],
       [ 0.38599999, -0.10047048, -0.52047338, -0.42225092],
       [ 0.38167811, -0.38277499, -0.43106358, -1.31654889],
       ...,
       [ 0.46004447, -0.89838138, -0.60243235, -0.42225092],
       [-2.38763079,  0.38634019, -0.46831766,  1.36634503],
       [-0.31616132, -0.41804549, -0.56517827,  1.36634503]])

In [70]:
x_test_scaled

array([[ 0.75313559,  1.0157714 ,  0.10753164, -1.28053189],
       [ 0.08410389,  0.66193645,  0.25161956,  1.35233742],
       [ 3.55194693,  0.95636671,  0.72180119,  0.47471432],
       ...,
       [-0.43537232,  0.35036835, -0.30956497, -0.40290879],
       [-0.56916655,  0.1811935 ,  0.24403599, -1.28053189],
       [-1.35613214, -1.640949  ,  0.06961377, -1.28053189]])

In [71]:
model=RandomForestRegressor(n_estimators=100,random_state=42)

In [72]:
model.fit(x_train_scaled,y_train)

In [73]:
model_pred=model.predict(x_test_scaled)

In [74]:
model_pred

array([[ 92.78,  16.73,  46.12],
       [ 90.08,  30.43,  50.5 ],
       [ 56.09,  62.1 ,  51.09],
       ...,
       [102.27,  51.45,  36.73],
       [104.13,  46.8 ,  25.23],
       [ 24.32,  67.1 ,  21.18]])

In [75]:
error=mean_absolute_error(model_pred,y_test)
r2Score=r2_score(model_pred,y_test)


In [76]:
error,r2Score

(13.66355303030302, 0.5192308242303029)

In [77]:
model=LinearRegression()

In [78]:
model.fit(x_train_scaled,y_train)

In [79]:
y_pred=model.predict(x_test_scaled)

In [80]:
y_pred

array([[57.02429714, 48.06153603, 53.27839888],
       [57.31517425, 50.37729764, 53.7999974 ],
       [65.51030073, 37.65492654, 19.45000792],
       ...,
       [46.72569126, 53.68587101, 58.94219304],
       [57.2182556 , 55.94182865, 55.62870581],
       [47.12139953, 64.81052654, 41.44943417]])

In [81]:
error=mean_absolute_error(y_test,y_pred)

In [82]:
error

25.943895797856836

In [83]:
model=KNeighborsClassifier(n_neighbors=2,weights="uniform")

In [84]:
model.fit(x_train_scaled,y_train)

In [85]:
y_pred=model.predict(x_test_scaled)

In [86]:
y_pred

array([[ 98.,  22.,  47.],
       [ 85.,  27.,  39.],
       [ 31.,  49.,  45.],
       ...,
       [100.,  41.,  18.],
       [120.,  48.,  16.],
       [ 12.,  61.,  18.]])

In [87]:
mean_absolute_error(y_test,y_pred)

20.502272727272725

In [88]:
model=KNeighborsClassifier(n_neighbors=2,weights="distance")

In [89]:
model.fit(x_train_scaled,y_train)

In [90]:
y_pred=model.predict(x_test_scaled)

In [91]:
y_pred

array([[ 98.,  22.,  47.],
       [ 86.,  40.,  39.],
       [ 31.,  68.,  45.],
       ...,
       [111.,  41.,  18.],
       [127.,  53.,  24.],
       [ 37.,  72.,  18.]])

In [92]:
mean_absolute_error(y_test,y_pred)

19.504545454545454

In [93]:
model=DecisionTreeClassifier(random_state=42)

In [94]:
model.fit(x_train_scaled,y_train)

In [95]:
y_pred=model.predict(x_test_scaled)

In [96]:
y_pred

array([[117.,  25.,  54.],
       [108.,  22.,  46.],
       [ 60.,  58.,  51.],
       ...,
       [118.,  45.,  23.],
       [127.,  53.,  24.],
       [ 34.,  56.,  17.]])

In [97]:
mean_absolute_error(y_test,y_pred)

16.13712121212121

In [98]:
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor

# Wrap XGBRegressor in MultiOutput
xgb = XGBRegressor()
multi_output_model = MultiOutputRegressor(xgb)

# Fit model
multi_output_model.fit(x_train_scaled, y_train)


In [99]:
test_data=pd.DataFrame({"Temperature":[26],"Humidity":[65],"SoilMoisture":[40],"GrowthStage":[2]})

In [100]:
test_data_imputed = imputer_X.transform(test_data)
test_data_scaled = scaler.transform(test_data_imputed)

In [112]:
prediction = model.predict(test_data_scaled)
predicted_nitrogen = prediction[0][0]
predicted_phosphorus = prediction[0][1]
predicted_potassium = prediction[0][2]
print(predicted_nitrogen)
print(predicted_phosphorus)
print(predicted_potassium)

12.0
58.0
23.0


In [113]:
test_data_np = test_data.to_numpy()
test_data_scaled = scaler.transform(test_data_np)

In [114]:
current_nutrients = {'Nitrogen': 8, 'Phosphorus': 4, 'Potassium': 7}

# Determine the growth stage
growth_stage = int(test_data.iloc[0]['GrowthStage'])
print(growth_stage)

2


In [115]:
optimal = optimal_nutrients.get(growth_stage, {'Nitrogen': 0, 'Phosphorus': 0, 'Potassium': 0})
print(optimal)
optimal_nitrogen = optimal['Nitrogen']
optimal_phosphorus = optimal['Phosphorus']
optimal_potassium = optimal['Potassium']

{'Nitrogen': 15, 'Phosphorus': 7, 'Potassium': 12}


In [118]:
def get_action(predicted, current, optimal, name):
    difference = predicted - current
    if abs(difference) <= 1:
        alert = "Optimal"
        action = f"No need to change {name} levels."
    elif difference > 1:
        alert = "Increase"
        action = f"Increase {name} by {difference:.2f} grams/m²."
    else:
        alert = "Decrease"
        action = f"Reduce {name} by {abs(difference):.2f} grams/m²."
    
    return alert, action


alert_nitrogen, action_nitrogen = get_action(predicted_nitrogen, current_nutrients['Nitrogen'], optimal_nitrogen, "Nitrogen")
alert_phosphorus, action_phosphorus = get_action(predicted_phosphorus, current_nutrients['Phosphorus'], optimal_phosphorus, "Phosphorus")
alert_potassium, action_potassium = get_action(predicted_potassium, current_nutrients['Potassium'], optimal_potassium, "Potassium")

In [120]:
print(f"Predicted Nitrogen Need: {predicted_nitrogen:.2f} grams/m² (Alert: {alert_nitrogen})")
print(f"→ Action: {action_nitrogen}\n")

print(f"Predicted Phosphorus Need: {predicted_phosphorus:.2f} grams/m² (Alert: {alert_phosphorus})")
print(f"→ Action: {action_phosphorus}\n")

print(f"Predicted Potassium Need: {predicted_potassium:.2f} grams/m² (Alert: {alert_potassium})")
print(f"→ Action: {action_potassium}")

Predicted Nitrogen Need: 12.00 grams/m² (Alert: Increase)
→ Action: Increase Nitrogen by 4.00 grams/m².

Predicted Phosphorus Need: 58.00 grams/m² (Alert: Increase)
→ Action: Increase Phosphorus by 54.00 grams/m².

Predicted Potassium Need: 23.00 grams/m² (Alert: Increase)
→ Action: Increase Potassium by 16.00 grams/m².


In [123]:
with open('data.pkl', 'wb') as file:
    pickle.dump(model, file)