In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

In [2]:
crop = pd.read_csv("Crop_recommendation.csv")

In [3]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
crop['label_encoded'] = label_encoder.fit_transform(crop['label']) + 1
crop_dict = dict(zip(label_encoder.classes_, range(1, len(label_encoder.classes_) + 1)))
print(crop_dict)
crop['label_encoded']=crop['label'].map(crop_dict)

{'apple': 1, 'banana': 2, 'blackgram': 3, 'chickpea': 4, 'coconut': 5, 'coffee': 6, 'cotton': 7, 'grapes': 8, 'jute': 9, 'kidneybeans': 10, 'lentil': 11, 'maize': 12, 'mango': 13, 'mothbeans': 14, 'mungbean': 15, 'muskmelon': 16, 'orange': 17, 'papaya': 18, 'pigeonpeas': 19, 'pomegranate': 20, 'rice': 21, 'watermelon': 22}


In [4]:
crop.drop(['label'],axis=1,inplace=True)
crop.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label_encoded
0,90,42,43,20.879744,82.002744,6.502985,202.935536,21
1,85,58,41,21.770462,80.319644,7.038096,226.655537,21
2,60,55,44,23.004459,82.320763,7.840207,263.964248,21
3,74,35,40,26.491096,80.158363,6.980401,242.864034,21
4,78,42,42,20.130175,81.604873,7.628473,262.71734,21


In [5]:
X = crop.drop(['label_encoded'],axis=1)
y = crop['label_encoded']

In [6]:
X

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
0,90,42,43,20.879744,82.002744,6.502985,202.935536
1,85,58,41,21.770462,80.319644,7.038096,226.655537
2,60,55,44,23.004459,82.320763,7.840207,263.964248
3,74,35,40,26.491096,80.158363,6.980401,242.864034
4,78,42,42,20.130175,81.604873,7.628473,262.717340
...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507
2196,99,15,27,27.417112,56.636362,6.086922,127.924610
2197,118,33,30,24.131797,67.225123,6.362608,173.322839
2198,117,32,34,26.272418,52.127394,6.758793,127.175293


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
ms = MinMaxScaler()

X_train = ms.fit_transform(X_train)
X_test = ms.transform(X_test)

In [9]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [10]:
models = {
    'Logistic Regression': LogisticRegression(),
    'Naive Bayes': GaussianNB(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Bagging': BaggingClassifier(),
    'AdaBoost': AdaBoostClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Extra Trees': ExtraTreeClassifier(),
}


for name, md in models.items():
    md.fit(X_train,y_train)
    ypred = md.predict(X_test)
    
    print(f"{name}  with accuracy : {accuracy_score(y_test,ypred)}")

Logistic Regression  with accuracy : 0.9636363636363636
Naive Bayes  with accuracy : 0.9954545454545455
Support Vector Machine  with accuracy : 0.9681818181818181
K-Nearest Neighbors  with accuracy : 0.9568181818181818
Decision Tree  with accuracy : 0.9863636363636363
Random Forest  with accuracy : 0.9931818181818182
Bagging  with accuracy : 0.9886363636363636
AdaBoost  with accuracy : 0.09545454545454546
Gradient Boosting  with accuracy : 0.9818181818181818
Extra Trees  with accuracy : 0.8272727272727273


In [11]:
voting_clf = VotingClassifier(estimators=[(name, model) for name, model in models.items()], voting='hard')

In [12]:
voting_clf.fit(X_train, y_train)
y_pred_ensemble = voting_clf.predict(X_test)

In [13]:
print(f"Ensemble Model with accuracy: {accuracy_score(y_test, y_pred_ensemble)}")

Ensemble Model with accuracy: 0.990909090909091


In [14]:
def recommendation(N, P, K, temperature, humidity, ph, rainfall, top_n=3):
    features = np.array([[N, P, K, temperature, humidity, ph, rainfall]])

    # Use the same scaler objects used during training
    transformed_features = ms.transform(features)
    transformed_features = sc.transform(transformed_features)

    # Use the trained VotingClassifier
    predictions = voting_clf.predict(transformed_features)

    # Check if predictions are integers, if not, convert them to integers
    if not np.issubdtype(predictions.dtype, np.integer):
        predictions = predictions.astype(int)

    # Get the mapping from integer labels to original crop names
    label_to_crop = {label: crop for crop, label in crop_dict.items()}

    # Map the predicted labels to crop names
    crop_names = [label_to_crop[label] for label in predictions]

    # Get top N predictions and their corresponding crop names
    unique_predictions, counts = np.unique(predictions, return_counts=True)
    top_n_indices = np.argsort(counts)[-top_n:][::-1]
    top_n_crops = [label_to_crop[label] for label in unique_predictions[top_n_indices]]

    return top_n_crops

In [15]:

N = 35
P = 24
K = 43
temperature = 26
humidity = 65
ph = 3
rainfall = 125

predicted_crop = recommendation(N, P, K, temperature, humidity, ph, rainfall, top_n=3)
if predicted_crop[0] in crop_dict:
    print("Top Predicted Crop:", predicted_crop)
else:
    print("Sorry, we cannot recommend a suitable crop for this environment")


Top Predicted Crop: ['mango']




In [16]:
import pickle
pickle.dump(voting_clf,open('model.pkl','wb'))
pickle.dump(ms,open('minmaxscaler.pkl','wb'))
pickle.dump(sc,open('standscaler.pkl','wb'))

In [17]:
pickle.dump(crop_dict,open('num_of_crop.pkl','wb'))