In [9]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
import pickle

# Crop-recommendation training script: load the dataset, integer-encode the
# categorical columns and the target, train several classifiers on an 80/20
# split, and pickle the most accurate one together with the fitted encoders.

# Load the dataset
data = pd.read_csv('dataset.csv')

# Label-encode each categorical feature column and keep the fitted encoder per
# column so the same string -> integer mapping can be reapplied at inference.
# NOTE(review): 'fertilizer_availability' is intentionally left out of this
# list (per the original comment) - presumably it is already numeric; confirm.
label_encoders = {}
categorical_columns = [
    'is_rain_falling',
    'is_drought',
    'political_issues',
    'soil_moisture',
    'market_demand',
    'soil_type',
    'pest_infestation',
    'season',
]

for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Encode the target variable 'crop'; the encoder is saved later so predicted
# class indices can be mapped back to crop names.
crop_encoder = LabelEncoder()
data['crop'] = crop_encoder.fit_transform(data['crop'])

# Define features (X) and target (y)
X = data.drop(['crop'], axis=1)
y = data['crop']

# Split the data into training and testing sets (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize models to test
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    # Logistic regression would treat the integer label codes as ordered
    # magnitudes, so the categorical columns are one-hot encoded first; all
    # remaining columns pass through unchanged.
    'Logistic Regression': Pipeline([
        ('onehot', ColumnTransformer(
            # handle_unknown='ignore': the encoder is fitted on X_train only, so
            # a category value that appears only in X_test (or at inference
            # time) would otherwise make predict() raise ValueError.
            [('encoder', OneHotEncoder(handle_unknown='ignore'), categorical_columns)],
            remainder='passthrough',
        )),
        # max_iter raised from the default of 100 so the solver has room to
        # converge instead of stopping early with a ConvergenceWarning.
        ('classifier', LogisticRegression(max_iter=1000, random_state=42)),
    ]),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42),
}

# Train and evaluate each model, recording test-set accuracy by model name
model_performance = {}

for model_name, model in models.items():
    print(f"Training {model_name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    model_performance[model_name] = accuracy
    print(f"{model_name} accuracy: {accuracy * 100:.2f}%\n")

# Pick the model with the highest test accuracy (first one wins on a tie)
best_model_name = max(model_performance, key=model_performance.get)
best_model = models[best_model_name]

# Save the trained best model to a file using pickle
with open('best_crop_recommendation_model.pkl', 'wb') as f:
    pickle.dump(best_model, f)

# Save the label encoders and crop encoder alongside the model; both are needed
# to encode new inputs and to decode predictions.
with open('label_encoders.pkl', 'wb') as f:
    pickle.dump({'label_encoders': label_encoders, 'crop_encoder': crop_encoder}, f)

print(f"Best model ({best_model_name}) has been saved successfully with an accuracy of {model_performance[best_model_name] * 100:.2f}%.")


ModuleNotFoundError: No module named 'xgboost'