In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Step 1: Load the dataset
# Keep the data location in one variable (relative to the notebook's working
# directory) so the declared path and the path actually read cannot disagree.
file_path = "crop.csv"
df = pd.read_csv(file_path)

# Step 2: Explore the dataset (optional but recommended)
print("Dataset Preview:")
print(df.head())
print("\nDataset Information:")
# DataFrame.info() writes its summary itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
df.info()

# Step 3: Encode the target variable (crop names -> integer class ids)
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Step 4: Select the feature columns / target and split BEFORE any scaling,
# so that no statistic of the held-out rows can influence preprocessing.
features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
X = df[features]
y = df['label']

X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 5: Feature scaling
# Learn mean/variance from the training rows only, then apply that same
# transformation to the test rows (transform, not fit_transform).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Step 6: Define a RandomForest model and set up hyperparameter tuning
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

rf_model = RandomForestClassifier(random_state=42)
# refit=True (the default, spelled out here) retrains the best parameter
# combination on all of X_train / y_train once the search has finished.
grid_search = GridSearchCV(
    estimator=rf_model, param_grid=param_grid, cv=3, scoring='accuracy',
    verbose=1, n_jobs=-1, refit=True
)

# Step 7: Fit the model using GridSearchCV
print("\nTuning hyperparameters. This might take a while...")
grid_search.fit(X_train, y_train)

# Step 8: Take the winning model. Because of refit=True it has already been
# trained on the full training split, so no additional fit() call is needed.
best_model = grid_search.best_estimator_

# Step 9: Score the tuned model on the held-out test split
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nOptimized Model Accuracy: {accuracy * 100:.2f}%")

# Per-class precision / recall / f1, with rows named via the encoder's classes
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
print(report)

# Step 10: Persist the model together with the preprocessing objects it
# depends on (written in the order: model, label encoder, scaler)
artifacts = (
    (best_model, 'optimized_crop_recommendation_model.pkl'),
    (label_encoder, 'label_encoder.pkl'),
    (scaler, 'scaler.pkl'),
)
for artifact, filename in artifacts:
    joblib.dump(artifact, filename)
print("\nOptimized model, label encoder, and scaler saved successfully.")

# Step 11: Predict for new data
# The scaler was fitted on a DataFrame, so give it a frame carrying the same
# column names; a bare nested list has no names and makes scikit-learn warn
# about mismatched feature names.
new_sample = pd.DataFrame(
    [[90, 42, 43, 20.8797, 82.0027, 6.5029, 202.9355]],  # Replace with actual data
    columns=features,
)
new_sample_scaled = scaler.transform(new_sample)
predicted_crop = label_encoder.inverse_transform(best_model.predict(new_sample_scaled))
print(f"\nRecommended Crop for the input data: {predicted_crop[0]}")


Dataset Preview:
    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice

Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)



In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Step 1: Load the dataset
# The path is relative to the notebook's working directory; holding it in one
# variable guarantees the declared location is the one that is actually read.
file_path = "crop.csv"
df = pd.read_csv(file_path)

# Step 2: Encode the target variable (crop names -> integer class ids)
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Step 3: Select the feature columns / target and split the dataset first,
# so the held-out rows play no part in estimating the scaling statistics.
features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
X = df[features]
y = df['label']

X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 4: Feature scaling
# Fit on the training rows only; the test rows are merely transformed with
# the statistics learned from training data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Step 5: Define a RandomForest model and hyperparameter tuning
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

rf_model = RandomForestClassifier(random_state=42)
# refit=True (the default, spelled out here) retrains the best parameter
# combination on all of X_train / y_train once the search has finished.
grid_search = GridSearchCV(
    estimator=rf_model, param_grid=param_grid, cv=3, scoring='accuracy',
    verbose=1, n_jobs=-1, refit=True
)

# Step 6: Fit the model using GridSearchCV
print("\nTuning hyperparameters. Please wait...")
grid_search.fit(X_train, y_train)

# Step 7: Take the winning model. Because of refit=True it has already been
# trained on the full training split, so no additional fit() call is needed.
best_model = grid_search.best_estimator_

# Step 8: Measure the tuned model against the held-out test split
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nOptimized Model Accuracy: {accuracy * 100:.2f}%")

# Per-class breakdown, with rows named via the encoder's classes
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
print(report)

# Step 9: Write the model and its companion preprocessing objects to disk
# (order: model, label encoder, scaler)
artifacts = (
    (best_model, 'optimized_crop_recommendation_model.pkl'),
    (label_encoder, 'label_encoder.pkl'),
    (scaler, 'scaler.pkl'),
)
for artifact, filename in artifacts:
    joblib.dump(artifact, filename)
print("\nOptimized model, label encoder, and scaler saved successfully.")

# Step 10: User Interaction for Predictions
def predict_crop():
    """Prompt for soil and climate readings, then print the recommended crop.

    Reads seven numbers from standard input (N, P, K, temperature, humidity,
    pH, rainfall), scales them with the module-level ``scaler``, classifies
    them with ``best_model`` and decodes the class id via ``label_encoder``.
    The column order of the sample follows the module-level ``features`` list.

    Returns:
        The recommended crop name, or ``None`` when an entry could not be
        parsed as a finite number (an explanatory message is printed instead).
    """
    import math  # local import: the notebook's import cell does not provide it

    # (feature column, prompt shown to the user), asked in this order
    prompts = (
        ('N', "Nitrogen content (N): "),
        ('P', "Phosphorus content (P): "),
        ('K', "Potassium content (K): "),
        ('temperature', "Temperature (°C): "),
        ('humidity', "Humidity (%): "),
        ('ph', "pH level: "),
        ('rainfall', "Rainfall (mm): "),
    )

    print("\nEnter the soil and environmental characteristics:")
    readings = {}
    # Only the parsing step is guarded: an error raised later by the scaler or
    # the model is a pipeline problem and must not be reported as a typo.
    try:
        for column, prompt in prompts:
            value = float(input(prompt))
            if not math.isfinite(value):
                # float() accepts "nan" and "inf", which are not usable readings
                raise ValueError(f"non-finite value for {column}")
            readings[column] = value
    except ValueError:
        print("Invalid input. Please enter numeric values.")
        return None

    # Build a one-row frame with named columns, matching how the scaler was
    # fitted (on a DataFrame); a bare list would carry no feature names.
    new_sample = pd.DataFrame([readings], columns=features)
    new_sample_scaled = scaler.transform(new_sample)

    # Predict the crop and map the class id back to its name
    predicted_crop = label_encoder.inverse_transform(best_model.predict(new_sample_scaled))
    print(f"\nRecommended Crop: {predicted_crop[0]}")
    return predicted_crop[0]

# Step 11: Run the interactive prediction function until the user declines
while True:
    predict_crop()
    again = input("\nWould you like to predict another crop? (yes/no): ").strip().lower()
    # Accept the common short answer "y" as well as "yes"; any other reply
    # (including an empty one) ends the session.
    if again not in ('yes', 'y'):
        print("\nThank you for using the Crop Recommendation System. Goodbye!")
        break



Tuning hyperparameters. Please wait...
Fitting 3 folds for each of 162 candidates, totalling 486 fits

Optimized Model Accuracy: 99.32%

Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        20
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        20
     coconut       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       0.95      1.00      0.98        20
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      0.95      0.97        20
       maize       0.95      1.00      0.98        20
       mango       1.00      1.00      1.00        20
   mothbeans       0.95      1.00      0.98        20
    mungbean       1.00     




Recommended Crop: coffee

Thank you for using the Crop Recommendation System. Goodbye!
