**Step 1: Load and Explore the Data**

In [16]:
import pandas as pd

# Load the dataset
file_path = '/content/Crop_Reco_Data.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(data.head(), "\n")

# DataFrame.info() writes its summary directly to stdout and returns None,
# so it is called on its own; wrapping it in print() would emit a stray
# "None" line after the summary.
data.info()
print()

# Summary statistics for the numeric columns
print(data.describe())


    N   P   K  temperature   humidity        ph    rainfall label
0  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
1  86  59  35    25.787206  82.111240  6.946636  243.512041  rice
2  60  55  45    21.408658  83.329319  5.935745  287.576693  rice
3  97  36  45    22.228698  81.858729  6.939084  278.079179  rice
4  90  44  38    23.835095  83.883871  7.473134  241.201351  rice 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            120 non-null    int64  
 1   P            120 non-null    int64  
 2   K            120 non-null    int64  
 3   temperature  120 non-null    float64
 4   humidity     120 non-null    float64
 5   ph           120 non-null    float64
 6   rainfall     120 non-null    float64
 7   label        120 non-null    object 
dtypes: float64(4), int64(3), object(1)
memory usage: 7.6+ KB
None 

         

**Step 2: Data Preprocessing** Prepare the data for modeling by checking for missing values, encoding the categorical crop label as integers, and splitting the dataset into training and testing sets.

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Check for missing values
print(data.isnull().sum())

# Encode the crop names as integer class ids.
# The encoded column is written to a derived frame rather than back into
# `data`, so re-running this cell always fits the encoder on the original
# crop names (fitting it on already-encoded integers would discard the names
# that inverse_transform needs later).
label_encoder = LabelEncoder()
data_encoded = data.assign(label=label_encoder.fit_transform(data['label']))

# Define features and target variable
X = data_encoded.drop('label', axis=1)
y = data_encoded['label']

# Split the dataset into training and testing sets (80% / 20%, fixed seed
# so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64


**Step 3: Model Selection and Training**
Choose a machine learning algorithm, train the model, and evaluate it on the held-out test set. We'll use a Random Forest classifier for this example.

In [18]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize the model (fixed seed for reproducible trees)
model = RandomForestClassifier(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# The report has one row per class that occurs in either the true or the
# predicted labels. Building the name list from y_test alone breaks as soon
# as the model predicts a class that is absent from the test split, so the
# union of both is used and passed explicitly via `labels`.
unique_classes = np.union1d(y_test, y_pred)
target_names = label_encoder.classes_[unique_classes]

# zero_division=0 keeps the reported value (0.0) for classes that were never
# predicted, without emitting an undefined-metric warning per average.
print(classification_report(
    y_test,
    y_pred,
    labels=unique_classes,
    target_names=target_names,
    zero_division=0,
))

Accuracy: 95.83%
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00         3
      banana       1.00      1.00      1.00         2
     coconut       1.00      1.00      1.00         3
      cotton       1.00      1.00      1.00         2
       maize       1.00      1.00      1.00         3
       mango       1.00      1.00      1.00         3
   muskmelon       1.00      1.00      1.00         1
      orange       0.50      1.00      0.67         1
      papaya       1.00      1.00      1.00         2
 pomegranate       0.00      0.00      0.00         1
        rice       1.00      1.00      1.00         2
  watermelon       1.00      1.00      1.00         1

    accuracy                           0.96        24
   macro avg       0.88      0.92      0.89        24
weighted avg       0.94      0.96      0.94        24



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


**Step 5: Feature Importance and Visualization**
Understand which features are most important for the model's predictions.

In [None]:
import matplotlib.pyplot as plt
import numpy as np  # Import NumPy

# Rank the features from most to least important according to the fitted model
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]

# One bar per feature column, labelled with the column name in ranked order
n_features = X.shape[1]
bar_positions = range(n_features)
ranked_names = [X.columns[i] for i in indices]
ranked_importances = importances[indices]

# Draw the ranked importances as a bar chart
plt.figure(figsize=(10, 6))
plt.title('Feature Importances')
plt.bar(bar_positions, ranked_importances, align='center')
plt.xticks(bar_positions, ranked_names, rotation=90)
plt.tight_layout()
plt.show()

**Step 6: Save the Model**
Save the trained model for future use.

In [None]:
import joblib

# Persist the fitted classifier to disk so that it can be reloaded later
# without retraining.
joblib.dump(value=model, filename='crop_recommendation_model.pkl')

**Step 7: Load and Use the Model**
Load the saved model and use it for making predictions on new data.

In [6]:
# Load the model from the file
# (the file name must be a single-line literal: a line break inside the
# quotes is a syntax error / a different file name)
loaded_model = joblib.load('crop_recommendation_model.pkl')

# Predict on new data
new_data = [[90, 40, 42, 20.5, 80, 7.0, 200]]  # Example new data

# The classifier was fitted on a DataFrame, so the sample is wrapped in a
# frame with the same columns in the same order. Passing a bare list makes
# recent scikit-learn versions warn about missing feature names.
feature_columns = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
new_frame = pd.DataFrame(new_data, columns=feature_columns)
prediction = loaded_model.predict(new_frame)

# NOTE: `label_encoder` is the in-memory encoder fitted during preprocessing;
# it is not restored from disk here, so this cell needs that cell to have run.
predicted_crop = label_encoder.inverse_transform(prediction)



In [7]:
# Report the decoded crop name for the single sample scored above
# (predicted_crop holds one entry per input row; only one row was passed).
print(f'Recommended crop: {predicted_crop[0]}')


Recommended crop: jute


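**Updated workflow:** the cells below repeat the pipeline, this time also saving the fitted `LabelEncoder` next to the model so that both can be reloaded together.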


In [10]:
# Persist the label encoder so predictions can be decoded after a reload.
#
# Only fit a fresh encoder while the label column still holds the crop names.
# If an earlier cell has already replaced it with integer ids, fitting again
# would produce an encoder whose classes_ are those integers, and the saved
# file could no longer map predictions back to crop names. In that case the
# encoder that is already in memory is the one to save.
if not pd.api.types.is_numeric_dtype(data['label']):
    label_encoder = LabelEncoder()
    data['label'] = label_encoder.fit_transform(data['label'])
joblib.dump(label_encoder, 'label_encoder.pkl')


['label_encoder.pkl']

In [11]:
# Restore both persisted artifacts: the trained classifier first, then the
# label encoder stored in 'label_encoder.pkl'.
# NOTE: joblib.load unpickles its input, so only load files you created yourself.
model = joblib.load(filename='crop_recommendation_model.pkl')
label_encoder = joblib.load(filename='label_encoder.pkl')


In [22]:
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
data = pd.read_csv('Crop_Reco_Data.csv')

# Encode the crop names as integer class ids; the fitted encoder is kept so
# that predictions can be mapped back to crop names.
label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])

# Define features and target variable
X = data.drop('label', axis=1)
y = data['label']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Save the model and LabelEncoder
joblib.dump(model, 'crop_recommendation_model.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')


def _prompt_float(prompt):
    """Prompt until the user enters text that parses as a float.

    Args:
        prompt: Text shown to the user by input().

    Returns:
        The entered value converted with float().
    """
    while True:
        raw = input(prompt)
        try:
            return float(raw)
        except ValueError:
            # Ask again instead of aborting the whole session on a typo.
            print(f"'{raw}' is not a valid number, please try again.")


# Command-line interface
if __name__ == "__main__":
    # Reload from disk so the prompt below exercises the saved artifacts.
    model = joblib.load('crop_recommendation_model.pkl')
    label_encoder = joblib.load('label_encoder.pkl')

    print("Crop Recommendation System")
    n = _prompt_float("Enter Nitrogen content (N) in soil: ")
    p = _prompt_float("Enter Phosphorus content (P) in soil: ")
    k = _prompt_float("Enter Potassium content (K) in soil: ")
    temperature = _prompt_float("Enter temperature in Celsius: ")
    humidity = _prompt_float("Enter humidity in %: ")
    ph = _prompt_float("Enter soil pH: ")
    rainfall = _prompt_float("Enter rainfall in mm: ")

    new_data = [[n, p, k, temperature, humidity, ph, rainfall]]
    # The model was fitted on a DataFrame; reusing the training columns keeps
    # the feature names and order aligned and avoids the missing-feature-names
    # warning that a bare list triggers in recent scikit-learn versions.
    new_frame = pd.DataFrame(new_data, columns=X.columns)
    prediction = model.predict(new_frame)
    predicted_crop = label_encoder.inverse_transform(prediction)
    print(f"Recommended crop: {predicted_crop[0]}")


Crop Recommendation System
Enter Nitrogen content (N) in soil: 89
Enter Phosphorus content (P) in soil: 9
Enter Potassium content (K) in soil: 47
Enter temperature in Celsius: 29.47156
Enter humidity in %: 90.7707
Enter soil pH: 6.668383
Enter rainfall in mm: 28.75226
Recommended crop: muskmelon


