In [None]:
# Install the third-party packages used by this notebook into the kernel's environment.
# NOTE(review): scikit-learn and joblib are imported later but not installed explicitly here;
# presumably they arrive as dependencies of lazypredict — confirm.
# NOTE(review): the last cell imports `panel`, which is not installed by any of these lines.
%pip install numpy pandas
%pip install lazypredict
%pip install matplotlib

In [None]:
import numpy as np
import pandas as pd
import os

# Location of the crop recommendation CSV, relative to the notebook's working directory
dataset_path = "./input/crop-recommendation-dataset/Crop_Recommendation.csv"

# Fail fast with a clear message when the CSV is missing
if not os.path.exists(dataset_path):
    raise FileNotFoundError(f"Dataset not found at {dataset_path}. Please check the file path.")

data = pd.read_csv(dataset_path)
print("Dataset loaded successfully.")

# Separate the label column ('Crop') from the remaining feature columns
y = data['Crop']
X = data.drop(columns='Crop')

print("Data preview:")
print(data.head())


In [None]:
# Imports for splitting the data and running the LazyClassifier benchmark
import lazypredict
from lazypredict.Supervised import LazyClassifier
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

# Build the LazyClassifier (quiet mode, warnings suppressed, no extra metric)
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)

# First pass: evaluate on the training rows themselves
models_train, predictions_train = clf.fit(X_train, X_train, y_train, y_train)
# Second pass: evaluate on the held-out test rows
models_test, predictions_test = clf.fit(X_train, X_test, y_train, y_test)

# Show the training-set results as the cell output
models_train




In [None]:
# Show the results of the LazyClassifier pass evaluated on the held-out test rows
models_test


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forests are stochastic: without a seed the score below (and the model saved
# later) changes on every run. Seed 5 matches the seed used for the train/test split.
rf = RandomForestClassifier(random_state=5)

# Fit on the training split only
rf.fit(X_train, y_train)

# Accuracy on the held-out test split
rf_score = rf.score(X_test, y_test)

# Display the score as the cell output
rf_score

In [None]:
# Persist the fitted random forest so it can be reloaded later without retraining
import joblib

model_filename = "random_forest_model.pkl"

# Write the model into the current working directory
joblib.dump(rf, filename=model_filename)

print(f"Model saved as {model_filename}")

In [None]:
import joblib
import numpy as np
import pandas as pd

# Load the model persisted earlier in this notebook.
# NOTE: joblib files are pickle-based; only load model files you created or trust.
model = joblib.load("random_forest_model.pkl")

# One sample, in the order: Nitrogen, Phosphorous, Potassium, Temperature, Humidity, pH, Rainfall
# (assumed to match the training column order — confirm against the dataset).
# Named `input_values` rather than `input` so the built-in input() is not shadowed
# for the rest of the kernel session.
input_values = (160, 85, 59, 20, 30, 7.5, 70)

input_array = np.asarray(input_values)

# scikit-learn expects a 2-D array: one row per sample
input_array_reshape = input_array.reshape(1, -1)

# A model fitted on a DataFrame remembers its column names; passing the same names
# back avoids scikit-learn's "X does not have valid feature names" warning.
feature_names = getattr(model, "feature_names_in_", None)
if feature_names is not None:
    model_input = pd.DataFrame(input_array_reshape, columns=feature_names)
else:
    model_input = input_array_reshape

predictions = model.predict(model_input)
print (predictions)

print("predictions are: ", predictions[0])

In [None]:
import pandas as pd

# Path to the crop dataset (adjust if the file lives elsewhere)
dataset_path = "./input/crop-recommendation-dataset/Crop_Recommendation.csv"
data = pd.read_csv(dataset_path)

# Target column holding the crop label of each row
y = data['Crop']

# Distinct crop labels and how many of them there are
unique_classes = y.unique()
num_classes = len(unique_classes)

# Report the class count followed by one line per crop
print(f"Number of crop classes: {num_classes}")
print("Crop classes:")
for crop_name in unique_classes:
    print("-", crop_name)


## Enhanced code: crop distribution plot, per-class prediction confidence, and library versions

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the dataset (ensure this path is correct)
dataset_path = "./input/crop-recommendation-dataset/Crop_Recommendation.csv"
data = pd.read_csv(dataset_path)

# Number of rows per crop label
crop_counts = data['Crop'].value_counts()

# Bar chart of the class distribution, drawn on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(12, 6))
crop_counts.plot.bar(ax=ax, color='skyblue', edgecolor='black')
ax.set_xlabel("Crop Type")
ax.set_ylabel("Count")
ax.set_title("Distribution of Crops in the Dataset")
ax.tick_params(axis='x', labelrotation=45)  # tilt the crop names so they stay readable
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()


In [None]:
import numpy as np
import pandas as pd

# Example input (change these values to test); shape (1, 7): one sample, seven features
input_data = np.array([[50, 40, 40, 30, 60, 6.5, 200]])

# A model fitted on a DataFrame remembers its column names; passing the same names
# back avoids scikit-learn's "X does not have valid feature names" warning.
feature_names = getattr(rf, "feature_names_in_", None)
if feature_names is not None:
    model_input = pd.DataFrame(input_data, columns=feature_names)
else:
    model_input = input_data

# Probability distribution over every class the model was trained on
probabilities = rf.predict_proba(model_input)

# The columns of predict_proba follow rf.classes_. Taking the labels from
# data['Crop'].unique() (order of first appearance) would pair probabilities
# with the wrong crop names.
class_labels = rf.classes_

# Display probabilities for each class
print("Prediction Confidence for Each Crop:")
for crop, prob in zip(class_labels, probabilities[0]):
    print(f"{crop}: {prob:.4f}")

# Most probable crop for the single input row
best_index = int(np.argmax(probabilities[0]))
predicted_crop = class_labels[best_index]
print(f"\nFinal Prediction: {predicted_crop} (Confidence: {probabilities[0][best_index]:.4f})")


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the dataset (Make sure the path is correct)
dataset_path = "./input/crop-recommendation-dataset/Crop_Recommendation.csv"
data = pd.read_csv(dataset_path)

# Prepare features and target variable
X = data.drop("Crop", axis=1)
y = data["Crop"]

# Split data into training and testing sets (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

# Train the Random Forest Classifier. It is seeded so the confidence chart is
# reproducible across runs; the seed matches the one used for the split.
rf = RandomForestClassifier(random_state=5)
rf.fit(X_train, y_train)

# Example input (modify values to test)
input_data = np.array([[50, 40, 40, 30, 60, 6.5, 200]])  # Nitrogen, Phosphorous, Potassium, Temperature, Humidity, pH, Rainfall

# The model was fitted on a DataFrame, so give the sample the same column names;
# this avoids scikit-learn's "X does not have valid feature names" warning.
input_frame = pd.DataFrame(input_data, columns=X.columns)

# Probability distribution over every class the model was trained on
probabilities = rf.predict_proba(input_frame)

# predict_proba columns are ordered like rf.classes_
class_labels = rf.classes_

# Sort crops by confidence score (highest to lowest)
sorted_indices = np.argsort(probabilities[0])[::-1]
sorted_probs = probabilities[0][sorted_indices]
sorted_labels = class_labels[sorted_indices]

# Plot the confidence scores as a horizontal bar chart
plt.figure(figsize=(12, 6))
plt.barh(sorted_labels, sorted_probs, color='skyblue', edgecolor='black')
plt.xlabel("Confidence Score")
plt.ylabel("Crop Type")
plt.title("Prediction Confidence for Each Crop")
plt.xlim(0, 1)  # Probabilities range from 0 to 1
plt.gca().invert_yaxis()  # Flip so highest confidence is at the top
plt.grid(axis='x', linestyle='--', alpha=0.7)

# Show the final prediction prominently
predicted_crop = sorted_labels[0]
confidence = sorted_probs[0]
# Clamp the label position so it never starts left of the axis when confidence < 0.2
annotation_x = max(confidence - 0.2, 0.0)
plt.annotate(f"Predicted: {predicted_crop} ({confidence:.4f})", xy=(confidence, 0),
             xytext=(annotation_x, 2), fontsize=12, color='red', fontweight='bold')

plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib
import sklearn
import joblib

# panel is not among the packages installed at the top of this notebook, so guard
# its import the same way as lazypredict instead of letting the whole cell fail.
try:
    import panel as pn
except ImportError:
    pn = None

# lazypredict may not be installed in all environments
try:
    import lazypredict
except ImportError:
    lazypredict = None

# Report the version of every library the notebook relies on
print("Library Version Information:")
if pn:
    print(f"panel: {pn.__version__}")
else:
    print("panel: Not installed")
print(f"pandas: {pd.__version__}")
print(f"numpy: {np.__version__}")
print(f"matplotlib: {matplotlib.__version__}")
print(f"scikit-learn: {sklearn.__version__}")
print(f"joblib: {joblib.__version__}")
if lazypredict:
    print(f"lazypredict: {lazypredict.__version__}")
else:
    print("lazypredict: Not installed")
