In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import files
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Ask the user for the CSV through the Colab upload widget; `uploaded` maps
# each uploaded file name to its contents.
uploaded = files.upload()

# Read the first uploaded file by name.
dataset_name = next(iter(uploaded))
df = pd.read_csv(dataset_name)

# The Date column is not used as a feature; discard it when it is present.
df = df.drop(columns=["Date"], errors="ignore")

# Replace each categorical column with integer codes and keep the fitted
# encoder so the codes can be mapped to/from names later on.
categorical_columns = ["Crop_Type", "Soil_Type"]
label_encoders = {col: LabelEncoder() for col in categorical_columns}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

# Model inputs (encoded categoricals first, then the numeric measurements)
# and the column to predict.
numeric_columns = ["Soil_pH", "Temperature", "Humidity",
                   "Wind_Speed", "N", "P", "K", "Soil_Quality"]
features = categorical_columns + numeric_columns
target = "Crop_Yield"

X, y = df[features], df[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


Saving crop_yield_dataset.csv to crop_yield_dataset.csv


In [None]:
# Build a 100-tree random forest (seeded for reproducibility) and fit it on
# the training split in one step; `fit` hands back the fitted estimator.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows and report the mean absolute error.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.2f}")


Mean Absolute Error: 2.38


In [None]:
def _match_known_label(raw_value, known_labels):
    """Return the label in ``known_labels`` that ``raw_value`` names, or ``None``.

    Surrounding whitespace is ignored. An exact match wins; otherwise the
    comparison is case-insensitive, so "sweet corn" finds "Sweet Corn".
    Non-string labels are skipped because typed input is always text.
    """
    cleaned = raw_value.strip()
    labels = [str(label) for label in known_labels if isinstance(label, str)]
    if cleaned in labels:
        return cleaned
    wanted = cleaned.casefold()
    for label in labels:
        if label.strip().casefold() == wanted:
            return label
    return None


def _read_float(prompt):
    """Prompt once and return the reply as a float, or ``None`` if it is not a finite number."""
    reply = input(prompt)
    try:
        value = float(reply)
    except ValueError:
        return None
    # float() also accepts "nan" and "inf"; neither is a usable measurement.
    return value if np.isfinite(value) else None


def chatbot():
    """Run one interactive crop-yield prediction session on the console.

    Prompts for a crop type, a soil type and eight numeric measurements,
    prints the model's predicted yield for that input, then prints the three
    crops with the lowest yield. The ranking uses the model's mean prediction
    per crop over the whole dataset, except that the entry for the crop the
    user entered is replaced by the prediction for the user's own input.

    Reads the module-level ``label_encoders``, ``model``, ``features``, ``X``
    and ``df``; none of them is modified. Every result is printed and the
    function always returns ``None``. An unrecognised category or a
    non-numeric reply prints a message and ends the session immediately.
    """
    print("\nWelcome to the Crop Yield Prediction Chatbot!")
    print("Answer the following questions to predict crop yield.\n")

    crop_encoder = label_encoders["Crop_Type"]
    soil_encoder = label_encoders["Soil_Type"]

    # Validate each categorical answer as soon as it is given so the user does
    # not have to fill in every remaining field before learning about a typo.
    crop_name = _match_known_label(
        input("Enter Crop Type (e.g., Wheat, Rice, Corn): "), crop_encoder.classes_)
    if crop_name is None:
        print("Crop Type not recognized! Try again.")
        print("Known crop types: " + ", ".join(map(str, crop_encoder.classes_)))
        return

    soil_type = _match_known_label(
        input("Enter Soil Type (e.g., Sandy, Loamy, Peaty): "), soil_encoder.classes_)
    if soil_type is None:
        print("Soil Type not recognized! Try again.")
        print("Known soil types: " + ", ".join(map(str, soil_encoder.classes_)))
        return

    # Order matters: these are placed after the two encoded categoricals and
    # labelled positionally with the column names in `features`.
    numeric_prompts = (
        ("Soil pH", "Enter Soil pH value (e.g., 5.5, 6.2): "),
        ("Temperature", "Enter Temperature (°C): "),
        ("Humidity", "Enter Humidity (%): "),
        ("Wind Speed", "Enter Wind Speed (km/h): "),
        ("Nitrogen (N)", "Enter Nitrogen (N) level: "),
        ("Phosphorus (P)", "Enter Phosphorus (P) level: "),
        ("Potassium (K)", "Enter Potassium (K) level: "),
        ("Soil Quality", "Enter Soil Quality Score: "),
    )
    numeric_values = []
    for field_name, prompt in numeric_prompts:
        value = _read_float(prompt)
        if value is None:
            print(f"{field_name} must be a finite number! Try again.")
            return
        numeric_values.append(value)

    # Encode categorical inputs
    crop_encoded = crop_encoder.transform([crop_name])[0]
    soil_encoded = soil_encoder.transform([soil_type])[0]

    # Convert input into a one-row DataFrame with the training column names
    user_input_df = pd.DataFrame(
        [[crop_encoded, soil_encoded, *numeric_values]], columns=features)

    # Predict Crop Yield
    predicted_yield = model.predict(user_input_df)[0]
    print(f"\nPredicted Crop Yield: {predicted_yield:.2f} tons per hectare")

    # Mean predicted yield per crop over the dataset. The predictions are kept
    # in a local Series (grouped positionally by the encoded crop column)
    # instead of being written into `df`, so the shared frame is not changed.
    dataset_predictions = pd.Series(model.predict(X))
    avg_yield_per_crop = dataset_predictions.groupby(df["Crop_Type"].to_numpy()).mean()

    # Convert crop codes back to names
    avg_yield_per_crop.index = crop_encoder.inverse_transform(avg_yield_per_crop.index)

    # The user's crop is always one of the known crops, so this replaces that
    # crop's dataset average with the prediction for the user's input.
    avg_yield_per_crop[crop_name] = predicted_yield

    # Sort and get lowest three
    lowest_yield_crops = avg_yield_per_crop.nsmallest(3)

    print("\nCrops with the Lowest Predicted Yield:")
    for crop, yield_val in lowest_yield_crops.items():
        print(f"- {crop}: {yield_val:.2f} tons/ha")

# Run chatbot: starts a single interactive session. The call blocks on
# input() until the prompts are answered (or the session ends early), and
# everything it reports is printed below the cell.
chatbot()



Welcome to the Crop Yield Prediction Chatbot!
Answer the following questions to predict crop yield.

Enter Crop Type (e.g., Wheat, Rice, Corn): Rice
Enter Soil Type (e.g., Sandy, Loamy, Peaty): Sandy
Enter Soil pH value (e.g., 5.5, 6.2): 5.56
Enter Temperature (°C): 38
Enter Humidity (%): 30
Enter Wind Speed (km/h): 42
Enter Nitrogen (N) level: 800
Enter Phosphorus (P) level: 85
Enter Potassium (K) level: 78
Enter Soil Quality Score: 98

Predicted Crop Yield: 0.00 tons per hectare

Crops with the Lowest Predicted Yield:
- Rice: 0.00 tons/ha
- Tomato: 22.80 tons/ha
- Barley: 25.56 tons/ha


In [None]:
# NOTE(review): this import lives here rather than in the first cell with the
# other imports — consider moving it there.
import joblib

# Save trained model: writes the fitted random forest to 'yield_model.pkl'
# (a relative path, so it lands in the current working directory).
joblib.dump(model, 'yield_model.pkl')

# Save LabelEncoders: one file per categorical column, so the same
# name <-> code mapping used for training can be reloaded alongside the model.
joblib.dump(label_encoders['Crop_Type'], 'crop_encoder.pkl')
# This is the cell's last expression, so the value joblib.dump returns
# (the list of written file names) is what the cell displays as output.
joblib.dump(label_encoders['Soil_Type'], 'soil_encoder.pkl')


['soil_encoder.pkl']