<a href="https://colab.research.google.com/github/aaagairi/Aditi_Gairi_CSEAIML-B_SmartFarming/blob/main/Crop_Yield_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score

In [8]:
# Load the raw dataset from the working directory.
df = pd.read_csv('crop_yield.csv')

# Categorical variables: each one is label-encoded into a "<col>_encoded"
# column, and the fitted encoder is kept in `label_encoders` so the same
# mapping can be applied to new inputs at prediction time.
label_encoders = {}
categorical_cols = ["Crop", "Season", "State"]

# Handle all known seasons
expected_seasons = ["Kharif", "Rabi", "Whole Year", "Summer", "Autumn", "Winter"]

for col in categorical_cols:
    # Strip surrounding whitespace from every categorical column (not only
    # Season) *before* fitting. Prediction-time input is stripped before it
    # is looked up in the encoder's classes, so a padded value in the file
    # (e.g. "Rice ") would otherwise never match what the user types.
    df[col] = df[col].str.strip()

    le = LabelEncoder()
    # Season is fitted on the fixed list above rather than on the data, so
    # its codes do not depend on which seasons happen to occur in the file.
    # A season in the data that is missing from the list makes transform()
    # raise ValueError.
    le.fit(expected_seasons if col == "Season" else df[col])

    df[f"{col}_encoded"] = le.transform(df[col])
    label_encoders[col] = le

In [9]:
# Feature selection: the four raw numeric measurements followed by the
# label-encoded categorical columns. The same list (and order) is reused
# when a single prediction row is assembled later in the notebook.
feature_cols = [
    "Area",
    "Annual_Rainfall",
    "Fertilizer",
    "Pesticide",
    "Crop_encoded",
    "Season_encoded",
    "State_encoded",
]

# Work on a copy so that later changes to X leave df untouched.
X = df.loc[:, feature_cols].copy()
# Regression target.
y = df.loc[:, "Yield"]

In [10]:
# Scale numerical features
numerical_cols = ["Area", "Annual_Rainfall", "Fertilizer", "Pesticide"]
scaler = StandardScaler()

# Fit on the raw columns of df rather than on X. X is overwritten in place
# below, so fitting on X would make a second run of this cell learn the
# statistics of already-standardised data (mean ~0, std ~1); the scaler would
# then leave raw user input effectively unscaled at prediction time.
# X was copied from df, so the rows line up and the first-run result is
# unchanged.
# NOTE(review): the statistics are computed over all rows, i.e. before the
# train/test split performed later, so held-out rows influence the scaling.
X[numerical_cols] = scaler.fit_transform(df[numerical_cols])


In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train a 100-tree random forest (fit() returns the estimator itself).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Evaluate the model on the held-out rows and report both metrics.
y_pred = model.predict(X_test)
print(
    "\nModel Evaluation:",
    f"R² Score: {r2_score(y_test, y_pred):.3f}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred):.3f} tonnes/hectare",
    sep="\n",
)


Model Evaluation:
R² Score: 0.978
Mean Absolute Error: 9.644 tonnes/hectare


In [12]:
# User input
def get_user_input():
    """Interactively collect one observation for yield prediction.

    Prompts on stdin for four numeric values followed by three text values,
    in that order.

    Returns:
        dict: keys "Area", "Annual_Rainfall", "Fertilizer", "Pesticide"
        (floats) and "Crop", "Season", "State" (strings with surrounding
        whitespace removed).

    Raises:
        ValueError: if a numeric answer cannot be parsed by float(); the
        remaining prompts are then not shown.
    """
    numeric_prompts = (
        ("Area", "Enter area (in hectares): "),
        ("Annual_Rainfall", "Enter annual rainfall (in mm): "),
        ("Fertilizer", "Enter fertilizer used (in kg): "),
        ("Pesticide", "Enter pesticide used (in kg): "),
    )
    text_prompts = (
        ("Crop", "Enter crop name: "),
        ("Season", "Enter season (Kharif, Rabi, Whole Year, Summer, Autumn, Winter): "),
        ("State", "Enter state: "),
    )

    print("\nEnter details for yield prediction:\n")

    answers = {}
    # Numeric fields first, parsed immediately so a bad entry aborts early.
    for key, prompt in numeric_prompts:
        answers[key] = float(input(prompt))
    # Free-text fields are only trimmed, not otherwise normalised.
    for key, prompt in text_prompts:
        answers[key] = input(prompt).strip()
    return answers
# Prediction
def predict_crop_yield(example_input):
    """Predict the yield for one observation, print it, and return it.

    Relies on the notebook-level objects fitted earlier: `label_encoders`,
    `feature_cols`, `numerical_cols`, `scaler` and `model`.

    Args:
        example_input (dict): must contain "Area", "Annual_Rainfall",
            "Fertilizer", "Pesticide" (numeric) and "Crop", "Season",
            "State" (strings). The dict itself is not modified.

    Returns:
        float: the model's prediction for this observation. (Previously the
        value was only printed and the function returned None.)

    Raises:
        ValueError: if the Crop, Season or State value is not one of the
            classes known to the corresponding fitted label encoder.
    """
    # Shallow copy so the encoded keys are not added to the caller's dict.
    example = example_input.copy()
    for col in ["Crop", "Season", "State"]:
        le = label_encoders[col]
        # Lookup is exact (case- and whitespace-sensitive).
        if example[col] not in le.classes_:
            raise ValueError(f"Unknown {col}: {example[col]}")
        example[f"{col}_encoded"] = le.transform([example[col]])[0]

    # One-row DataFrame for compatibility with the scaler and the model.
    # The row is built from feature_cols itself so the values can never get
    # out of step with the column labels.
    features_df = pd.DataFrame(
        [[example[name] for name in feature_cols]],
        columns=feature_cols,
    )

    # Scale numerical columns with the scaler fitted on the training data.
    features_df[numerical_cols] = scaler.transform(features_df[numerical_cols])

    prediction = float(model.predict(features_df)[0])
    print(f"\nPredicted Yield: {prediction:.2f} tonnes/hectare")
    return prediction

# Run: collect one observation from the user and predict its yield.
# A ValueError from either step (a numeric field that float() cannot parse,
# or a Crop/Season/State value unknown to its encoder) is reported as a
# one-line message instead of a traceback.
try:
    user_input = get_user_input()
    predict_crop_yield(user_input)
except ValueError as err:
    print(f"Error: {err}")


Enter details for yield prediction:

Enter area (in hectares): 23
Enter annual rainfall (in mm): 122
Enter fertilizer used (in kg): 98
Enter pesticide used (in kg): 23
Enter crop name: Rice
Enter season (Kharif, Rabi, Whole Year, Summer, Autumn, Winter): Rabi
Enter state: Madhya Pradesh

Predicted Yield: 437.28 tonnes/hectare
