In [4]:
import joblib
import pandas as pd
from joblib import load

# 1. Read the agricultural records (Excel) and the MSP price table (CSV)
agri_df = pd.read_excel("Final dataset.xlsx")
msp_df = pd.read_csv("transformed_MSP.csv")

# Normalise the MSP headers: trim surrounding whitespace, then turn every
# remaining space into an underscore so columns are easy to reference.
msp_df.columns = [header.strip().replace(" ", "_") for header in msp_df.columns]

# Example MSP columns after cleaning: ['SL._NO.', 'COMMODITY', 'VARIETY', 'YEAR', 'MSP']

# 2. Restore the fitted yield prediction pipeline from its joblib file
pipe = load("yield_prediction_pipeline.joblib")

# 3. Revenue prediction function
def predict_revenue(crop, land_area_ha, district, year, month, msp_commodity=None):
    """
    Predict yield and calculate expected revenue based on MSP.

    Reads the notebook-level globals ``agri_df`` (agricultural records),
    ``msp_df`` (MSP price table) and ``pipe`` (fitted yield pipeline).

    Parameters
    ----------
    crop : str
        Crop name fed to the yield model. Unless ``msp_commodity`` is given it
        is also used, case-insensitively and as a literal substring, to find
        the matching rows in ``msp_df["COMMODITY"]``.
    land_area_ha : float
        Farmer's land in hectares.
    district : str
        District name fed to the yield model.
    year : int
        Year used to pick the ``agri_df`` rows that are averaged into the
        model input, and to filter ``msp_df["YEAR"]`` (substring match, so a
        season label such as "2015-16" matches 2015).
    month : str or None
        Three-letter month name ("Jan" .. "Dec") whose average rainfall is
        raised by 10%. Any other value leaves every month at its average.
    msp_commodity : str, optional
        Commodity name to search for in the MSP table instead of ``crop``.
        Useful when the two datasets name the same crop differently; when
        omitted the behaviour is the same as before this parameter existed.

    Returns
    -------
    dict
        ``Predicted_Yield_per_ha``  - model output (assumed kg/ha - TODO confirm
        against the training target),
        ``Total_Production_kg``     - predicted yield * ``land_area_ha``,
        ``MSP_(Rs_per_quintal)``    - mean MSP of the matching rows, or None,
        ``Expected_Revenue_Rs``     - production * MSP / 100, or None when no
        usable MSP was found.
    """
    months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    averaged_columns = ("AnnualRainfall", "NetSownArea",
                        "CroppingIntensity", "NetIrrigatedArea")

    # Step A: Prepare input for yield model (use averages for rainfall/irrigation)
    year_data = agri_df[agri_df["Year"] == year]
    if year_data.empty:
        # No records for this year (e.g. a future season): fall back to the
        # averages over every available year instead of feeding NaN features.
        year_data = agri_df

    sample = {
        "Year": year,
        "District": district,
        "Crop": crop,
        "Area": land_area_ha,   # farmer's land in hectares
        # Placeholder only.
        # NOTE(review): if the pipeline really consumes "Production" as a
        # feature, a constant 0 may distort the prediction - confirm.
        "Production": 0,
    }
    for column in averaged_columns:
        sample[column] = year_data[column].mean()

    # Monthly rainfall (boost the specified month slightly if given)
    for m in months:
        monthly_mean = year_data[m].mean()
        sample[m] = monthly_mean * 1.1 if m == month else monthly_mean  # +10% for growing month

    # Step B: Convert to DataFrame
    sample_df = pd.DataFrame([sample])

    # Step C: Predict yield (kg/ha)
    predicted_yield = pipe.predict(sample_df)[0]

    # Step D: Calculate total production
    total_production = predicted_yield * land_area_ha  # kg

    # Step E: Match MSP (try matching crop name with commodity)
    lookup_name = (crop if msp_commodity is None else msp_commodity).upper().strip()
    msp_value = None
    if lookup_name:  # an empty pattern would match every commodity
        # regex=False: names may contain characters such as "(" or "+" that
        # would otherwise be interpreted as regular-expression syntax.
        commodity_hit = msp_df["COMMODITY"].str.upper().str.contains(
            lookup_name, regex=False, na=False
        )
        # astype(str): YEAR may have been parsed as integers, which have no .str accessor.
        year_hit = msp_df["YEAR"].astype(str).str.contains(str(year), regex=False)

        # Coerce to numbers so text or blank price cells cannot break the mean.
        prices = pd.to_numeric(
            msp_df.loc[commodity_hit & year_hit, "MSP"], errors="coerce"
        ).dropna()
        if not prices.empty:
            msp_value = float(prices.mean())  # if multiple varieties exist

    # Step F: Compute revenue
    if msp_value is None:
        revenue = None
    else:
        revenue = total_production * msp_value / 100  # MSP is Rs per quintal (100kg)

    return {
        "Predicted_Yield_per_ha": predicted_yield,
        "Total_Production_kg": total_production,
        "MSP_(Rs_per_quintal)": msp_value,
        "Expected_Revenue_Rs": revenue
    }


# 4. Example usage: 2 ha of rice in ANGUL for 2015, with July as the boosted month
result = predict_revenue(crop="Rice", land_area_ha=2.0, district="ANGUL", year=2015, month="Jul")

# Show the full result dictionary (yield, production, MSP, revenue)
print(result)


{'Predicted_Yield_per_ha': 367.59, 'Total_Production_kg': 735.18, 'MSP_(Rs_per_quintal)': None, 'Expected_Revenue_Rs': None}


In [5]:
from sklearn.metrics import r2_score, mean_absolute_error

# --- Evaluate Model ---
# This cell scores the loaded pipeline on a held-out split and saves a copy
# whose filename carries the R² score.
#
# X_test / y_test are not created in the cells shown here; they have to come
# from the train/test split that was made when the pipeline was trained.
# Fail early with an actionable message instead of a bare NameError.
_missing = [name for name in ("X_test", "y_test") if name not in globals()]
if _missing:
    raise NameError(
        f"{', '.join(_missing)} not defined - run the train/test split cell "
        "before evaluating the pipeline"
    )

y_pred = pipe.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("📊 Model Evaluation Metrics:")
print(f"R² Score: {r2:.4f}")
print(f"MAE: {mae:.2f}")

# Save pipeline with metrics in name (optional).
# Written next to the notebook, the same place the pipeline is loaded from,
# rather than to a machine-specific absolute directory.
joblib.dump(pipe, f"yield_prediction_pipeline_r2_{r2:.3f}.joblib")
print("✅ Pipeline saved successfully with R² in filename")


NameError: name 'X_test' is not defined