# In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Market price constant (units are not stated here); nothing in this section
# of the script reads it.
M = 2485

# Read the three per-state compiled CSVs, in the order West Bengal,
# Uttar Pradesh, Bihar. Paths are relative to the current working directory.
wb_data, up_data, bi_data = map(
    pd.read_csv,
    (
        "finalcompileddatawb.csv",
        "finalcompileddataup.csv",
        "finalcompileddatabihar.csv",
    ),
)

# Per-state contract parameters (Table 8 of the accompanying paper).
#   "lambda"            - the two payout weights, in the same order as the
#                         state's two index terms in compute_indemnities
#   "original_strikes"  - strike level per index before optimisation
#   "optimized_strikes" - strike level per index after optimisation
params = {
    "West Bengal": {
        "lambda": [0.4539, 0.0011],
        "original_strikes": dict(rain=497, irrig=3_108_000),
        "optimized_strikes": dict(rain=540, irrig=3_130_000),
    },
    "Uttar Pradesh": {
        "lambda": [79.61, 0.3612],
        "original_strikes": dict(temp=24, rain=300),
        "optimized_strikes": dict(temp=24.5, rain=220),
    },
    "Bihar": {
        "lambda": [0.0004, 0.0832],
        "original_strikes": dict(irrig=3_200_000, npk=800),
        "optimized_strikes": dict(irrig=3_000_000, npk=820),
    },
}

# ---- Helper functions ----
def safe_max(a, b=0):
    """Return ``max(a, b)``, treating a missing ``a`` as no exceedance.

    When ``a`` is missing according to ``pd.isna`` (NaN, None, NA) the
    result is 0 regardless of ``b``; otherwise it is the larger of the two.
    """
    return 0 if pd.isna(a) else max(a, b)

# For each supported state: the two index terms that trigger a payout, in the
# same order as that state's "lambda" weights. Each entry is
# (strike key, DataFrame column, direction); "deficit" pays on
# max(strike - value, 0) and "excess" pays on max(value - strike, 0).
_INDEMNITY_TERMS = {
    "West Bengal": (
        ("rain", "Rain Fall (in millimeters)", "deficit"),
        ("irrig", "Net irrigated Area (in Hectare)", "deficit"),
    ),
    "Uttar Pradesh": (
        ("temp", "Temperature", "excess"),
        ("rain", "Rain Fall (in millimeters)", "deficit"),
    ),
    "Bihar": (
        ("irrig", "Net irrigated Area (in Hectare)", "deficit"),
        # The trailing space is part of the column name this script reads.
        ("npk", "Amount of NPK Used ", "deficit"),
    ),
}


def compute_indemnities(df, state, strikes, lambdas=None, scale=21.25):
    """Compute the per-row indemnity for one state's index contract.

    Each row pays ``scale * (lambdas[0] * term_0 + lambdas[1] * term_1)``
    where every term is the non-negative breach of its strike (see
    ``_INDEMNITY_TERMS``). A missing observation contributes 0 to its term.
    Column values are used exactly as stored; no unit conversion is applied.

    Args:
        df: DataFrame holding the index columns required for ``state``.
        state: "West Bengal", "Uttar Pradesh" or "Bihar".
        strikes: Mapping from strike key (e.g. ``"rain"``) to strike level.
        lambdas: Optional pair of payout weights. Defaults to
            ``params[state]["lambda"]``.
        scale: Multiplier applied to the weighted sum. The default 21.25 is
            the factor this script has always used; its derivation is not
            documented here.

    Returns:
        A list with one indemnity per row of ``df``. The list is empty when
        ``df`` has no rows or ``state`` is not one of the supported states.

    Raises:
        KeyError: If a required column or strike key is missing.
    """
    terms = _INDEMNITY_TERMS.get(state)
    if terms is None or len(df) == 0:
        return []

    if lambdas is None:
        lambdas = params[state]["lambda"]

    weighted_sum = 0.0
    for position, (strike_key, column, direction) in enumerate(terms):
        observed = df[column]
        strike = strikes[strike_key]
        breach = observed - strike if direction == "excess" else strike - observed
        # clip() floors negative breaches at zero; fillna() then zeroes the
        # rows whose observation was missing (clip leaves NaN untouched).
        weighted_sum = weighted_sum + lambdas[position] * breach.clip(lower=0).fillna(0)

    return (weighted_sum * scale).tolist()

def pure_premium(df, state):
    """Average indemnity for ``state`` under both strike sets.

    Runs ``compute_indemnities`` once with the state's original strikes and
    once with its optimized strikes (both read from ``params``) and returns
    the mean of each run, keyed "Original Pure Premium" and
    "Optimized Pure Premium".
    """
    state_params = params[state]
    strike_sets = (
        ("Original Pure Premium", "original_strikes"),
        ("Optimized Pure Premium", "optimized_strikes"),
    )
    premiums = {}
    for label, strikes_key in strike_sets:
        indemnities = compute_indemnities(df, state, state_params[strikes_key])
        premiums[label] = np.mean(indemnities)
    return premiums

def pure_premium_rate(df):
    """Return the pure premium rate (%) implied by detrended wheat yields.

    A straight-line trend is fitted to the yield series by ordinary least
    squares; the rate is the mean absolute deviation from that trend divided
    by the mean observed yield, times 100.

    The time axis is each observation's row position in ``df``. Rows with a
    missing yield are excluded from the fit but keep their slot on the time
    axis, so later seasons are not shifted left.
    NOTE(review): this assumes rows are consecutive, equally spaced seasons
    in chronological order — confirm against the CSV layout.

    Args:
        df: DataFrame with a "Yield of Wheat (kg/Hectare)" column.

    Returns:
        The rate as a float percentage.

    Raises:
        KeyError: If the yield column is absent.
        ValueError: If every yield value is missing.
    """
    yields = df["Yield of Wheat (kg/Hectare)"]
    has_yield = yields.notna().to_numpy()
    if not has_yield.any():
        raise ValueError("no non-missing wheat yield observations to fit a trend on")

    # Positions within the original frame, so gaps left by missing rows stay.
    t = np.flatnonzero(has_yield).astype(float)
    y = yields.to_numpy(dtype=float)[has_yield]

    if y.size < 2:
        # A single observation defines no slope; the trend is that point.
        trend = y
    else:
        slope, intercept = np.polyfit(t, y, 1)
        trend = slope * t + intercept

    mae = np.mean(np.abs(y - trend))
    return (mae / np.mean(y)) * 100

# ---- Run for all states ----
# Pair each state name with its dataset; only the optimized-strike premium
# is reported (the original-strike premium is computed but not kept).
state_frames = (
    ("West Bengal", wb_data),
    ("Uttar Pradesh", up_data),
    ("Bihar", bi_data),
)

results = {}
for state, df in state_frames:
    pp = pure_premium(df, state)
    ppr = pure_premium_rate(df)
    results[state] = {
        "Pure Premium (₹/ha)": pp["Optimized Pure Premium"],
        "Pure Premium Rate (%)": ppr,
    }

# Display results: a header per state, then one "name: value" line per metric
# rounded to two decimals.
for state, vals in results.items():
    print(f"\n--- {state} ---")
    print("\n".join(f"{k}: {v:.2f}" for k, v in vals.items()))
