# cricWAR Financial Valuation Analysis (IPL 2025)

This notebook implements the "Moneyball" metrics for IPL 2025 players, comparing their auction price to their on-field performance (WAR).

## Metrics
1. **ROI Basic**: WAR (wins) per 1% of the salary cap spent on the player.
2. **VOPE (Value Over Price Expectation)**: Realized WAR minus the WAR expected for that price tag (degree-2 polynomial regression of WAR on price).
3. **VOMAM (Value Over Market Adjusted Model)**: Realized WAR minus the WAR expected after adjusting for price, role, and overseas status (multivariate OLS regression).

## Validation
Each step includes `assert` statements to verify data integrity and calculation correctness.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from pathlib import Path
import warnings

# NOTE(review): this silences every warning for the rest of the kernel session,
# including any raised by pandas / scikit-learn / statsmodels in the cells below.
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')

# Constants
TEAM_PURSE_2025 = 120.0  # Crores; denominator used to express price_cr as a % of the cap (price_norm)
PROJECT_ROOT = Path('..')  # Relative path, so it resolves against the kernel's working directory

## 1. Data Loading & Preparation

In [None]:
def load_data():
    """Load the WAR-vs-price table, attach player metadata, and derive model features.

    Reads ``results/11_war_vs_price/war_vs_price_full.csv`` and
    ``data/player_metadata.csv`` relative to ``PROJECT_ROOT``.

    Returns:
        pandas.DataFrame: the rows of the WAR/price table whose ``match_type``
        is not ``'none'``, left-joined to the metadata on
        ``cricwar_name == player_name``, with these columns added or filled:

        * ``price_norm``    -- ``price_cr`` as a percentage of ``TEAM_PURSE_2025``.
        * ``is_overseas``   -- 0 when ``country`` is 'india'/'ind' (case- and
          whitespace-insensitive), otherwise 1.
        * ``role_category`` -- missing values replaced by ``'Unknown'``.
    """
    # Load mapped data
    war_price_file = PROJECT_ROOT / 'results' / '11_war_vs_price' / 'war_vs_price_full.csv'
    df = pd.read_csv(war_price_file)

    # Filter mapped players
    df = df[df['match_type'] != 'none'].copy()

    # Load metadata
    meta_file = PROJECT_ROOT / 'data' / 'player_metadata.csv'
    metadata = pd.read_csv(meta_file)

    # A left merge emits one output row per matching metadata row, so a name
    # listed twice in the metadata would duplicate that player and double-weight
    # them in every regression downstream. Keep a single row per join key.
    player_meta = (
        metadata[['player_name', 'country', 'role_category']]
        .drop_duplicates(subset='player_name', keep='first')
    )

    # Merge
    df = df.merge(player_meta, left_on='cricwar_name', right_on='player_name', how='left')

    # Feature Engineering
    df['price_norm'] = (df['price_cr'] / TEAM_PURSE_2025) * 100

    # Normalise case and surrounding whitespace before matching so values such
    # as ' India' are not misread as overseas.
    # NOTE(review): players with no metadata match have a missing country and
    # are therefore flagged as overseas (1) -- confirm this default is intended.
    is_domestic = df['country'].astype(str).str.strip().str.lower().isin(['india', 'ind'])
    df['is_overseas'] = (~is_domestic).astype(int)
    df['role_category'] = df['role_category'].fillna('Unknown')

    return df

df = load_data()

# --- TESTS ---
print("Running Data Tests...")
n_players = len(df)
assert n_players > 0, "Dataframe is empty!"

# Both regression inputs must have survived the load/merge.
available_columns = df.columns
assert 'total_WAR' in available_columns, "Missing total_WAR column"
assert 'price_cr' in available_columns, "Missing price_cr column"

# No single player can cost more than the whole purse.
highest_price_norm = df['price_norm'].max()
assert highest_price_norm <= 100, "Price norm > 100%"

# The overseas flag must be a strict 0/1 indicator.
overseas_flag_ok = df['is_overseas'].isin([0, 1])
assert overseas_flag_ok.all(), "Invalid is_overseas values"

print(f"✓ Data Loaded: {len(df)} players")
df.head()

## 2. Metric 1: ROI Basic
**Wins per 1% of Salary Cap**

In [None]:
def calculate_roi(df, min_price_norm=0.05):
    """Add the ROI Basic metric (WAR per 1% of salary cap) to ``df``.

    Args:
        df: DataFrame with ``price_norm`` (price as % of cap) and ``total_WAR``
            columns. It is modified in place.
        min_price_norm: Floor applied to ``price_norm`` before dividing, so
            zero or near-zero prices cannot produce a division by zero or an
            exploding ratio. Must be positive. Defaults to 0.05.

    Returns:
        The same DataFrame object, with two columns added:
        ``price_norm_adj`` (the floored price) and
        ``roi_basic`` (``total_WAR / price_norm_adj``).

    Raises:
        ValueError: if ``min_price_norm`` is not strictly positive.
    """
    # `not x > 0` also rejects NaN, which a plain `x <= 0` check would let through.
    if not min_price_norm > 0:
        raise ValueError("min_price_norm must be positive to avoid division by zero")

    # Floor price to avoid division by zero
    df['price_norm_adj'] = df['price_norm'].clip(lower=min_price_norm)
    df['roi_basic'] = df['total_WAR'] / df['price_norm_adj']
    return df

df = calculate_roi(df)

# --- TESTS ---
print("Running ROI Tests...")
assert 'roi_basic' in df.columns
roi_is_complete = df['roi_basic'].notna().all()
assert roi_is_complete, "NaN values in ROI"

# Surface the single best return on spend as an eyeball sanity check
roi_ranked = df.sort_values('roi_basic', ascending=False)
top_roi = roi_ranked.iloc[0]
print(f"Top ROI: {top_roi['price_name']} ({top_roi['roi_basic']:.2f} wins/1% cap)")
print("✓ ROI Calculated")

## 3. Metric 2: VOPE Simple
**Value Over Price Expectation** (Polynomial Regression)

In [None]:
def calculate_vope(df):
    """Fit WAR on a quadratic in price and store each player's residual (VOPE).

    Args:
        df: DataFrame with ``price_norm`` and ``total_WAR`` columns. It is
            modified in place.

    Returns:
        tuple: ``(df, model)`` where ``df`` is the same object with
        ``xWAR_price_only`` (fitted WAR) and ``vope_simple`` (realized minus
        fitted WAR) added, and ``model`` is the fitted ``LinearRegression``.
        The model expects the degree-2 polynomial features, not raw price.
    """
    price = df[['price_norm']].to_numpy()
    realized_war = df['total_WAR'].to_numpy()

    # Expand to [price, price^2]; the bias column is left out because
    # LinearRegression fits its own intercept.
    quad_features = PolynomialFeatures(degree=2, include_bias=False).fit_transform(price)

    model = LinearRegression().fit(quad_features, realized_war)

    expected_war = model.predict(quad_features)
    df['xWAR_price_only'] = expected_war
    df['vope_simple'] = df['total_WAR'] - df['xWAR_price_only']

    return df, model

df, vope_model = calculate_vope(df)

# --- TESTS ---
print("Running VOPE Tests...")
assert 'vope_simple' in df.columns
# A least-squares fit with an intercept leaves residuals that average to zero
# (up to floating-point error).
mean_residual = df['vope_simple'].mean()
assert abs(mean_residual) < 1e-10, "Residuals do not mean-center to 0"
print("✓ VOPE Calculated")

## 4. Metric 3: VOMAM
**Value Over Market Adjusted Model** (Multivariate Regression)

In [None]:
def calculate_vomam(df):
    """Fit the market-adjusted OLS model and store each player's residual (VOMAM).

    Regresses ``total_WAR`` on ``price_norm``, ``is_overseas`` and one-hot
    encoded ``role_category`` (first level dropped as the baseline).

    Args:
        df: DataFrame with ``total_WAR``, ``price_norm``, ``is_overseas`` and
            ``role_category`` columns. It is modified in place.

    Returns:
        tuple: ``(df, model)`` where ``df`` is the same object with
        ``xWAR_market_adj`` (fitted WAR) and ``vomam_score`` (realized minus
        fitted WAR) added, and ``model`` is the fitted statsmodels OLS result.
    """
    target = df['total_WAR']

    # drop_first avoids perfect collinearity between the role dummies and the constant.
    role_dummies = pd.get_dummies(df['role_category'], prefix='role', drop_first=True, dtype=int)
    market_features = df[['price_norm', 'is_overseas']]
    design = sm.add_constant(pd.concat([market_features, role_dummies], axis=1))

    model = sm.OLS(target, design).fit()

    df['xWAR_market_adj'] = model.predict(design)
    df['vomam_score'] = df['total_WAR'] - df['xWAR_market_adj']

    return df, model

df, vomam_model = calculate_vomam(df)

# --- TESTS ---
print("Running VOMAM Tests...")
assert 'vomam_score' in df
print("✓ VOMAM Calculated")

# Full coefficient table for the market-adjusted model
regression_report = vomam_model.summary()
print(regression_report)

## 5. Visualizations

In [None]:
# Price vs. realized WAR, with the price-only expectation curve overlaid and
# the largest VOMAM outliers labelled by name.
fig, ax = plt.subplots(figsize=(12, 8))

# Scatter plot
sns.scatterplot(data=df, x='price_norm', y='total_WAR', hue='role_category',
                style='is_overseas', s=100, alpha=0.7, ax=ax)

# Regression Curve (VOPE)
x_range = np.linspace(df['price_norm'].min(), df['price_norm'].max(), 100).reshape(-1, 1)
# Must mirror the feature expansion used inside calculate_vope
# (degree=2, include_bias=False) so the columns line up with the fitted model.
poly = PolynomialFeatures(degree=2, include_bias=False)
x_poly = poly.fit_transform(x_range)

# Reuse the model fitted by calculate_vope instead of fitting a second one
# here: the curve is then guaranteed to be the same fit that produced
# xWAR_price_only / vope_simple and cannot drift from the reported metric.
simple_model = vope_model
y_pred = simple_model.predict(x_poly)

ax.plot(x_range, y_pred, color='red', linestyle='--', linewidth=2, label='Expected WAR (Price Only)')

# Label top outliers: the 5 best-value players in bold, the 3 worst in red
top_steals = df.nlargest(5, 'vomam_score')
top_overpays = df.nsmallest(3, 'vomam_score')
for players, text_style in ((top_steals, {'fontweight': 'bold'}), (top_overpays, {'color': 'red'})):
    for _, row in players.iterrows():
        # Nudge the label right so it does not sit on top of the marker.
        ax.text(row['price_norm'] + 0.5, row['total_WAR'], row['price_name'], fontsize=9, **text_style)

ax.set_title('Price vs WAR: Identifying Market Inefficiencies (2025)', fontsize=14)
ax.set_xlabel('Price (% of Salary Cap)', fontsize=12)
ax.set_ylabel('Realized WAR', fontsize=12)
ax.axhline(0, color='black', alpha=0.2)
# Legend goes outside the axes so it does not cover any points.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()

## 6. Top Value Picks (The "Moneyball" List)

In [None]:
# Columns shown in both leaderboards
cols = ['price_name', 'team', 'price_cr', 'total_WAR', 'xWAR_market_adj', 'vomam_score']
valuation_view = df[cols]

# Largest positive residuals: players who most outperformed the model.
best_value = valuation_view.nlargest(10, 'vomam_score')
print("TOP 10 STEALS (Highest VOMAM):")
display(best_value)

# Most negative residuals: players who most underperformed the model.
worst_value = valuation_view.nsmallest(10, 'vomam_score')
print("\nTOP 10 OVERPAYS (Lowest VOMAM):")
display(worst_value)