In [None]:
import pandas as pd
import numpy as np
import ast

# Load the input CSV into `df`. The cells below read its player_name, Bt_four,
# Bt_six, Bw_Wickets, Total_Matches, Total_Bt_Runs/Balls and Total_Bw_Runs/Balls
# columns, so the file must provide all of them.
df = pd.read_csv("final_clean_ipl_2023_2025.csv")

In [None]:
# Parse the string representation of a list and sum its numeric entries
def parse_and_sum(list_str):
    """Sum the numeric entries of a list given as its string representation.

    Parameters
    ----------
    list_str : str
        Text such as ``"[4, 0, '2']"`` that ``ast.literal_eval`` can parse.

    Returns
    -------
    number
        The sum of every entry that converts to a number. Entries that do not
        convert (they would become NaN under ``errors='coerce'``) are skipped
        so that one bad value does not turn the whole total into NaN.
        Returns 0 when the input cannot be parsed or is not a list.
    """
    try:
        data = ast.literal_eval(list_str)
    except (ValueError, SyntaxError, TypeError):
        # Unparseable text, or a non-string such as a float NaN cell.
        return 0
    if not isinstance(data, list):
        return 0

    total = 0
    for item in data:
        # Nested containers are not single values; ignore them.
        if not pd.api.types.is_scalar(item):
            continue
        try:
            value = pd.to_numeric(item, errors='coerce')
        except (ValueError, TypeError):
            continue
        # Only accumulate real numbers; NaN marks an unconvertible entry.
        if pd.api.types.is_number(value) and not pd.isna(value):
            total += value
    return total

In [None]:
# Build each per-row total column from its stringified-list source column
for total_col, source_col in (
    ('Total_4s', 'Bt_four'),
    ('Total_6s', 'Bt_six'),
    ('Total_Wkts', 'Bw_Wickets'),
):
    df[total_col] = df[source_col].apply(parse_and_sum)

# Preview the derived totals next to the player name
df.loc[:, ['player_name', 'Total_4s', 'Total_6s', 'Total_Wkts']].head()

In [None]:
# Every listed column is summed per player.
aggregation_rules = {
    column: 'sum'
    for column in (
        'Total_Matches',
        'Total_Bt_Runs',
        'Total_Bt_Balls',
        'Total_Bw_Runs',
        'Total_Bw_Balls',
        'Total_4s',
        'Total_6s',
        'Total_Wkts',
    )
}

# One row per player_name, with player_name kept as a regular column
per_player = df.groupby('player_name', as_index=False)
agg_df = per_player.agg(aggregation_rules)
agg_df.head()

In [None]:
# Fantasy points: scoring weights
POINTS_PER_RUN = 1
POINTS_PER_FOUR = 1
POINTS_PER_SIX = 2
POINTS_PER_WICKET = 25

# Batting = runs + boundary bonuses
agg_df['Batting_Points'] = (
    agg_df['Total_Bt_Runs'].mul(POINTS_PER_RUN)
    .add(agg_df['Total_4s'].mul(POINTS_PER_FOUR))
    .add(agg_df['Total_6s'].mul(POINTS_PER_SIX))
)

# Bowling = wickets only
agg_df['Bowling_Points'] = agg_df['Total_Wkts'].mul(POINTS_PER_WICKET)

# Overall total is the sum of both disciplines
agg_df['Total_Fantasy_Points'] = agg_df['Batting_Points'].add(agg_df['Bowling_Points'])

agg_df.loc[:, ['player_name', 'Batting_Points', 'Bowling_Points', 'Total_Fantasy_Points']].head()

In [None]:
# Fantasy points per match; rows without a positive match count get 0
agg_df['FPS_per_Match'] = (
    agg_df['Total_Fantasy_Points']
    .div(agg_df['Total_Matches'])
    .where(agg_df['Total_Matches'] > 0, 0)
)
agg_df.loc[:, ['player_name', 'Total_Matches', 'FPS_per_Match']].head()

In [None]:
# Number of matches at which the per-match score is taken at full weight
MATCH_THRESHOLD = 10.0

# Weight grows linearly with matches played and is capped to the [0, 1] range
agg_df['Match_Factor'] = (
    agg_df['Total_Matches'].div(MATCH_THRESHOLD).clip(lower=0, upper=1)
)
agg_df['Adjusted_FPS'] = agg_df['FPS_per_Match'].mul(agg_df['Match_Factor'])

agg_df.loc[
    :, ['player_name', 'Total_Matches', 'FPS_per_Match', 'Match_Factor', 'Adjusted_FPS']
].head()


In [None]:
# Rank players from highest to lowest Adjusted_FPS and persist the ranking
output_file = 'phase2_adjusted_player_fps.csv'
final_table = agg_df.sort_values('Adjusted_FPS', ascending=False)
final_table.to_csv(output_file, index=False)

# Show the ten best-ranked players
print("\nTop 10 Players (by Adjusted FPS):")
display(
    final_table.head(10)[
        ['player_name', 'Total_Matches', 'Adjusted_FPS', 'FPS_per_Match', 'Match_Factor']
    ]
)


In [None]:
# Read back the ranking CSV written by the export cell above and preview it.
# NOTE(review): this rebinds `df`, which until here held the raw data loaded
# from final_clean_ipl_2023_2025.csv. The reloaded table is built from agg_df,
# so it presumably has no Bt_four / Bt_six / Bw_Wickets columns; re-running the
# cell that derives Total_4s/Total_6s/Total_Wkts without reloading the raw file
# first would then fail. Consider a separate variable name — confirm no later
# cell depends on `df` holding this table before renaming.
df = pd.read_csv('phase2_adjusted_player_fps.csv')
df.head()

In [None]:
# Batting strike rate: runs per 100 balls faced (0 when no balls were faced)
agg_df["Strike_Rate"] = np.where(agg_df["Total_Bt_Balls"] > 0,
                                 (agg_df["Total_Bt_Runs"] / agg_df["Total_Bt_Balls"]) * 100,
                                 0)

# Strike-rate bonus tiers. np.select uses the first matching condition, so the
# "never batted" check goes first: those players have Strike_Rate == 0 only as
# a placeholder and must not fall into the "< 50" penalty tier. This mirrors
# how non-bowlers are exempted from the economy bonus below.
#   > 170: +6 | 150-170: +4 | 130-<150: +2 | 60-<70: -2 | 50-<60: -4 | < 50: -6
#   anything else (including 70-<130): 0
agg_df["SR_Bonus_Points"] = np.select(
    [
        ~(agg_df["Total_Bt_Balls"] > 0),
        agg_df["Strike_Rate"] > 170,
        (agg_df["Strike_Rate"] >= 150) & (agg_df["Strike_Rate"] <= 170),
        (agg_df["Strike_Rate"] >= 130) & (agg_df["Strike_Rate"] < 150),
        (agg_df["Strike_Rate"] >= 60) & (agg_df["Strike_Rate"] < 70),
        (agg_df["Strike_Rate"] >= 50) & (agg_df["Strike_Rate"] < 60),
        (agg_df["Strike_Rate"] < 50)
    ],
    [0, 6, 4, 2, -2, -4, -6],
    default=0
)

# Bowling economy: runs conceded per 6 balls bowled (NaN for non-bowlers)
agg_df["Economy_Rate"] = np.where(agg_df["Total_Bw_Balls"] > 0,
                                  agg_df["Total_Bw_Runs"] / (agg_df["Total_Bw_Balls"] / 6),
                                  np.nan)

# Economy bonus tiers; NaN (non-bowler) is checked first and scores 0.
#   < 5: +6 | 5-<6: +4 | 6-<7: +2 | 10-<11: -2 | 11-<12: -4 | >= 12: -6
#   anything else (including 7-<10): 0
econ_conditions = [
    agg_df["Economy_Rate"].isna(),
    (agg_df["Economy_Rate"] < 5),
    (agg_df["Economy_Rate"] >= 5) & (agg_df["Economy_Rate"] < 6),
    (agg_df["Economy_Rate"] >= 6) & (agg_df["Economy_Rate"] < 7),
    (agg_df["Economy_Rate"] >= 10) & (agg_df["Economy_Rate"] < 11),
    (agg_df["Economy_Rate"] >= 11) & (agg_df["Economy_Rate"] < 12),
    (agg_df["Economy_Rate"] >= 12)
]
econ_values = [0, 6, 4, 2, -2, -4, -6]

agg_df["Econ_Bonus_Points"] = np.select(econ_conditions, econ_values, default=0)


In [None]:
# Enhanced score = match-adjusted score plus both bonus components
agg_df["Enhanced_FPS"] = (
    agg_df["Adjusted_FPS"]
    .add(agg_df["SR_Bonus_Points"])
    .add(agg_df["Econ_Bonus_Points"])
)

# Persist the full table, then show the ten highest Enhanced_FPS rows
output_file = "phase2.1_enhanced_fps_no_roles.csv"
agg_df.to_csv(output_file, index=False)

agg_df.sort_values(by="Enhanced_FPS", ascending=False).head(10)

In [None]:
import matplotlib.pyplot as plt

# Horizontal bar chart of the ten highest Enhanced_FPS players
top10 = agg_df.sort_values(by="Enhanced_FPS", ascending=False).head(10)

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(top10["player_name"], top10["Enhanced_FPS"], color="lightgreen")
ax.invert_yaxis()  # put the best-ranked player at the top
ax.set_title("Top 10 Players by Enhanced Fantasy Potential Score")
ax.set_xlabel("Enhanced FPS")
ax.set_ylabel("Player")
ax.grid(axis="x", linestyle="--", alpha=0.5)
plt.show()