In [5]:
import os
import json
import pandas as pd
from collections import defaultdict

def _fantasy_points(s, strike_rate, economy, in_top_seven):
    """Return the fantasy score for one player's aggregated match stats.

    Args:
        s: Mapping of stat name -> value for the player ('runs', 'balls',
            'fours', 'sixes', 'wickets', 'balls_bowled', 'was_out').
        strike_rate: Batting strike rate, or None if no ball was faced.
        economy: Bowling economy, or None if fewer than 12 balls were bowled.
        in_top_seven: True if the player is among the first seven names
            listed for their team.
    """
    runs = s.get('runs', 0)
    points = runs
    points += s.get('fours', 0) * 4
    points += s.get('sixes', 0) * 6
    points += s.get('wickets', 0) * 30

    # Strike rate bonus/penalty. Compare against None explicitly: a strike
    # rate of 0.0 is falsy but must still receive the slow-scoring penalty.
    if s.get('balls', 0) >= 10 and strike_rate is not None:
        if strike_rate > 170:
            points += 6
        elif strike_rate > 150:
            points += 4
        elif strike_rate > 130:
            points += 2
        elif strike_rate < 50:
            points -= 3

    # Duck penalty: dismissed for 0 while listed in the team's first seven.
    if runs == 0 and s.get('was_out', False) and in_top_seven:
        points -= 2

    # Economy bonus. An economy of 0.0 is the best possible figure, so it
    # must not be filtered out by a truthiness check.
    if s.get('balls_bowled', 0) >= 12 and economy is not None:
        if economy < 5:
            points += 6
        elif economy < 6:
            points += 4
        elif economy <= 7:
            points += 2

    return points


def extract_match_data(file_path):
    """Build per-player stat rows (with fantasy points) for one match file.

    Reads a JSON file with an 'info' section ('dates', optional 'venue',
    'teams', 'players') and an 'innings' list of overs/deliveries, then
    aggregates batting and bowling figures per (player, team).
    The layout looks like the Cricsheet JSON format -- TODO confirm.

    NOTE(review): every delivery is counted as one ball faced and one ball
    bowled, and the delivery's total runs are charged to the bowler; extras
    (wides, no-balls, byes) are not treated specially. Confirm whether that
    is intended.

    Args:
        file_path: Path to the match JSON file. The file name up to the
            first '.' is used as the match id.

    Returns:
        A list of dicts, one per (player, team) that batted or bowled, with
        keys match_id, date, venue, player, team, opponent, runs, balls,
        fours, sixes, wickets, balls_bowled, runs_conceded, economy,
        strike_rate and fantasy_points.

    Raises:
        KeyError: If a required key is missing from the JSON structure.
    """
    with open(file_path, encoding='utf-8') as f:
        data = json.load(f)

    match_id = os.path.basename(file_path).split('.')[0]
    match_date = data['info']['dates'][0]
    venue = data['info'].get('venue', 'Unknown')
    teams = data['info']['teams']
    playing_11 = data['info']['players']  # dict: team -> list of players

    # Create player to team map for all players in the match
    player_to_team = {}
    for team_name, players in playing_11.items():
        for p in players:
            player_to_team[p] = team_name

    stats = defaultdict(lambda: defaultdict(int))

    for inning in data['innings']:
        batting_team = inning['team']
        # A bowler missing from the players list still belongs to the side
        # that is not batting, so use that instead of a bare "Unknown".
        fielding_team = next(
            (t for t in teams if t != batting_team), "Unknown"
        )
        for over in inning['overs']:
            for delivery in over['deliveries']:
                batter = delivery['batter']
                bowler = delivery['bowler']
                runs = delivery['runs']['batter']
                total_runs = delivery['runs']['total']

                # Batter stats
                batter_stats = stats[(batter, batting_team)]
                batter_stats['runs'] += runs
                batter_stats['balls'] += 1
                if runs == 4:
                    batter_stats['fours'] += 1
                if runs == 6:
                    batter_stats['sixes'] += 1

                # Bowler stats
                bowler_team = player_to_team.get(bowler, fielding_team)
                bowler_stats = stats[(bowler, bowler_team)]
                bowler_stats['balls_bowled'] += 1
                bowler_stats['runs_conceded'] += total_runs

                for w in delivery.get('wickets', []):
                    # Run outs are not credited to the bowler.
                    if w['kind'] != 'run out':
                        bowler_stats['wickets'] += 1
                    # Track dismissal of the striker for the duck penalty.
                    if w['player_out'] == batter:
                        batter_stats['was_out'] = True

    rows = []
    for (player, team), s in stats.items():
        # opponent is the other team in the match
        opponent = next((t for t in teams if t != team), 'Unknown')
        balls = s.get('balls', 0)
        runs = s.get('runs', 0)
        balls_bowled = s.get('balls_bowled', 0)
        runs_conceded = s.get('runs_conceded', 0)
        # Economy is only reported once at least two overs' worth of
        # deliveries (12 balls) have been bowled.
        economy = (
            runs_conceded / (balls_bowled / 6) if balls_bowled >= 12 else None
        )
        sr = (runs / balls * 100) if balls else None

        in_top_seven = team in playing_11 and player in playing_11[team][:7]
        fp = _fantasy_points(s, sr, economy, in_top_seven)

        rows.append({
            'match_id': match_id,
            'date': match_date,
            'venue': venue,
            'player': player,
            'team': team,
            'opponent': opponent,
            'runs': runs,
            'balls': balls,
            'fours': s.get('fours', 0),
            'sixes': s.get('sixes', 0),
            'wickets': s.get('wickets', 0),
            'balls_bowled': balls_bowled,
            'runs_conceded': runs_conceded,
            # Keep a genuine 0.0 rather than collapsing it to None.
            'economy': round(economy, 2) if economy is not None else None,
            'strike_rate': round(sr, 2) if sr is not None else None,
            'fantasy_points': fp
        })

    return rows

# === MAIN EXECUTION ===
# Collect the per-player rows of every match JSON file in the input folder
# and write them out as a single CSV.
folder_path = os.path.expanduser("~/Downloads/ipl_json")
output_path = "fantasy_dataset2.csv"
all_rows = []

# os.listdir returns entries in arbitrary order; sort so the row order of the
# resulting CSV is reproducible between runs.
for file in sorted(os.listdir(folder_path)):
    if file.endswith('.json'):
        file_path = os.path.join(folder_path, file)
        all_rows.extend(extract_match_data(file_path))

df = pd.DataFrame(all_rows)
df.to_csv(output_path, index=False)
# Report the file that was actually written (the previous message named a
# different file than the one passed to to_csv).
print(f"Data saved to {output_path} with full fantasy logic")



Data saved to fantasy_dataset_enhanced.csv with full fantasy logic
