In [12]:
import os
import yaml
import csv

In [13]:
# Directory that is scanned for .yaml/.yml files below (absolute, machine-specific path).
Dataset_path = r"D:\Git Repositories\bbl"

In [14]:
# os.listdir() returns entries in arbitrary order, so sort the names to make
# the processing order reproducible from run to run.
yaml_files = sorted(
    f for f in os.listdir(Dataset_path)
    if f.endswith(('.yaml', '.yml'))
)

In [15]:
# Accumulator: receives one parsed document per YAML file that loads successfully.
yaml_data = []

In [16]:
# Start from an empty list so that re-running this cell does not append every
# document a second time.
yaml_data = []

for yaml_file in yaml_files:
    file_path = os.path.join(Dataset_path, yaml_file)

    try:
        # Decode explicitly as UTF-8 instead of the platform default encoding
        # (assumes the files are UTF-8, the encoding YAML streams default to).
        with open(file_path, 'r', encoding='utf-8') as file:
            data = yaml.safe_load(file)
    except Exception as e:
        # Deliberately broad: one unreadable or malformed file is reported and
        # skipped rather than aborting the whole load.
        print(f"Error reading {yaml_file}: {e}")
        continue

    if data is not None:
        yaml_data.append(data)
    else:
        # safe_load returns None for a file that contains no document.
        print(f"Warning : {yaml_file} is empty or invalid")
        
    
    

In [17]:
# Number of documents loaded; equals len(yaml_files) only if no file was skipped.
len(yaml_data)

618

In [18]:
# Number of .yaml/.yml files found in Dataset_path.
len(yaml_files)

618

In [64]:
def _first_match_date(match_info):
    """Return the first entry of ``match_info["dates"]`` or ``"Unknown"``."""
    dates = match_info.get("dates")
    if isinstance(dates, (list, tuple)):
        # An empty list must not raise IndexError and abort the extraction.
        return dates[0] if dates else "Unknown"
    # A scalar value is used as-is; a missing/None value becomes "Unknown".
    return dates if dates is not None else "Unknown"


def _parse_ball_id(ball_id):
    """Split a delivery id such as ``0.1`` into integer ``(over, ball)``.

    Raises:
        ValueError: if the id is not two integers separated by a single ".".
    """
    over_str, ball_str = str(ball_id).split(".")
    return int(over_str), int(ball_str)


def _extract_wicket(ball_info):
    """Return ``(player_out, kind, fielder)``; all ``None`` without a wicket."""
    wicket_info = ball_info.get("wicket")
    # The wicket may be a single mapping or a list of mappings; only the first
    # list entry is reported.
    if isinstance(wicket_info, list):
        wicket_info = wicket_info[0] if wicket_info else None
    if not isinstance(wicket_info, dict):
        return None, None, None

    fielders = wicket_info.get("fielders")
    if isinstance(fielders, (list, tuple)):
        # str() keeps join() from failing on non-string entries; an empty list
        # yields None so "no fielder" is always represented the same way.
        fielder = ", ".join(str(name) for name in fielders) or None
    else:
        fielder = fielders
    return wicket_info.get("player_out"), wicket_info.get("kind"), fielder


def extract_ball_by_ball(all_matches):
    """Flatten nested match records into one dict per delivery.

    Args:
        all_matches: iterable of mappings. Each may have an ``"info"`` mapping
            (whose ``"dates"`` list supplies the match date) and an
            ``"innings"`` list of ``{inning_name: inning_info}`` mappings.
            ``inning_info`` may have ``"team"`` and ``"deliveries"``, the
            latter a list of ``{ball_id: ball_info}`` mappings.

    Returns:
        A list of dicts with the keys ``date``, ``team``, ``inning``, ``over``,
        ``ball``, ``batsman``, ``bowler``, ``non_striker``, ``runs_batsman``,
        ``runs_extras``, ``runs_total``, ``wicket_player_out``,
        ``wicket_kind`` and ``fielder``. Missing run values default to 0 and
        missing wicket details to ``None``.

    Deliveries whose id cannot be parsed as ``<over>.<ball>`` are reported with
    ``print`` and skipped.
    """
    ball_data = []

    for match in all_matches:
        # "or" also covers keys that are present but explicitly None.
        match_info = match.get("info") or {}
        match_date = _first_match_date(match_info)

        for inning in match.get("innings") or []:
            for inning_name, inning_info in inning.items():
                inning_info = inning_info or {}
                team = inning_info.get("team", "Unknown")

                for delivery in inning_info.get("deliveries") or []:
                    for ball_id, ball_info in delivery.items():
                        # NOTE(review): if ids arrive as floats, an id written
                        # as 0.10 is indistinguishable from 0.1 here - confirm
                        # against the loader.
                        try:
                            over, ball = _parse_ball_id(ball_id)
                        except ValueError as e:
                            print(f"Error parsing ball id {ball_id}: {e}")
                            continue

                        player_out, kind, fielder = _extract_wicket(ball_info)
                        runs = ball_info.get("runs") or {}

                        ball_data.append({
                            "date": match_date,
                            "team": team,
                            "inning": inning_name,
                            "over": over,
                            "ball": ball,
                            "batsman": ball_info.get("batsman"),
                            "bowler": ball_info.get("bowler"),
                            "non_striker": ball_info.get("non_striker"),
                            "runs_batsman": runs.get("batsman", 0),
                            "runs_extras": runs.get("extras", 0),
                            "runs_total": runs.get("total", 0),
                            "wicket_player_out": player_out,
                            "wicket_kind": kind,
                            "fielder": fielder,
                        })

    return ball_data

In [65]:
# One dict per delivery, built from every document loaded into yaml_data.
flattened_data = extract_ball_by_ball(yaml_data)

In [68]:
# Destination of the flattened export (absolute, machine-specific location).
csv_file_path = r"D:\Git Repositories\AuctionScout-AI-powered-player-recommendations-for-IPL-auctions\dataset\all_bbl_matches_ball_by_ball.csv"

# newline="" leaves line-ending handling to the csv module.
with open(csv_file_path, "w", newline="", encoding="utf-8") as csv_file:
    # Column order of the output file. Every key of the row dicts must appear
    # here, because DictWriter rejects rows with unknown keys by default.
    fieldnames = [
        "date",
        "team",
        "inning",
        "over",
        "ball",
        "batsman",
        "bowler",
        "non_striker",
        "runs_batsman",
        "runs_extras",
        "runs_total",
        "wicket_player_out",
        "fielder",
        "wicket_kind",
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(flattened_data)

print(f"CSV file saved at {csv_file_path}")


CSV file saved at D:\Git Repositories\AuctionScout-AI-powered-player-recommendations-for-IPL-auctions\dataset\all_bbl_matches_ball_by_ball.csv
