In [2]:
import csv

def load_hero_matches_from_csv(csv_file):
    """
    Load the seq_num -> hero_id mapping from a CSV file.

    The file must start with a header row that contains at least the columns
    ``seq_num`` and ``hero_id``. Both values are converted to ``int``. If the
    same ``seq_num`` occurs on several rows, the last row wins.

    Parameters:
    - csv_file: str, the path to the CSV file to read.

    Returns:
    - dict, where keys are seq_num (int) and values are hero_id (int).
      A file that contains only the header row yields an empty dict.

    Raises:
    - OSError if the file cannot be opened.
    - KeyError if a required column is missing from the header.
    - ValueError if a seq_num or hero_id value is not a valid integer.
    """
    # newline="" hands line-ending handling to the csv module, which is what
    # the csv documentation requires for file objects passed to a reader.
    with open(csv_file, mode="r", newline="") as file:
        reader = csv.DictReader(file)
        return {int(row["seq_num"]): int(row["hero_id"]) for row in reader}

# Build the module-level seq_num -> hero_id lookup.
# parse_json_to_csv looks hero ids up in `match_hero_ids`, so this cell must
# run before the conversion cell.
csv_file_path = "hero_matches.csv"  # Path to the CSV file
match_hero_ids = load_hero_matches_from_csv(csv_file_path)

# Print the resulting dictionary (the whole mapping, one entry per seq_num)
print(match_hero_ids)


{6803707269: 61, 6803219777: 31, 6803124865: 4, 6803065802: 4, 6803018429: 4, 6801404603: 14, 6801386701: 29, 6794200794: 4, 6793941569: 4, 6793878034: 55, 6793836309: 55, 6793234081: 81, 6792147679: 4, 6789945976: 4, 6789913358: 81, 6787236269: 30, 6787207859: 30, 6787186453: 4, 6787151056: 81, 6787084387: 81, 6785909782: 4, 6785864890: 81, 6783339972: 81, 6782781883: 81, 6782071877: 30, 6782010193: 81, 6781962714: 81, 6781940483: 81, 6781562659: 81, 6781534083: 81, 6781510160: 81, 6781483934: 4, 6781462374: 18, 6781442117: 4, 6780157969: 54, 6780106669: 54, 6780074150: 54, 6780050532: 54, 6780021846: 2, 6779971835: 49, 6779919767: 18, 6778666691: 18, 6778620270: 59, 6777367976: 2, 6777331925: 2, 6152974217: 74, 6152932498: 99, 6152918305: 59, 6152894364: 94, 5870245021: 95, 5866135743: 101, 5866094832: 59, 5630666786: 59, 5562744205: 10, 5562710045: 59, 5562680201: 95, 5559024683: 95, 5558981849: 59, 4957374265: 105, 4957283162: 95, 4956053648: 63, 4956025695: 33, 4955395413: 33, 495

In [3]:
import os
import json
import pandas as pd

# Account ids collected by parse_json_to_csv: one entry is appended for each
# match in which the tracked player is found. Re-running this cell resets it.
acc = []
# Constants
RAW_DATA_DIR = "raw_data_json"  # Directory containing JSON files
CSV_OUTPUT_DIR = "csv_data"    # Directory for output CSV files

# Create the output directory up front; exist_ok=True makes re-runs a no-op.
os.makedirs(CSV_OUTPUT_DIR, exist_ok=True)

# Matches whose duration is at or below this value are skipped as too short.
_MIN_MATCH_DURATION = 900


def _safe_divisor(score):
    """Return ``score`` as a divisor, mapping a missing, None or 0 score to 1."""
    return score or 1


def _build_match_row(match, hero_lookup, account_sink):
    """Build the CSV row (a dict) for one match, or return None to skip it.

    A match is skipped when it is too short, has no players, has no entry in
    ``hero_lookup``, or when none of its players uses the tracked hero.
    When a row is produced, the tracked player's account_id is appended to
    ``account_sink``.
    """
    # Skip short matches
    if (match.get("duration") or 0) <= _MIN_MATCH_DURATION:
        return None

    # Check if players exist
    players = match.get("players") or []
    if not players:
        return None

    # The tracked player is identified by the hero recorded for this match.
    match_seq_num = match.get("match_seq_num")
    my_hero_id = hero_lookup.get(match_seq_num)
    if my_hero_id is None:
        print(f"Skipping match {match_seq_num}: no hero_id recorded for it")
        return None

    me = next((p for p in players if p.get("hero_id") == my_hero_id), None)
    if me is None:
        # Without this guard the previous match's player stats would be
        # reused for this row (or the name would be unbound on the first match).
        print(f"Skipping match {match_seq_num}: hero {my_hero_id} not among players")
        return None

    team_num = me.get("team_number")  # 0 represents Radiant, 1 represents Dire
    account_sink.append(me.get("account_id"))

    # Match-level details
    match_details = {
        "start_time": match.get("start_time"),
        "duration": match.get("duration"),
        "radiant_score": match.get("radiant_score"),
        "dire_score": match.get("dire_score"),
        "game_mode": match.get("game_mode"),
    }

    # Player-level details
    kills = me.get("kills", 0)
    deaths = me.get("deaths", 0)
    assists = me.get("assists", 0)

    # A team score of 0 is legal, so guard the divisors explicitly; a plain
    # dict default only covers a missing key.
    radiant_div = _safe_divisor(match.get("radiant_score"))
    dire_div = _safe_divisor(match.get("dire_score"))
    if team_num == 0:
        own_div, opponent_div = radiant_div, dire_div
    else:
        own_div, opponent_div = dire_div, radiant_div

    # Share of own team's score taken part in, minus share of the opposing
    # team's score given away through deaths.
    performance = (kills + assists) / own_div - deaths / opponent_div

    radiant_win = match.get("radiant_win")
    player_win = int(
        (radiant_win and team_num == 0) or (not radiant_win and team_num == 1)
    )

    my_player_details = {
        "kills": kills,
        "deaths": deaths,
        "assists": assists,
        "gold_per_min": me.get("gold_per_min", 0),
        "xp_per_min": me.get("xp_per_min", 0),
        "my_hero_id": me.get("hero_id", 0),
        "player_win": player_win,
        "performance": performance,
    }

    # Hero ids of the other players: allies as A1, A2, ... and enemies as
    # E1, E2, ..., numbered in the order they appear in the players list.
    allies_player_details = {}
    enemies_player_details = {}
    for player in players:
        if player.get("team_number") != team_num:
            key = f"E{len(enemies_player_details) + 1}"
            enemies_player_details[key] = player.get("hero_id")
        elif player.get("hero_id") != my_hero_id:
            key = f"A{len(allies_player_details) + 1}"
            allies_player_details[key] = player.get("hero_id")

    # Merge order fixes the CSV column order: match, own stats, allies, enemies.
    return {
        **match_details,
        **my_player_details,
        **allies_player_details,
        **enemies_player_details,
    }


def parse_json_to_csv(json_file, csv_file, hero_ids=None, account_log=None):
    """Parse matches from JSON and save relevant data to a CSV.

    Each match that is kept produces one row holding match-level fields, the
    tracked player's stats, and the hero ids of allies (A1..) and enemies (E1..).

    Parameters:
    - json_file: str, path of a JSON file whose top level is a list of matches.
    - csv_file: str, path of the CSV file to write.
    - hero_ids: optional mapping of match_seq_num -> hero_id that identifies
      the tracked player in each match. Defaults to the module-level
      ``match_hero_ids``.
    - account_log: optional list that receives the tracked player's
      account_id for every row produced. Defaults to the module-level ``acc``.

    Returns:
    - None. Prints the resulting shape and a status line. Nothing is written
      when the JSON is not a list or when no match produces a row.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working.
    hero_lookup = match_hero_ids if hero_ids is None else hero_ids
    account_sink = acc if account_log is None else account_log

    # Read the JSON file
    with open(json_file, "r", encoding="utf-8") as f:
        matches = json.load(f)

    # Validate JSON format
    if not isinstance(matches, list):
        print(f"Invalid JSON format in {json_file}")
        return

    data = []  # Store the final player-level data
    for match in matches:
        row = _build_match_row(match, hero_lookup, account_sink)
        if row is not None:
            data.append(row)

    # Create a DataFrame and save as CSV
    if data:
        df = pd.DataFrame(data)
        print(df.shape)
        df.to_csv(csv_file, index=False)
        print(f"Saved matches from {json_file} to {csv_file}")
    else:
        print(f"No data found in {json_file}")

def main():
    """Convert every ``.json`` file in RAW_DATA_DIR into a CSV in CSV_OUTPUT_DIR.

    Each output file keeps the input's base name with the trailing ``.json``
    replaced by ``.csv``. Files are processed in sorted name order so that
    repeated runs produce their output, and fill ``acc``, in the same order.
    """
    suffix = ".json"
    # os.listdir returns names in arbitrary order; sort for reproducible runs.
    for json_file in sorted(os.listdir(RAW_DATA_DIR)):
        if not json_file.endswith(suffix):
            continue
        # Swap only the trailing extension; str.replace would also rewrite
        # any ".json" that occurs earlier in the file name.
        csv_name = json_file[: -len(suffix)] + ".csv"
        json_path = os.path.join(RAW_DATA_DIR, json_file)
        csv_path = os.path.join(CSV_OUTPUT_DIR, csv_name)
        parse_json_to_csv(json_path, csv_path)

# Run the conversion when this code executes as the top-level module
# (a script run directly, or a cell executed in a notebook kernel).
if __name__ == "__main__":
    main()


(97, 22)
Saved matches from raw_data_json/matches_batch_1.json to csv_data/matches_batch_1.csv
(99, 22)
Saved matches from raw_data_json/matches_batch_2.json to csv_data/matches_batch_2.csv
(100, 22)
Saved matches from raw_data_json/matches_batch_3.json to csv_data/matches_batch_3.csv
(32, 22)
Saved matches from raw_data_json/matches_batch_4.json to csv_data/matches_batch_4.csv


In [7]:
# Number of account ids collected by parse_json_to_csv across all processed
# files; expected to equal the total number of rows written to the CSVs.
len(acc)

328