In [2]:
import os
import json
import xml.etree.ElementTree as ET
import re

def load_mappings(xml_folder):
    player_mappings = {}
    team_mappings = {}
    for filename in os.listdir(xml_folder):
        if filename.endswith(".xml"):
            path = os.path.join(xml_folder, filename)
            tree = ET.parse(path)
            root = tree.getroot()
            # Load player mappings
            for player in root.findall(".//Player"):
                player_id = player.get("player_id")
                player_name = player.get("player_name")
                player_mappings[player_id] = player_name
            # Load team mappings based on SoccerFeed structure
            for soccerfeed in root.findall(".//SoccerFeed"):
                home_team_id = soccerfeed.find(".//home_team_id").text
                home_team_name = soccerfeed.find(".//home_team_name").text
                away_team_id = soccerfeed.find(".//away_team_id").text
                away_team_name = soccerfeed.find(".//away_team_name").text
                team_mappings[home_team_id] = home_team_name
                team_mappings[away_team_id] = away_team_name
            print(f"Loaded {len(player_mappings)} player and {len(team_mappings)} team entries from {filename}.")
    return player_mappings, team_mappings

def replace_ids_with_names(json_folder, player_mappings, team_mappings):
    for filename in os.listdir(json_folder):
        if filename.endswith(".json"):
            path = os.path.join(json_folder, filename)
            with open(path, 'r') as file:
                data = json.load(file)
            
            # Process each event in the JSON file
            events_updated = 0
            for event in data['events']:
                summary = event['summary']
                # Replace all IDs found with corresponding names
                matches = re.findall(r'\d+', summary)
                for match in matches:
                    if match in player_mappings:
                        summary = summary.replace(match, player_mappings[match])
                    elif match in team_mappings:
                        summary = summary.replace(match, team_mappings[match])
                event['summary'] = summary
                events_updated += 1
            
            # Save the modified JSON file
            with open(path, 'w') as file:
                json.dump(data, file, indent=4)
            print(f"Updated {events_updated} events in {filename}")

# Paths to the folders
xml_folder = '/Users/jesperpilegaard/Desktop/Superliga 2022-2023/pass-files'  # Update with your actual path
json_folder = '/Users/jesperpilegaard/Desktop/Superliga 2022-2023/json-summaries'  # Update with your actual path

# Load player and team mappings from XML files
player_mappings, team_mappings = load_mappings(xml_folder)

# Replace IDs in JSON files
replace_ids_with_names(json_folder, player_mappings, team_mappings)


Loaded 16 player and 0 team entries from pass_matrix_100_2022_g2359864_t418.xml.
Loaded 32 player and 0 team entries from pass_matrix_100_2022_g2290059_t569.xml.
Loaded 47 player and 0 team entries from pass_matrix_100_2022_g2359898_t2289.xml.
Loaded 62 player and 0 team entries from pass_matrix_100_2022_g2290014_t1000.xml.
Loaded 78 player and 0 team entries from pass_matrix_100_2022_g2290013_t2592.xml.
Loaded 93 player and 0 team entries from pass_matrix_100_2022_g2290096_t272.xml.
Loaded 98 player and 0 team entries from pass_matrix_100_2022_g2290082_t569.xml.
Loaded 114 player and 0 team entries from pass_matrix_100_2022_g2290015_t239.xml.
Loaded 129 player and 0 team entries from pass_matrix_100_2022_g2359892_t401.xml.
Loaded 145 player and 0 team entries from pass_matrix_100_2022_g2359881_t545.xml.
Loaded 151 player and 0 team entries from pass_matrix_100_2022_g2290069_t401.xml.
Loaded 157 player and 0 team entries from pass_matrix_100_2022_g2290011_t569.xml.
Loaded 159 player an

In [3]:
import json
import os

def load_and_replace_team_names(json_folder):
    # Iterate over each file in the folder
    for filename in os.listdir(json_folder):
        if filename.endswith(".json"):
            filepath = os.path.join(json_folder, filename)
            with open(filepath, 'r', encoding='utf-8') as file:
                data = json.load(file)

            # Extract team names and IDs from match_info
            team_names = {
                data['match_info']['home_team']['id']: data['match_info']['home_team']['name'],
                data['match_info']['away_team']['id']: data['match_info']['away_team']['name']
            }

            # Replace team IDs with team names in each event's summary
            for event in data['events']:
                for team_id, team_name in team_names.items():
                    event['summary'] = event['summary'].replace(f"team {team_id}", f"team {team_name}")

            # Save the modified data back to the JSON file
            with open(filepath, 'w', encoding='utf-8') as file:
                json.dump(data, file, indent=4)

            print(f"Processed {filename}")

# Specify the path to the folder containing the JSON files
json_folder = '/Users/jesperpilegaard/Desktop/Superliga 2022-2023/json-summaries'  # Adjust this path to your folder location

# Run the function
load_and_replace_team_names(json_folder)


Processed f24-100-2022-2290109-eventdetails.json
Processed f24b-100-2022-2290106-eventdetails.json
Processed f24-100-2022-2359898-eventdetails.json
Processed f24-100-2022-2290073-eventdetails.json
Processed f24b-100-2022-2290032-eventdetails.json
Processed f24-100-2022-2290015-eventdetails.json
Processed f24-100-2022-2290121-eventdetails.json
Processed f24-100-2022-2359860-eventdetails.json
Processed f24-100-2022-2290097-eventdetails.json
Processed f24b-100-2022-2290098-eventdetails.json
Processed f24-100-2022-2359906-eventdetails.json
Processed f24b-100-2022-2290048-eventdetails.json
Processed f24-100-2022-2290009-eventdetails.json
Processed f24-100-2022-2290047-eventdetails.json
Processed f24-100-2022-2359884-eventdetails.json
Processed f24-100-2022-2290021-eventdetails.json
Processed f24-100-2022-2290115-eventdetails.json
Processed f24-100-2022-2359854-eventdetails.json
Processed f24-100-2022-2290038-eventdetails.json
Processed f24-100-2022-2290076-eventdetails.json
Processed f24-10

In [1]:
import json
import os

def load_and_replace_team_names(json_folder):
    # Iterate over each file in the folder
    for filename in os.listdir(json_folder):
        if filename.endswith(".json"):
            filepath = os.path.join(json_folder, filename)
            with open(filepath, 'r', encoding='utf-8') as file:
                data = json.load(file)

            # Extract team names and IDs from match_info
            team_names = {
                data['match_info']['home_team']['id']: data['match_info']['home_team']['name'],
                data['match_info']['away_team']['id']: data['match_info']['away_team']['name']
            }

            # Replace team IDs with team names in each event's summary
            for event in data['events']:
                for team_id, team_name in team_names.items():
                    event['summary'] = event['summary'].replace(f"team {team_id}", f"team {team_name}")

            # Save the modified data back to the JSON file
            # Using ensure_ascii=False to avoid escaping non-ASCII characters
            with open(filepath, 'w', encoding='utf-8') as file:
                json.dump(data, file, indent=4, ensure_ascii=False)

            print(f"Processed {filename}")

# Specify the path to the folder containing the JSON files
json_folder = '/Users/jesperpilegaard/Desktop/Superliga 2022-2023/json-summaries'  # Adjust this path to your folder location

# Run the function
load_and_replace_team_names(json_folder)


Processed f24-100-2022-2290109-eventdetails.json
Processed f24b-100-2022-2290106-eventdetails.json
Processed f24-100-2022-2359898-eventdetails.json
Processed f24-100-2022-2290073-eventdetails.json
Processed f24b-100-2022-2290032-eventdetails.json
Processed f24-100-2022-2290015-eventdetails.json
Processed f24-100-2022-2290121-eventdetails.json
Processed f24-100-2022-2359860-eventdetails.json
Processed f24-100-2022-2290097-eventdetails.json
Processed f24b-100-2022-2290098-eventdetails.json
Processed f24-100-2022-2359906-eventdetails.json
Processed f24b-100-2022-2290048-eventdetails.json
Processed f24-100-2022-2290009-eventdetails.json
Processed f24-100-2022-2290047-eventdetails.json
Processed f24-100-2022-2359884-eventdetails.json
Processed f24-100-2022-2290021-eventdetails.json
Processed f24-100-2022-2290115-eventdetails.json
Processed f24-100-2022-2359854-eventdetails.json
Processed f24-100-2022-2290038-eventdetails.json
Processed f24-100-2022-2290076-eventdetails.json
Processed f24-10

In [4]:
import os
import json

def fix_json_encoding(directory):
    # Loop over all files in the directory
    for filename in os.listdir(directory):
        if filename.endswith('.json'):
            file_path = os.path.join(directory, filename)

            # Read the original JSON data
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)

            # Write the data back with ensure_ascii set to False
            with open(file_path, 'w', encoding='utf-8') as file:
                json.dump(data, file, ensure_ascii=False, indent=4)
            
            print(f"Fixed encoding for {filename}")

# Example usage
if __name__ == '__main__':
    directory = '/Users/jesperpilegaard/Desktop/Superliga 2022-2023/json-summaries'
    fix_json_encoding(directory)

Fixed encoding for f24-100-2022-2290109-eventdetails.json
Fixed encoding for f24b-100-2022-2290106-eventdetails.json
Fixed encoding for f24-100-2022-2359898-eventdetails.json
Fixed encoding for f24-100-2022-2290073-eventdetails.json
Fixed encoding for f24b-100-2022-2290032-eventdetails.json
Fixed encoding for f24-100-2022-2290015-eventdetails.json
Fixed encoding for f24-100-2022-2290121-eventdetails.json
Fixed encoding for f24-100-2022-2359860-eventdetails.json
Fixed encoding for f24-100-2022-2290097-eventdetails.json
Fixed encoding for f24b-100-2022-2290098-eventdetails.json
Fixed encoding for f24-100-2022-2359906-eventdetails.json
Fixed encoding for f24b-100-2022-2290048-eventdetails.json
Fixed encoding for f24-100-2022-2290009-eventdetails.json
Fixed encoding for f24-100-2022-2290047-eventdetails.json
Fixed encoding for f24-100-2022-2359884-eventdetails.json
Fixed encoding for f24-100-2022-2290021-eventdetails.json
Fixed encoding for f24-100-2022-2290115-eventdetails.json
Fixed enco

In [1]:
import os
import pandas as pd
import xml.etree.ElementTree as ET
import re

def load_mappings(xml_folder):
    player_mappings = {}
    team_mappings = {}
    for filename in os.listdir(xml_folder):
        if filename.endswith(".xml"):
            path = os.path.join(xml_folder, filename)
            tree = ET.parse(path)
            root = tree.getroot()
            # Load player mappings
            for player in root.findall(".//Player"):
                player_id = player.get("player_id")
                player_name = player.get("player_name")
                if player_id and player_name:
                    player_mappings[player_id] = player_name
            # Load team mappings from the root attributes
            home_team_id = root.get('home_team_id')
            home_team_name = root.get('home_team_name')
            away_team_id = root.get('away_team_id')
            away_team_name = root.get('away_team_name')
            team_mappings[home_team_id] = home_team_name
            team_mappings[away_team_id] = away_team_name
            print(f"Loaded {len(player_mappings)} player and {len(team_mappings)} team entries from {filename}.")
    return player_mappings, team_mappings

def replace_ids_with_names(csv_folder, player_mappings, team_mappings):
    for filename in os.listdir(csv_folder):
        if filename.endswith(".csv"):
            path = os.path.join(csv_folder, filename)
            df = pd.read_csv(path)
            # Process each row in the CSV file
            events_updated = 0
            for index, row in df.iterrows():
                summary = row['Summary']  # Assuming 'Summary' contains the event descriptions
                # Replace all IDs found with corresponding names
                matches = re.findall(r'\d+', summary)
                for match in matches:
                    if match in player_mappings:
                        summary = summary.replace(match, player_mappings[match])
                    elif match in team_mappings:
                        summary = summary.replace(match, team_mappings[match])
                df.at[index, 'Summary'] = summary
                events_updated += 1
            # Save the modified CSV file
            df.to_csv(path, index=False)
            print(f"Updated {events_updated} events in {filename}")

# Paths to the folders
xml_folder = '/Users/jesperpilegaard/Desktop/Superliga 2022-2023/pass-files'  # Update with your actual path
csv_folder = '/Users/jesperpilegaard/Desktop/Superliga 2022-2023/csv-summaries'  # Update with your actual path

# Load player and team mappings from XML files
player_mappings, team_mappings = load_mappings(xml_folder)

# Replace IDs in CSV files
replace_ids_with_names(csv_folder, player_mappings, team_mappings)


Loaded 16 player and 2 team entries from pass_matrix_100_2022_g2359864_t418.xml.
Loaded 32 player and 4 team entries from pass_matrix_100_2022_g2290059_t569.xml.
Loaded 47 player and 5 team entries from pass_matrix_100_2022_g2359898_t2289.xml.
Loaded 62 player and 7 team entries from pass_matrix_100_2022_g2290014_t1000.xml.
Loaded 78 player and 9 team entries from pass_matrix_100_2022_g2290013_t2592.xml.
Loaded 93 player and 9 team entries from pass_matrix_100_2022_g2290096_t272.xml.
Loaded 98 player and 10 team entries from pass_matrix_100_2022_g2290082_t569.xml.
Loaded 114 player and 11 team entries from pass_matrix_100_2022_g2290015_t239.xml.
Loaded 129 player and 12 team entries from pass_matrix_100_2022_g2359892_t401.xml.
Loaded 145 player and 12 team entries from pass_matrix_100_2022_g2359881_t545.xml.
Loaded 151 player and 12 team entries from pass_matrix_100_2022_g2290069_t401.xml.
Loaded 157 player and 12 team entries from pass_matrix_100_2022_g2290011_t569.xml.
Loaded 159 pla