In [2]:
import os
import csv
import xml.etree.ElementTree as ET

significant_events = {
    '1': 'Pass',
    '2': 'Offside Pass',
    '4': 'Foul',
    '6': 'Corner Awarded',
    '13': 'Shot off target',
    '14': 'Shot on post',
    '15': 'Shot saved',
    '7': 'Tackle',
    '8': 'Interception',
    '10': 'Save',
    '12': 'Clearance',
    '16': 'Goal',
    '17': 'Card'
}

def parse_xml(file_path):
    tree = ET.parse(file_path)
    root = tree.getroot()
    return root

def get_game_info(root):
    game_info = root.find('.//Game')
    if game_info is not None:
        return {
            "matchday": game_info.get('matchday')
        }
    return {}

def determine_zone(x, y):
    try:
        x = float(x)
        y = float(y)
        if x <= 16.7:
            x_zone = 1
        elif x <= 33.3:
            x_zone = 2
        elif x <= 50:
            x_zone = 3
        elif x <= 66.7:
            x_zone = 4
        elif x <= 83.3:
            x_zone = 5
        else:
            x_zone = 6

        if y <= 20:
            y_zone = 'A'
        elif y <= 40:
            y_zone = 'B'
        elif y <= 60:
            y_zone = 'C'
        elif y <= 80:
            y_zone = 'D'
        else:
            y_zone = 'E'

        return f"{x_zone}{y_zone}"
    except ValueError:
        return "Unknown"

def extract_event_data(event, event_name, game_info, recipient_id=None):
    player_id = event.get('player_id', 'Unknown Player ID')
    team_id = event.get('team_id', 'Unknown Team ID')
    minute = event.get('min', 'Unknown Minute')
    second = event.get('sec', 'Unknown Second')
    outcome = "Successful" if event.get('outcome', '1') == '1' else "Unsuccessful"
    x_start = event.get('x', '50')
    y_start = event.get('y', '50')
    start_zone = determine_zone(x_start, y_start)
    qualifiers = {q.get('qualifier_id'): q.get('value') for q in event.findall('.//Qualifier')}
    x_end = qualifiers.get('140', x_start)
    y_end = qualifiers.get('141', y_start)
    end_zone = determine_zone(x_end, y_end)
    summary = f"Matchday {game_info['matchday']}: {outcome} {event_name} in zone {start_zone}"
    if recipient_id and event_name == 'Pass':
        summary += f" by player {player_id} to player {recipient_id} in zone {end_zone} for team {team_id} at minute {minute}:{second}"
    else:
        summary += f" by player {player_id} for team {team_id} at minute {minute}:{second}"
    return {
        'summary': summary
    }

def process_xml_files(source_directory, target_directory):
    for filename in os.listdir(source_directory):
        if filename.startswith('f24') and filename.endswith('.xml'):
            file_path = os.path.join(source_directory, filename)
            xml_root = parse_xml(file_path)
            game_info = get_game_info(xml_root)
            events = list(xml_root.findall('.//Event'))
            events_list = []

            for i, event in enumerate(events):
                event_type = event.get('type_id')
                if event_type in significant_events:
                    next_event = events[i + 1] if i + 1 < len(events) else None
                    if event_type == '1' and next_event and event.get('team_id') == next_event.get('team_id') and event.get('outcome') == '1':
                        recipient_id = next_event.get('player_id')
                        event_data = extract_event_data(event, significant_events[event_type], game_info, recipient_id)
                    else:
                        event_data = extract_event_data(event, significant_events[event_type], game_info)
                    events_list.append(event_data['summary'])

            output_filename = filename.replace('.xml', '.csv')
            output_path = os.path.join(target_directory, output_filename)
            with open(output_path, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['Summary'])  # Header
                for summary in events_list:
                    writer.writerow([summary])

            print(f"Processed {filename} into {output_path}")

if __name__ == '__main__':
    source_directory = '/Users/jesperpilegaard/Desktop/Superliga 2022-2023/f24-files'
    target_directory = '/Users/jesperpilegaard/Desktop/Superliga 2022-2023/csv-summaries'
    if not os.path.exists(target_directory):
        os.makedirs(target_directory)
    process_xml_files(source_directory, target_directory)


Processed f24-100-2022-2290109-eventdetails.xml into /Users/jesperpilegaard/Desktop/Superliga 2022-2023/csv-summaries/f24-100-2022-2290109-eventdetails.csv
Processed f24-100-2022-2290030-eventdetails.xml into /Users/jesperpilegaard/Desktop/Superliga 2022-2023/csv-summaries/f24-100-2022-2290030-eventdetails.csv
Processed f24b-100-2022-2290056-eventdetails.xml into /Users/jesperpilegaard/Desktop/Superliga 2022-2023/csv-summaries/f24b-100-2022-2290056-eventdetails.csv
Processed f24b-100-2022-2290068-eventdetails.xml into /Users/jesperpilegaard/Desktop/Superliga 2022-2023/csv-summaries/f24b-100-2022-2290068-eventdetails.csv
Processed f24-100-2022-2290055-eventdetails.xml into /Users/jesperpilegaard/Desktop/Superliga 2022-2023/csv-summaries/f24-100-2022-2290055-eventdetails.csv
Processed f24-100-2022-2290092-eventdetails.xml into /Users/jesperpilegaard/Desktop/Superliga 2022-2023/csv-summaries/f24-100-2022-2290092-eventdetails.csv
Processed f24-100-2022-2290074-eventdetails.xml into /Users/