Wrangle the stats JSON files and generate the stats.csv file

In [1]:
import os
import pandas as pd
import json

In [2]:
# Directory holding the renamed JSON files; it is passed to
# process_stats_json_folder, which scans it for '*stats*.json' files.
folder_path = 'D:\\Python\\2024\\Scraping\\scraping_events_tcc\\json_renamed'

In [3]:
def extract_stats(period_data):
    """Flatten nested period/group/statistic records into parallel lists.

    Each element of ``period_data`` must provide ``'period'`` and
    ``'groups'``; each group must provide ``'groupName'`` and
    ``'statisticsItems'``; each item must provide ``'name'``,
    ``'statisticsType'``, ``'homeValue'`` and ``'awayValue'``.

    Returns:
        A 6-tuple of equally long lists, one entry per statistic item, in
        the order: periods, group names, stat names, stat types,
        home values, away values.

    Raises:
        KeyError: if any of the keys listed above is missing.
    """
    item_keys = ('name', 'statisticsType', 'homeValue', 'awayValue')
    # One list per output column, in the order they are returned.
    columns = [[] for _ in range(6)]

    for entry in period_data:
        for group in entry['groups']:
            label = group['groupName']

            for item in group['statisticsItems']:
                # Read the item fields first, then the period label, so a
                # malformed item fails before anything is recorded.
                name, kind, home, away = (item[key] for key in item_keys)
                record = (entry['period'], label, name, kind, home, away)

                for column, value in zip(columns, record):
                    column.append(value)

    return tuple(columns)

def process_stats_json_folder(folder_path):
    """Combine every stats JSON file in a folder into one DataFrame.

    A file is considered when its name ends with ``.json`` and contains
    ``stats``. Files whose JSON has no ``'statistics'`` key are skipped.
    The ``date`` and ``code`` columns are taken from the second and third
    ``_``-separated parts of the filename, which therefore needs at least
    four such parts.

    Processing is best-effort: a file that cannot be read, parsed or
    flattened is reported on stdout and skipped, and the remaining files
    are still processed.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A DataFrame with the columns ``date``, ``code``, ``period``,
        ``group_name``, ``stat_name``, ``stat_types``, ``home_value`` and
        ``away_value``. When no file yields data, an empty DataFrame with
        those columns is returned instead of raising.
    """
    columns = [
        'date', 'code', 'period', 'group_name',
        'stat_name', 'stat_types', 'home_value', 'away_value',
    ]
    stats_df = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the result reproducible between runs and machines.
    for filename in sorted(os.listdir(folder_path)):
        if not (filename.endswith('.json') and 'stats' in filename):
            continue

        try:
            json_file_path = os.path.join(folder_path, filename)

            with open(json_file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)

            if 'statistics' not in data:
                continue

            periods, group_names, stat_names, stat_types, home_values, away_values = extract_stats(data['statistics'])
            df = pd.DataFrame({
                'period': periods,
                'group_name': group_names,
                'stat_name': stat_names,
                'stat_types': stat_types,
                'home_value': home_values,
                'away_value': away_values
            })

            # Raises ValueError (caught below) when the filename has fewer
            # than four '_'-separated parts.
            date, code, _ = filename.split('_')[1:4]
            df.insert(0, 'date', date)
            df.insert(1, 'code', code)

            stats_df.append(df)

        except Exception as e:
            # Per-file boundary: report the problem and keep going.
            print(f'Error processing file {filename}: {str(e)}')
            print(f'Exception type: {type(e)}')
            print(f'Detailed exception information: {e}')

    # pd.concat raises ValueError on an empty list, so handle the
    # "nothing usable found" case explicitly.
    if not stats_df:
        return pd.DataFrame(columns=columns)

    return pd.concat(stats_df, ignore_index=True)


# Build the combined table from every stats file under folder_path and
# write it to stats.csv in the current working directory; index=False
# keeps the DataFrame's row index out of the file.
result_df = process_stats_json_folder(folder_path)
result_df.to_csv('stats.csv', index=False)
