Data wrangling of the events' JSON files to generate the events.csv file

In [74]:
import os
import pandas as pd
import json

In [75]:
# Directory that is scanned for the event JSON files.
# NOTE(review): absolute Windows path, so it only resolves on the original
# author's machine layout — adjust before running elsewhere.
folder_path = 'D:\\Python\\2024\\Scraping\\scraping_events_tcc\\json_renamed'

In [76]:
def ExtractorEventsInfo(event_data):
    """Flatten a nested event dict into a flat dict of selected fields.

    Every output column is associated with a dot-separated path into
    ``event_data`` (for example ``'venue.city.name'``). The path is followed
    one key at a time; as soon as the current node is not a dict or does not
    contain the next key, the column is set to ``None``.

    Args:
        event_data: The nested event structure to read from. Anything that
            is not a dict yields ``None`` for every column.

    Returns:
        A dict with one entry per column, in the order the columns are
        declared below.
    """
    # Output column name -> dot-separated path inside the event structure.
    field_paths = {
        'event_id': 'id',
        'slug': 'slug',
        'custom_id': 'customId',
        'periods': 'defaultPeriodCount',
        'period_length': 'defaultPeriodLength',
        'overtime_length': 'defaultOvertimeLength',
        'injury_p1': 'time.injuryTime1',
        'injury_p2': 'time.injuryTime2',
        'tournament_name': 'tournament.name',
        'tournament_slug': 'tournament.slug',
        'tournament_country': 'tournament.category.country.name',
        'tournament_country_c': 'tournament.category.country.alpha2',
        'stats_players': 'tournament.uniqueTournament.hasEventPlayerStatistics',
        'season_year': 'season.year',
        'round': 'roundInfo.round',
        'match_city': 'venue.city.name',
        'match_stadium': 'venue.stadium.name',
        'match_stadium_capacity': 'venue.stadium.capacity',
        'match_country': 'venue.country.name',
        'match_country_c': 'venue.country.alpha2',
        'hometeam': 'homeTeam.slug',
        'ht_namecode': 'homeTeam.nameCode',
        'ht_manager': 'homeTeam.manager.slug',
        'ht_manager_id': 'homeTeam.manager.id',
        'ht_manager_country': 'homeTeam.manager.country.name',
        'ht_manager_country_c': 'homeTeam.manager.country.alpha2',
        'ht_city': 'homeTeam.venue.city.name',
        'ht_stadium': 'homeTeam.venue.stadium.name',
        'ht_stadium_capacity': 'homeTeam.venue.stadium.capacity',
        'ht_country': 'homeTeam.venue.country.name',
        'ht_country_c': 'homeTeam.venue.country.alpha2',
        'hometeam_id': 'homeTeam.id',
        'homescore_p1': 'homeScore.period1',
        'homescore_p2': 'homeScore.period2',
        'homescore_final': 'homeScore.current',
        'awayteam': 'awayTeam.slug',
        'at_namecode': 'awayTeam.nameCode',
        'at_city': 'awayTeam.venue.city.name',
        'at_stadium': 'awayTeam.venue.stadium.name',
        'at_stadium_capacity': 'awayTeam.venue.stadium.capacity',
        'at_country': 'awayTeam.venue.country.name',
        'at_country_c': 'awayTeam.venue.country.alpha2',
        'awayteam_id': 'awayTeam.id',
        'awayscore_p1': 'awayScore.period1',
        'awayscore_p2': 'awayScore.period2',
        'awayscore_final': 'awayScore.current'
    }

    def lookup(dotted_path):
        """Follow ``dotted_path`` from the event root; None if it breaks."""
        node = event_data
        for part in dotted_path.split('.'):
            # A missing key or a non-dict intermediate ends the walk.
            if not isinstance(node, dict) or part not in node:
                return None
            node = node[part]
        return node

    return {
        column: lookup(dotted_path)
        for column, dotted_path in field_paths.items()
    }

In [77]:
# One single-row DataFrame per successfully parsed event file.
events_df = []

for filename in os.listdir(folder_path):
    # Best-effort loading: any failure on a file is reported and the loop
    # moves on to the next file instead of aborting the whole run.
    try:
        # Skip anything that is not a .json file with 'event' in its name.
        if not (filename.endswith('.json') and 'event' in filename):
            continue

        json_file_path = os.path.join(folder_path, filename)
        with open(json_file_path, 'r', encoding='utf-8') as file:
            ojson = json.load(file)

        # Files without a top-level 'event' entry have nothing to extract.
        if 'event' not in ojson:
            continue

        df = pd.DataFrame([ExtractorEventsInfo(ojson['event'])])

        # The 2nd and 3rd underscore-separated parts of the file name become
        # the 'date' and 'code' columns. The name must have at least four
        # parts; otherwise the unpacking raises and the file is reported.
        date, code, _ = filename.split('_')[1:4]
        df.insert(0, 'date', date)
        df.insert(1, 'code', code)

        events_df.append(df)

    except Exception as e:
        print(f"Erro ao processar o arquivo {filename}: {str(e)}")
        # Print additional information about the exception
        print(f"Tipo de exceção: {type(e)}")
        print(f"Informações detalhadas sobre a exceção: {e}")

if events_df:
    # Concatenate the per-file DataFrames into a single DataFrame
    result_df = pd.concat(events_df, ignore_index=True)
    result_df.to_csv('events.csv', index=False)
else:
    # pd.concat raises "No objects to concatenate" on an empty list. Keep
    # result_df defined (empty, with the expected columns) and leave any
    # existing events.csv untouched rather than overwriting it with nothing.
    result_df = pd.DataFrame(columns=['date', 'code', *ExtractorEventsInfo({})])
    print(f"Nenhum arquivo de evento válido encontrado em {folder_path}; events.csv não foi gerado.")
