In [None]:
import json
import glob
import pandas as pd
from pydantic import BaseModel
from datetime import datetime, timedelta, date
from dateutil.parser import parse




class Race(BaseModel):
    """Pydantic schema for a single race record.

    Every field is declared without a default, so pydantic requires all of
    them to be supplied when a ``Race`` is constructed.
    """
    # NOTE(review): the loader cell in this file stores None for posttime /
    # estimatedPosttime / offTime when the source key is missing or empty;
    # these non-optional annotations would presumably reject such rows --
    # confirm before validating loader output with this model.
    race_id: str
    race_number: str
    runnercount: int
    posttime: datetime
    estimatedPosttime: datetime  # camelCase matches the key name read by the loader cell
    offTime: datetime  # camelCase matches the key name read by the loader cell
    weather: str
    going: str
    race_name: str  # the loader cell reads this from the race's 'name' key
    race_status: str  # the loader cell reads this from the race's 'status' key

class Fixture(BaseModel):
    """Pydantic schema for one fixture together with its nested races.

    Every field is declared without a default, so pydantic requires all of
    them to be supplied when a ``Fixture`` is constructed.
    """
    fixture_id: int
    fixture_date: date
    racecount: int
    temperature_fahrenheit: int
    temperature_celsius: float
    firstposttime: datetime
    # Nested Race models. NOTE(review): the loader cell's fixture rows do not
    # carry a 'races' key, so they cannot be passed to this model as-is.
    races: list[Race]






In [None]:
def parse_optional_datetime(value: "str | None") -> "datetime | None":
    """Return ``parse(value)``, or None when *value* is missing or empty."""
    return parse(value) if value else None


def flatten_payload(payload: dict) -> tuple[list[dict], list[dict], list[dict], list[dict]]:
    """Flatten one decoded downloads file into flat row dicts.

    Args:
        payload: The object returned by ``json.load`` for one file. A falsy
            payload (None, ``{}``) yields four empty lists.

    Returns:
        ``(fixture_rows, race_rows, entry_rows, price_rows)`` -- one dict per
        fixture, race, entry and show price found in *payload*, in document
        order.

    Nested objects are read with ``.get(key) or {}`` / ``or []`` so that a key
    that is present but null is treated the same as a missing key.
    """
    fixture_rows: list[dict] = []
    race_rows: list[dict] = []
    entry_rows: list[dict] = []
    price_rows: list[dict] = []

    # Bind the fixture list unconditionally: an empty payload must contribute
    # nothing rather than reuse the fixtures of a previously processed file.
    fixtures = (payload.get('fixtures') or []) if payload else []

    for fixture in fixtures:
        header = fixture.get('header') or {}
        temperature = header.get('temperature') or {}
        fixture_day = parse_optional_datetime(header.get('date'))

        fixture_rows.append({
            'fixture_id': header.get('id'),
            'fixture_date': fixture_day.date() if fixture_day else None,
            'racecount': header.get('racecount'),
            'temperature_fahrenheit': temperature.get('fahrenheit'),
            'temperature_celsius': temperature.get('celsius'),
            'firstposttime': parse_optional_datetime(header.get('firstposttime')),
        })

        for race in fixture.get('races') or []:
            tracksurface = race.get('tracksurface') or {}
            # NOTE: the race's 'statusHistory' list is not flattened yet.
            race_rows.append({
                'race_id': race.get('id'),
                'race_number': race.get('number'),
                'runnercount': race.get('runnercount'),
                'posttime': parse_optional_datetime(race.get('posttime')),
                'estimatedPosttime': parse_optional_datetime(race.get('estimatedPosttime')),
                'offTime': parse_optional_datetime(race.get('offTime')),
                'weather': race.get('weather'),
                'going': race.get('going'),
                'race_name': race.get('name'),
                'race_status': race.get('status'),
                'tracksurface': tracksurface.get('description'),
                'overround': race.get('overround'),
                'overround_selection': race.get('overround_selection'),
            })

            # Iterate every entry; the loop variable must not be rebound to
            # entries[0], otherwise only the first entry's prices are read.
            for entry in race.get('entries') or []:
                weight = entry.get('weight') or {}
                jockey = entry.get('jockey') or {}
                starting_price = entry.get('startingPrice') or {}
                final_position = entry.get('finalPosition') or {}

                entry_rows.append({
                    'entry_id': entry.get('id'),
                    'entry_startNumber': entry.get('startNumber'),
                    'entry_programNumber': entry.get('programNumber'),
                    'entry_startPosition': entry.get('startPosition'),
                    'entry_coupledIndicator': entry.get('coupledIndicator'),
                    'entry_decoupledNumber': entry.get('decoupledNumber'),
                    'entry_horse_id': entry.get('horse_id'),
                    'entry_name': entry.get('name'),
                    'entry_status': entry.get('status'),
                    'entry_weight_value': weight.get('value'),
                    'entry_weight_unit': weight.get('unit'),
                    'entry_jockey_id': jockey.get('id'),
                    'entry_jockey_name': jockey.get('name'),
                    # NOTE(review): source key is spelled 'nominator' here but
                    # 'numerator' in showPrices -- confirm against the feed.
                    'entry_startingPrice_nominator': starting_price.get('nominator'),
                    'entry_startingPrice_denominator': starting_price.get('denominator'),
                    'entry_favPos': entry.get('favPos'),
                    'entry_favJoint': entry.get('favJoint'),
                    'entry_finalPosition_position': final_position.get('position'),
                    'entry_finalPosition_deadHeat': final_position.get('deadHeat'),
                    'entry_finalPosition_disqualified': final_position.get('disqualified'),
                    'entry_finalPosition_amendedPosition': final_position.get('amendedPosition'),
                })

                # 'showPrices' is a list of dicts, one per price update.
                for price in entry.get('showPrices') or []:
                    price_rows.append({
                        'price_timestamp': parse_optional_datetime(price.get('timestamp')),
                        'price_numerator': price.get('numerator'),
                        'price_denominator': price.get('denominator'),
                        'price_market': price.get('market'),
                    })

    return fixture_rows, race_rows, entry_rows, price_rows


# list of all files in downloads
files = glob.glob('downloads/*.json')

fixture_data = []
race_data = []
entry_data = []
price_data = []
for file in files:
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file, 'r', encoding='utf-8') as f:
        result_fofixtures = json.load(f)

    fixture_rows, race_rows, entry_rows, price_rows = flatten_payload(result_fofixtures)
    fixture_data.extend(fixture_rows)
    race_data.extend(race_rows)
    entry_data.extend(entry_rows)
    price_data.extend(price_rows)


# One row per show price across all files; the bare name displays it in the notebook.
df = pd.DataFrame(price_data)
df


In [None]:
# Scratch cell: walk one downloaded file by hand, taking the first fixture,
# its first race, that race's first entry and that entry's first show price,
# and bind every extracted field to a module-level name for inspection.
# Indexing with [0] raises IndexError when the corresponding list is empty.
file = '/home/gazi/main_dir/data_collection_1st/downloads/1752138913921.json'
# JSON text is UTF-8; do not depend on the platform's default encoding.
with open(file, 'r', encoding='utf-8') as f:
    result_fofixtures = json.load(f)


# Always rebind ``fixtures`` so an empty payload cannot silently fall back to
# a ``fixtures`` list left over from an earlier cell. ``or`` also covers keys
# that are present but null, which ``.get(key, default)`` does not.
fixtures: list = (result_fofixtures or {}).get('fixtures') or []


fixture: dict = fixtures[0] # loop
fixture_header: dict = fixture.get('header') or {}
fixture_id: int = fixture_header.get('id')
fixture_date: date = parse(fixture_header.get('date')).date() if fixture_header.get('date') else None
fixture_racecount: int = fixture_header.get('racecount')
fixture_temperature: dict = fixture_header.get('temperature') or {}
fixture_temperature_fahrenheit: int = fixture_temperature.get('fahrenheit')
fixture_temperature_celsius: float = fixture_temperature.get('celsius')
fixture_firstposttime: datetime = parse(fixture_header.get('firstposttime')) if fixture_header.get('firstposttime') else None
track: dict = fixture.get('track') or {}
track_id: str = track.get('id')
track_name: str = track.get('name')
races: list = fixture.get('races') or []

race: dict = races[0] # loop
race_id: str = race.get('id')
race_number: str = race.get('number')
race_runnercount: int = race.get('runnercount')
race_posttime: datetime = parse(race.get('posttime')) if race.get('posttime') else None
race_estimatedPosttime: datetime = parse(race.get('estimatedPosttime')) if race.get('estimatedPosttime') else None
race_offTime: datetime = parse(race.get('offTime')) if race.get('offTime') else None
race_weather: str = race.get('weather')
race_going: str = race.get('going')
race_name: str = race.get('name')
# Annotated as str to agree with the Race model and the loader cell.
race_status: str = race.get('status')
race_statusHistory: list = race.get('statusHistory')
race_tracksurface: dict = race.get('tracksurface') or {}
# NOTE(review): this cell reads the 'value' key while the loader cell reads
# 'description' from the same object -- confirm which key the feed provides.
race_tracksurface_value: str = race_tracksurface.get('value')
race_overround: str = race.get('overround')
race_overround_selection: str = race.get('overround_selection')
entries: list = race.get('entries') or []

entry: dict = entries[0] # loop
entry_id: int = entry.get('id')
entry_startNumber: int = entry.get('startNumber')
entry_programNumber: int = entry.get('programNumber')
entry_startPosition: int = entry.get('startPosition')
entry_coupledIndicator: int = entry.get('coupledIndicator')
entry_decoupledNumber: str = entry.get('decoupledNumber')
entry_horse_id: int = entry.get('horse_id')
entry_name: str = entry.get('name')
entry_status: int = entry.get('status')
entry_weight: dict = entry.get('weight') or {}
entry_weight_value: int = entry_weight.get('value')
entry_weight_unit: str = entry_weight.get('unit')
entry_jockey: dict = entry.get('jockey') or {}
entry_jockey_id: str = entry_jockey.get('id')
entry_jockey_name: str = entry_jockey.get('name')
entry_startingPrice: dict = entry.get('startingPrice') or {}
entry_startingPrice_nominator: str = entry_startingPrice.get('nominator')
entry_startingPrice_denominator: str = entry_startingPrice.get('denominator')
entry_favPos: str = entry.get('favPos')
entry_favJoint: str = entry.get('favJoint')
entry_finalPosition: dict = entry.get('finalPosition') or {}
entry_finalPosition_position: str = entry_finalPosition.get('position')
entry_finalPosition_deadHeat: str = entry_finalPosition.get('deadHeat')
entry_finalPosition_disqualified: bool = entry_finalPosition.get('disqualified')
entry_finalPosition_amendedPosition: str = entry_finalPosition.get('amendedPosition')
prices: list = entry.get('showPrices') or [] # NOTE: this is a list of dicts

price: dict = prices[0] # loop
price_timestamp: datetime = parse(price.get('timestamp')) if price.get('timestamp') else None
price_numerator: str = price.get('numerator')
price_denominator: str = price.get('denominator')
price_market: str = price.get('market')




