In [1]:
import pandas as pd
import requests
from google.colab import userdata
import json

In [2]:
X_RAPIDAPI_HOST = 'v3.football.api-sports.io'
KEY = userdata.get('KEY')
drive_path_datasets = '/content/drive/MyDrive/TFM/Prototipo/datasets'

In [3]:
# @title
def req_football_api(endpoint:str, params:dict=None, host_url:str=X_RAPIDAPI_HOST, key:str=KEY, timeout:float=30):
  '''
  Send a GET request to a football API endpoint and return its payload.

  Parameters:
  ------
  endpoint : str
      Path appended to the host to build the URL (e.g. 'fixtures').
  params : dict, optional
      Query-string parameters forwarded to requests.get.
  host_url : str
      API host; used both in the URL and in the 'x-rapidapi-host' header.
  key : str
      API key sent in the 'x-rapidapi-key' header.
  timeout : float
      Seconds passed to requests.get as its timeout, so a stalled
      connection fails instead of blocking the notebook forever.

  Returns:
  ------
  The value of the 'response' field of the JSON body (it may be empty when
  the API returns no data), or None if the request fails, the body is not
  valid JSON, or the body has no 'response' field.
  '''

  url = f'https://{host_url}/{endpoint}'

  # Use the arguments (not the module-level globals) so callers can really
  # override the host and the key.
  headers = {
    'Accept': 'application/json',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
    'x-rapidapi-host': host_url,
    'x-rapidapi-key': key
  }

  # Stays None on every failure path, which is what the caller must check.
  result = None

  try:
      response = requests.get(url, headers=headers, params=params, timeout=timeout)
      response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)

      body = response.json()

      if isinstance(body, dict) and body.get('response'):
        result = body['response']
        print('API Call Successful!')
      else:
        print('No data returned from API.')
        if isinstance(body, dict):
          # Keep the (empty) payload so callers can tell "no data" from "failed".
          result = body.get('response')
          print(body.get('errors'))
  except requests.exceptions.HTTPError as http_err:
      print(f'HTTP error occurred: {http_err}')
  except requests.exceptions.ConnectionError as conn_err:
      print(f'Connection error occurred: {conn_err}')
  except requests.exceptions.Timeout as timeout_err:
      print(f'Timeout error occurred: {timeout_err}')
  except requests.exceptions.RequestException as req_err:
      print(f'An error occurred: {req_err}')
  except ValueError as json_err:
      # Kept after RequestException: several request errors (e.g. invalid
      # URL) are also ValueError subclasses and must keep their own message.
      print(f'Response body is not valid JSON: {json_err}')
  finally:
      print('API Call Completed.')

  return result

In [4]:
# @title
def save_dict_to_csv(data_dict: dict, filename: str, path: str):
  '''
  Build a pandas DataFrame from `data_dict` and write it as CSV, without
  the index column, to `path`/`filename`.

  Any exception raised while building or writing the frame is caught and
  reported with print; the function always returns None.
  '''
  target = '{}/{}'.format(path, filename)
  try:
    pd.DataFrame(data_dict).to_csv(target, index=False)
    print(f"Data successfully saved to {target}")
  except Exception as err:
    print(f"Error saving dictionary to CSV at {target}: {err}")

In [5]:
# @title
def print_json(json_data, limit: int = 5):
  '''
  Pretty-print JSON-serializable data, truncating lists to a short preview.

  Parameters:
  ------
  json_data
      Either a dict whose 'response' value is a list (only that list is
      previewed), a list (previewed directly), or any other
      JSON-serializable value (printed whole).
  limit : int
      Maximum number of list items to print. Defaults to 5.
  '''
  # Only look up 'response' on real dicts: a membership test on a list or a
  # string can succeed and then fail on the subscript, and fails outright on
  # values such as numbers.
  if isinstance(json_data, dict) and isinstance(json_data.get('response'), list):
      preview = json_data['response'][:limit]
  elif isinstance(json_data, list):
      preview = json_data[:limit]
  else:
      preview = json_data

  print(json.dumps(preview, indent=2, ensure_ascii=False))

In [6]:
# @title
def save_json(data, filename: str, path: str, encoding:str = 'utf-8'):
  '''
  Serialize `data` as JSON into `path`/`filename`.

  The file is opened for writing with the given `encoding` and the JSON is
  written with a 2-space indent and non-ASCII characters left unescaped.
  An IOError while opening or writing is reported with print instead of
  being raised; the function always returns None.
  '''
  destination = '{}/{}'.format(path, filename)
  try:
    with open(destination, mode='w', encoding=encoding) as handle:
      json.dump(data, handle, indent=2, ensure_ascii=False)
  except IOError as err:
    print(f'Error saving file to {destination}: {err}')
  else:
    print(f'Data successfully saved to {destination}')

In [7]:
# @title
def load_json(filename: str, path: str):
  '''
  Read and parse the UTF-8 JSON file at `path`/`filename`.

  Returns the parsed data. If the file does not exist, does not contain
  valid JSON, or cannot be read (IOError), the problem is reported with
  print and None is returned.
  '''
  source = '{}/{}'.format(path, filename)
  parsed = None  # returned unchanged on every handled failure
  try:
    with open(source, mode='r', encoding='utf-8') as handle:
      parsed = json.load(handle)
  except FileNotFoundError:
    print(f'Error: The file {source} was not found.')
  except json.JSONDecodeError as err:
    print(f'Error decoding JSON from {source}: {err}')
  except IOError as err:
    print(f'Error loading file from {source}: {err}')
  return parsed

In [8]:
def process_and_verify_json_encoding(source_filename: str,source_path: str,dest_filename: str,dest_path: str):
    '''
    Re-save a JSON file through load_json / save_json.

    The source file is parsed with load_json and the parsed data is written
    to the destination with save_json, which writes UTF-8 with
    ensure_ascii=False, so \\uXXXX escapes in the source come out as the
    actual characters. Source and destination may be the same file, in which
    case it is rewritten in place.

    Parameters:
    ------
    source_filename, source_path : str
        File name and directory of the JSON file to read.
    dest_filename, dest_path : str
        File name and directory of the JSON file to write.

    Nothing is returned; progress and failures are reported with print.
    '''
    print(f"\n--- Processing file: {source_filename} ---")

    # 1. Load the JSON file. `json.load` automatically decodes \uXXXX.
    print(f"Loading {source_filename} from {source_path}...")
    loaded_data = load_json(source_filename, source_path)

    # load_json signals failure with None. An empty list or dict is a valid
    # document and must still be re-saved, so compare against None rather
    # than relying on truthiness.
    if loaded_data is not None:
        # 2. Save the loaded data to a new JSON file with UTF-8 encoding.
        print(f"\nSaving processed data to {dest_filename} in {dest_path}...")
        save_json(loaded_data, dest_filename, dest_path)
    else:
        print(f"Failed to load {source_filename}. Processing stopped.")

In [9]:
# @title
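# Leagues (disabled: the cell body below is a string literal, so none of it runs; it expects `loaded_json` and `csv_file_to_save` to be defined by another cell)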

"""
dict_leagues = []
for x in loaded_json:
    for season in x['seasons']:
        dict_leagues.append({
            'id': x['league']['id'],
            'name': x['league']['name'],
            'country': x['country']['name'],
            'season': season['year'],
            'logo': x['league']['logo']
        })


print(dict_leagues)
save_dict_to_csv(dict_leagues, csv_file_to_save, drive_path_datasets)
"""

"\ndict_leagues = []\nfor x in loaded_json:\n    for season in x['seasons']:\n        dict_leagues.append({\n            'id': x['league']['id'],\n            'name': x['league']['name'],\n            'country': x['country']['name'],\n            'season': season['year'],\n            'logo': x['league']['logo']\n        })\n\n\nprint(dict_leagues)\nsave_dict_to_csv(dict_leagues, csv_file_to_save, drive_path_datasets)\n"

In [10]:
# @title
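# Timezones (disabled: the cell body below is a string literal, so none of it runs; it expects `loaded_json` to be defined by another cell)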

"""
dict_timezones = [{'timezone':t} for t in loaded_json]
print(dict_timezones)
save_dict_to_csv(dict_timezones, 'timezones.csv', drive_path_datasets)
"""

"\ndict_timezones = [{'timezone':t} for t in loaded_json]\nprint(dict_timezones)\nsave_dict_to_csv(dict_timezones, 'timezones.csv', drive_path_datasets)\n"

In [11]:
# @title
'''

endpoint = 'countries'
json_file_to_save = 'countries.json'
csv_file_to_save = 'countries.csv'
params = None

#Countries

dict_countries = [
  {'name': f['name'],
   'code': f['code'],
   'flag': f['flag']
  } for f in loaded_json

]

print(dict_countries[:1])
save_dict_to_csv(dict_countries, csv_file_to_save, drive_path_datasets)

'''

"\n\nendpoint = 'countries'\njson_file_to_save = 'countries.json'\ncsv_file_to_save = 'countries.csv'\nparams = None\n\n#Countries\n\ndict_countries = [\n  {'name': f['name'],\n   'code': f['code'],\n   'flag': f['flag']\n  } for f in loaded_json\n\n]\n\nprint(dict_countries[:1])\nsave_dict_to_csv(dict_countries, csv_file_to_save, drive_path_datasets)\n\n"

In [12]:
# @title
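# Team test (disabled: the cell body below is a string literal, so none of it runs; it expects `loaded_json` to be defined by another cell)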
"""
endpoint = 'teams'
json_file_to_save = 'teams.json'
csv_file_to_save = 'teams.csv'
params = {
    'country': 'Spain'
    }

dict_teams = [
  {'id': str(f['team']['id']),
   'name': f['team']['name'],
   'code': f['team']['code'],
   'country': f['team']['country'],
   'founded': f['team']['founded'],
   'national': f['team']['national'],
   'logo': f['team']['logo']
  } for f in loaded_json

]

print(dict_teams[1])
"""

"\nendpoint = 'teams'\njson_file_to_save = 'teams.json'\ncsv_file_to_save = 'teams.csv'\nparams = {\n    'country': 'Spain'\n    }\n\ndict_teams = [\n  {'id': str(f['team']['id']),\n   'name': f['team']['name'],\n   'code': f['team']['code'],\n   'country': f['team']['country'],\n   'founded': f['team']['founded'],\n   'national': f['team']['national'],\n   'logo': f['team']['logo']\n  } for f in loaded_json\n\n]\n\nprint(dict_teams[1])\n"

In [13]:
# @title
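# Teams (disabled: the cell body below is a string literal, so none of it runs; inside it the API request is commented out, so it would rebuild teams.csv from the per-country JSON files already saved under the teams folder)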
"""
import time

countries = load_json('countries/countries.json', drive_path_datasets)

if not countries:
  raise Exception('Countries not loaded')


countries_list = [country['name'] for country in countries]

endpoint = 'teams'
dict_teams = []


start = 0
i = start

for c in countries_list[start:]:
  params = {
    'country': c
  }

  data = []

  json_file_to_save = f'{i}_team_{c}.json'
  csv_file_to_save = f'{i}_team_{c}.csv'

  sleep = 61
  if i % 10 == 0 and i != start:
    print(f"Sleeping for {sleep} seconds...")
    #time.sleep(sleep)

  #data = req_football_api(endpoint, params)
  #print(f"Adding country #{i}: {c}")
  i += 1

  if data:
    time.sleep(5)
    save_json(data, json_file_to_save, drive_path_datasets+'/teams')

  loaded_json = load_json(json_file_to_save, drive_path_datasets+'/teams')
  process_and_verify_json_encoding(json_file_to_save, drive_path_datasets+'/teams', json_file_to_save, drive_path_datasets+'/teams')

  dict_teams_t = [
    {'id': str(f['team']['id']),
    'name': f['team']['name'],
    'code': f['team']['code'],
    'country': f['team']['country'],
    'founded': f['team']['founded'],
    'national': f['team']['national'],
    'logo': f['team']['logo']
    } for f in loaded_json
  ]


  dict_teams.extend(dict_teams_t)
  save_dict_to_csv(dict_teams, 'teams.csv', drive_path_datasets)

  print('\n')

#print(dict_teams)
"""


'\nimport time\n\ncountries = load_json(\'countries/countries.json\', drive_path_datasets)\n\nif not countries:\n  raise Exception(\'Countries not loaded\')\n\n\ncountries_list = [country[\'name\'] for country in countries]\n\nendpoint = \'teams\'\ndict_teams = []\n\n\nstart = 0\ni = start\n\nfor c in countries_list[start:]:\n  params = {\n    \'country\': c\n  }\n\n  data = []\n\n  json_file_to_save = f\'{i}_team_{c}.json\'\n  csv_file_to_save = f\'{i}_team_{c}.csv\'\n\n  sleep = 61\n  if i % 10 == 0 and i != start:\n    print(f"Sleeping for {sleep} seconds...")\n    #time.sleep(sleep)\n\n  #data = req_football_api(endpoint, params)\n  #print(f"Adding country #{i}: {c}")\n  i += 1\n\n  if data:\n    time.sleep(5)\n    save_json(data, json_file_to_save, drive_path_datasets+\'/teams\')\n\n  loaded_json = load_json(json_file_to_save, drive_path_datasets+\'/teams\')\n  process_and_verify_json_encoding(json_file_to_save, drive_path_datasets+\'/teams\', json_file_to_save, drive_path_dataset

In [16]:
# Fixtures
# Downloads the fixtures selected by `params`, stores the raw JSON, and
# flattens every fixture into one CSV row.

# --------------- PARAMETERS --------------- #
endpoint = 'fixtures'

params = {
    'league': '39',
    'season': '2022',
    'timezone': 'America/Costa_Rica'
}

json_file_to_save = 'fixtures/premier_2022.json'
csv_file_to_save = 'fixtures/premier_2022.csv'

# --------------- PARAMETERS --------------- #


def _winner_from_score(home_goals, away_goals):
  '''Return 'home', 'away' or 'tie' for a score, or None if either side has no score.'''
  # Comparing None with < or > raises TypeError, so bail out first.
  if home_goals is None or away_goals is None:
    return None
  if home_goals > away_goals:
    return 'home'
  if home_goals < away_goals:
    return 'away'
  return 'tie'


def _full_time_winner(fixture):
  '''Return the side flagged as winner, 'tie' if none is flagged, or None if there is no full-time score.'''
  if fixture['teams']['home']['winner']:
    return 'home'
  if fixture['teams']['away']['winner']:
    return 'away'
  fulltime = fixture['score']['fulltime']
  # No winner flag and no score means there is no result, not a tie.
  if fulltime['home'] is None or fulltime['away'] is None:
    return None
  return 'tie'


def _fixture_to_row(fixture):
  '''Flatten one fixture record into the flat dict written to the CSV.'''
  halftime = fixture['score']['halftime']
  fulltime = fixture['score']['fulltime']
  return {
    'id': str(fixture['fixture']['id']),
    'league': fixture['league']['id'],
    'season': fixture['league']['season'],
    'venue': fixture['fixture']['venue']['id'],
    'referee': fixture['fixture']['referee'],
    'home_team': fixture['teams']['home']['id'],
    'away_team': fixture['teams']['away']['id'],
    'date': fixture['fixture']['date'],
    'timezone': fixture['fixture']['timezone'],
    'half_time_score_home': halftime['home'],
    'half_time_score_away': halftime['away'],
    'half_time_winner': _winner_from_score(halftime['home'], halftime['away']),
    'full_time_score_home': fulltime['home'],
    'full_time_score_away': fulltime['away'],
    'full_time_winner': _full_time_winner(fixture),
  }


data = req_football_api(endpoint, params)

# Only overwrite the stored JSON when the API actually returned fixtures;
# otherwise fall back to whatever an earlier run saved.
if data:
  save_json(data, json_file_to_save, drive_path_datasets)
loaded_json = load_json(json_file_to_save, drive_path_datasets)

if not loaded_json:
  raise Exception('Fixtures not loaded')


dict_fixtures = [_fixture_to_row(f) for f in loaded_json]

print(dict_fixtures[:1])
save_dict_to_csv(dict_fixtures, csv_file_to_save, drive_path_datasets)


API Call Successful!
API Call Completed.
Data successfully saved to /content/drive/MyDrive/TFM/Prototipo/datasets/fixtures/premier_2022.json
[{'id': '867946', 'league': 39, 'season': 2022, 'venue': 525, 'referee': 'A. Taylor', 'home_team': 52, 'away_team': 42, 'date': '2022-08-05T13:00:00-06:00', 'timezone': 'America/Costa_Rica', 'half_time_score_home': 0, 'half_time_score_away': 1, 'half_time_winner': 'away', 'full_time_score_home': 0, 'full_time_score_away': 2, 'full_time_winner': 'away'}]
Data successfully saved to /content/drive/MyDrive/TFM/Prototipo/datasets/fixtures/premier_2022.csv
