In [None]:
#| hide
#| default_exp clean_datasets

# Clean datasets

> Clean raw datasets for the Shiny app.

In [None]:
#| export

import warnings
warnings.filterwarnings('ignore')

import json
import logging
import os
import requests

import pandas as pd

# NOTE: Had to install the package with the following command for the import to work.
# python3 -m pip install -e '.[dev]'
from isl_2024.utils import *

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
# Locate the directory this code runs from. `__file__` is defined when the
# exported module is executed as a script, but not inside a Jupyter kernel.
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # Notebook fallback: use the kernel's current working directory.
    script_dir = os.getcwd()

# All project folders live one level above the script/notebook directory.
parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))
log_dir = os.path.join(parent_dir, 'logs')
data_dir = os.path.join(parent_dir, 'data')
# Built from data_dir (not a hard-coded 'data/clean') so the separator is
# always the platform's own.
clean_data_dir = os.path.join(data_dir, 'clean')

# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of `if not os.path.exists(...): os.makedirs(...)`.
for _required_dir in (log_dir, data_dir, clean_data_dir):
    os.makedirs(_required_dir, exist_ok=True)

# Append INFO-and-above records to logs/clean_datasets.log.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename=os.path.join(log_dir, 'clean_datasets.log'),
    filemode='a',
)

# Matches

In [None]:
#| export

# Only the last line of matches.txt is parsed; it must be a JSON object with a
# 'matches' list. (Presumably each line is one snapshot of the fixture list and
# the last one is the most recent -- TODO confirm against the download step.)
with open(os.path.join(data_dir, 'matches.txt'), encoding='utf-8') as f:
    matches = json.loads(f.readlines()[-1])['matches']

# Flatten every raw match into one record. participants[0] is stored as the
# home team and participants[1] as the away team.
match_records = [
    {
        'start_at': match['start_date'],
        'end_at': match['end_date'],
        'home_team': match['participants'][0]['name'],
        'away_team': match['participants'][1]['name'],
        'score': match['winning_margin'],
    }
    for match in matches
]

# Passing the columns explicitly keeps the frame well-formed (and the column
# lookups below valid) even when there are no matches at all.
df = pd.DataFrame(
    match_records,
    columns=['start_at', 'end_at', 'home_team', 'away_team', 'score'],
)
df['start_at'] = pd.to_datetime(df['start_at'])
# Fix: parse the end_at column itself. It was previously overwritten with the
# parsed start_at values, so every match appeared to end when it started.
df['end_at'] = pd.to_datetime(df['end_at'])
# 1-based identifier following the order of the matches in the source file.
df['match_id'] = df.index + 1
df = df[['match_id', 'start_at', 'end_at', 'home_team', 'away_team', 'score']]

df.to_csv(os.path.join(clean_data_dir, 'matches.csv'), index=False)

print(df.shape)
df.head()

(84, 6)


Unnamed: 0,match_id,start_at,end_at,home_team,away_team,score
0,1,2024-09-13 19:30:00+05:30,2024-09-13 19:30:00+05:30,Mohun Bagan Super Giant,Mumbai City FC,2 - 2
1,2,2024-09-14 17:00:00+05:30,2024-09-14 17:00:00+05:30,Odisha FC,Chennaiyin FC,2 - 3
2,3,2024-09-14 19:30:00+05:30,2024-09-14 19:30:00+05:30,Bengaluru FC,East Bengal FC,1 - 0
3,4,2024-09-15 19:30:00+05:30,2024-09-15 19:30:00+05:30,Kerala Blasters FC,Punjab FC,
4,5,2024-09-16 19:30:00+05:30,2024-09-16 19:30:00+05:30,Mohammedan SC,NorthEast United FC,
