In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the project's `src` package imported below) are reloaded before each
# cell executes and edits to them are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
from os.path import join as p_join
from dotenv import load_dotenv
load_dotenv()
from pathlib import Path
import sys
import json
import pickle
from typing import List, Tuple, Dict, Set, Any, Optional, Callable
from pathlib import Path
sys.path.insert(0, str(Path(os.getcwd()).parent))
import requests
from tqdm import tqdm
# `fabulous` is an optional dependency: when it cannot be imported (for any
# reason), `color_print` degrades to a plain `print`.
try:
    from fabulous import color as fb_color
except Exception:
    def color_print(x, color='green'):
        """Print `x` without colouring (fallback used when `fabulous` is unavailable).

        Args:
            x: Object to print.
            color: Accepted for interface compatibility; ignored in this fallback.
        """
        print(x)
else:
    def color_print(x, color='green'):
        """Print `x` wrapped in the `fabulous.color` style named by `color`.

        Args:
            x: Object to print.
            color: Name of an attribute of `fabulous.color` (default 'green').

        Raises:
            AttributeError: If `fabulous.color` has no attribute named `color`.
        """
        print(getattr(fb_color, color)(x))

from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from minio import Minio

In [9]:
from src.minio_utils import (
    minio_container_ipaddr, 
    initialize_minio_client
)

from src.parse_utils import get_events_list, get_events_info, get_fighters_info, get_one_fight_stats
from src.processing import eventslist2df
from src.pipelines.parse_all_fights import parse_all_fights

In [4]:
# Notebook-wide plotting defaults: (8, 8) figure size and seaborn's
# 'whitegrid' style.
matplotlib.rcParams.update({'figure.figsize': (8, 8)})
sns.set_style('whitegrid')

---

In [5]:
# MinIO credentials are read from the environment; a missing variable raises
# KeyError here rather than failing later at connection time.
MINIO_ACCESS_KEY = os.environ['MINIO_ACCESS_KEY']
MINIO_SECRET_KEY = os.environ['MINIO_SECRET_KEY']

# The project root is the parent of the current working directory; the parsed
# fights JSON path is derived from it.
PROJECT_FOLDER = str(Path.cwd().parent)
PARSED_DATA_PATH = p_join(PROJECT_FOLDER, 'data/raw/all_fights.json')
print(
    f"PROJECT_FOLDER: {PROJECT_FOLDER}",
    f"PARSED_DATA_PATH: {PARSED_DATA_PATH}",
    sep='\n',
)

PROJECT_FOLDER: /home/aiandrejcev/ufc
PARSED_DATA_PATH: /home/aiandrejcev/ufc/data/raw/all_fights.json


In [6]:
# get_events_list() returns the list of events together with a status flag.
# Despite its name, `fights_list` holds one dict per *event* (keys seen in the
# output below: event_url, event_name, date, location).
fights_list, status_ok = get_events_list()
print(status_ok, len(fights_list), sep='\n')
fights_list[:3]

True
681


[{'event_url': 'http://www.ufcstats.com/event-details/a9df5ae20a97b090',
  'event_name': "UFC 299: O'Malley vs. Vera 2",
  'date': 'March 09, 2024',
  'location': 'Miami, Florida, USA'},
 {'event_url': 'http://www.ufcstats.com/event-details/e4a9dbade7c7e1a7',
  'event_name': 'UFC Fight Night: Rozenstruik vs. Gaziev',
  'date': 'March 02, 2024',
  'location': 'Las Vegas, Nevada, USA'},
 {'event_url': 'http://www.ufcstats.com/event-details/902ab9197b83d0db',
  'event_name': 'UFC Fight Night: Moreno vs. Royval 2',
  'date': 'February 24, 2024',
  'location': 'Mexico City, Distrito Federal, Mexico'}]

In [7]:
# Convert the list of event dicts into tabular form via the project helper and
# display it (the rendered output shows one row per event with the columns
# event_url, event_name, date, location).
fights_df = eventslist2df(fights_list)
fights_df

Unnamed: 0,event_url,event_name,date,location
0,http://www.ufcstats.com/event-details/a9df5ae2...,UFC 299: O'Malley vs. Vera 2,"March 09, 2024","Miami, Florida, USA"
1,http://www.ufcstats.com/event-details/e4a9dbad...,UFC Fight Night: Rozenstruik vs. Gaziev,"March 02, 2024","Las Vegas, Nevada, USA"
2,http://www.ufcstats.com/event-details/902ab919...,UFC Fight Night: Moreno vs. Royval 2,"February 24, 2024","Mexico City, Distrito Federal, Mexico"
3,http://www.ufcstats.com/event-details/dab0e6cb...,UFC 298: Volkanovski vs. Topuria,"February 17, 2024","Anaheim, California, USA"
4,http://www.ufcstats.com/event-details/eaea0fc7...,UFC Fight Night: Hermansson vs. Pyfer,"February 10, 2024","Las Vegas, Nevada, USA"
...,...,...,...,...
676,http://www.ufcstats.com/event-details/1c3f5e85...,UFC 6: Clash of the Titans,"July 14, 1995","Casper, Wyoming, USA"
677,http://www.ufcstats.com/event-details/dedc3bb4...,UFC 5: The Return of the Beast,"April 07, 1995","Charlotte, North Carolina, USA"
678,http://www.ufcstats.com/event-details/b60391da...,UFC 4: Revenge of the Warriors,"December 16, 1994","Tulsa, Oklahoma, USA"
679,http://www.ufcstats.com/event-details/1a49e067...,UFC 3: The American Dream,"September 09, 1994","Charlotte, North Carolina, USA"


In [11]:
# Build the MinIO client through the project helper, using the credentials
# read from the environment and the address reported by
# minio_container_ipaddr() on port 9000.
minio_client = initialize_minio_client(
    ipaddr=minio_container_ipaddr(),
    port_number=9000,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
)
minio_client

<minio.api.Minio at 0x7fe5d863e640>

In [47]:
############################### Loading all fights info from minio ####################################
# Best-effort download of the previously parsed fights. On any failure the
# error is printed and `all_fights_list` stays None, which the parsing cell
# below treats as "nothing parsed yet".
all_fights_list = None

try:
    minio_client = Minio(
        # endpoint="172.17.0.2:9000",
        endpoint=f"{minio_container_ipaddr()}:9000",
        access_key=MINIO_ACCESS_KEY,
        secret_key=MINIO_SECRET_KEY,
        secure=False,
    )

    minio_client.fget_object(
        bucket_name='ufc-raw-data',
        object_name='ufc_stats.json',
        file_path=PARSED_DATA_PATH,
    )

    try:
        # Context manager closes the file handle deterministically.
        with open(PARSED_DATA_PATH, mode='r', encoding='utf-8') as parsed_file:
            all_fights_list = json.load(parsed_file)
    finally:
        # Remove the temporary local copy even when the JSON fails to parse.
        if os.path.exists(PARSED_DATA_PATH):
            os.remove(PARSED_DATA_PATH)
except Exception as e:
    print("Can't load all_fights_list!")
    print(e, end='\n'*2)

############################## set of already parsed events ##############################
# `events_set` collects the 'event_uri' of every loaded fight; it is passed to
# parse_all_fights() below as `parsed_events_set`. It is None when nothing was
# loaded or when the loaded records do not have the expected structure.
if all_fights_list is None:
    events_set = None
else:
    try:
        events_set = {fight_info['event_uri'] for fight_info in all_fights_list}
    except (KeyError, TypeError) as e:
        events_set = None
        print(e, end='\n'*2)

# NOTE: the upload of the parsed-events set to minio is currently disabled;
# the code is kept below for reference only.
# parsed_events_set_path = p_join(PROJECT_FOLDER, 'data/raw/', 'parsed_events_set.pkl')
# pickle.dump(
#     events_set, 
#     open(parsed_events_set_path, mode='wb'), 
# )

# # creating Minio client
# minio_client = Minio(
#     endpoint="172.17.0.2:9000",
#     access_key=MINIO_ACCESS_KEY,
#     secret_key=MINIO_SECRET_KEY,
#     secure=False,
# )

# minio_client.fput_object(
#     bucket_name='ufc-raw-data',
#     object_name='parsed_events_set.pkl',
#     file_path=parsed_events_set_path,
# )
# os.remove(parsed_events_set_path)

In [8]:
# NOTE(review): this cell repeats the download done in the guarded cell above,
# but without the try/except, so any failure here raises. Confirm whether it
# is still needed.
minio_client = Minio(
    # endpoint="172.17.0.2:9000",
    endpoint=f"{minio_container_ipaddr()}:9000",
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    secure=False,
)

# Download the stored fights JSON to a temporary local file.
minio_client.fget_object(
    bucket_name='ufc-raw-data',
    object_name='ufc_stats.json',
    file_path=PARSED_DATA_PATH,
)

try:
    # Context manager closes the file handle deterministically.
    with open(PARSED_DATA_PATH, mode='r', encoding='utf-8') as parsed_file:
        all_fights_list = json.load(parsed_file)
finally:
    # Remove the temporary local copy even when the JSON fails to parse.
    os.remove(PARSED_DATA_PATH)

In [48]:
# Parse fights, passing the set of already-known events as
# `parsed_events_set` (presumably so those events are skipped; confirm in
# parse_all_fights).
all_fights_list_added = parse_all_fights(
    save_path=None,
    parsed_events_set=events_set,
)

# Merge the newly parsed fights into whatever was loaded from minio.
# NOTE(review): `extend` mutates the loaded list in place and `events_set` is
# not refreshed here, so re-running this cell without re-running the load cell
# may append the same fights again.
if all_fights_list is None:
    all_fights_list = all_fights_list_added
else:
    all_fights_list.extend(all_fights_list_added)

print(len(all_fights_list))

# Write via a context manager so the file is flushed and closed before the
# next cell reads and uploads it.
with open(PARSED_DATA_PATH, mode='w', encoding='utf-8') as parsed_file:
    json.dump(all_fights_list, parsed_file, ensure_ascii=False, indent=2)

status_ok: True


100%|██████████| 638/638 [00:11<00:00, 57.98it/s]


7035


In [50]:
#################################### Putting data to minio ################################################
# Re-read the file written by the previous cell; a context manager ensures
# the handle is closed before the file is uploaded and deleted.
with open(PARSED_DATA_PATH, mode='r', encoding='utf-8') as parsed_file:
    all_fights_list = json.load(parsed_file)

# creating Minio client
minio_client = Minio(
    # endpoint="172.17.0.2:9000",
    endpoint=f"{minio_container_ipaddr()}:9000",
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    secure=False,
)

# Upload the merged fights JSON under the same bucket/object name that the
# load cells download.
minio_client.fput_object(
    bucket_name='ufc-raw-data',
    object_name='ufc_stats.json',
    file_path=PARSED_DATA_PATH,
)

# The local file is only a staging copy; remove it once the upload succeeded.
os.remove(PARSED_DATA_PATH)