In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to local project code
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import os
import sys
from collections import Counter
from os.path import join as p_join
from pathlib import Path
from typing import List, Tuple, Dict, Set, Any, Optional, Callable

# Put the parent of the current working directory on the import path.
sys.path.insert(0, str(Path(os.getcwd()).parent))

import requests

try:
    from fabulous import color as fb_color
except Exception:
    # `fabulous` is an optional, purely cosmetic dependency: whatever the
    # reason it cannot be loaded, degrade to plain printing rather than
    # breaking the notebook.
    def color_print(x, color='green'):
        """Print ``x`` unchanged (``fabulous`` is unavailable).

        Args:
            x: Object to print.
            color: Accepted for interface compatibility; ignored here.
        """
        print(x)
else:
    def color_print(x, color='green'):
        """Print ``x`` wrapped in the ``fabulous.color`` style named ``color``.

        Args:
            x: Object to print.
            color: Name of an attribute of ``fabulous.color`` (default
                ``'green'``). If no callable with that name exists, ``x`` is
                printed without styling instead of raising ``AttributeError``.
        """
        paint = getattr(fb_color, color, None)
        print(paint(x) if callable(paint) else x)

from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [4]:
from src.parse_utils import get_events_list, get_fights_info, get_fighters_info
from src.processing import eventslist2df

In [5]:
# Global plotting defaults: 8x8 figures on seaborn's 'whitegrid' style.
matplotlib.rcParams.update({'figure.figsize': (8, 8)})
sns.set_style('whitegrid')

---

In [6]:
# Fetch the events via the project helper; it returns the list together with
# a flag (presumably whether the fetch succeeded; confirm in src.parse_utils).
fights_list, status_ok = get_events_list()
# Report the flag and the number of events, one per line.
print(status_ok, len(fights_list), sep='\n')
# Peek at the first three records.
fights_list[:3]

True
615


[{'event_url': 'http://www.ufcstats.com/event-details/8f6a18831a120817',
  'event_name': 'UFC Fight Night: Santos vs. Hill',
  'date': 'August 06, 2022',
  'location': 'Las Vegas, Nevada, USA'},
 {'event_url': 'http://www.ufcstats.com/event-details/b0a6124751a56bc4',
  'event_name': 'UFC 277: Pena vs. Nunes 2',
  'date': 'July 30, 2022',
  'location': 'Dallas, Texas, USA'},
 {'event_url': 'http://www.ufcstats.com/event-details/319c15b8aac5bfde',
  'event_name': 'UFC Fight Night: Blaydes vs. Aspinall',
  'date': 'July 23, 2022',
  'location': 'London, England, United Kingdom'}]

In [7]:
# Convert the list of event dicts into tabular form with the project helper.
# The result is indexed by column name below (`fights_df['event_url']`),
# so it is presumably a pandas DataFrame (confirm in src.processing).
fights_df = eventslist2df(fights_list)
# Display the table as the cell output.
fights_df

Unnamed: 0,event_url,event_name,date,location
0,http://www.ufcstats.com/event-details/8f6a1883...,UFC Fight Night: Santos vs. Hill,"August 06, 2022","Las Vegas, Nevada, USA"
1,http://www.ufcstats.com/event-details/b0a61247...,UFC 277: Pena vs. Nunes 2,"July 30, 2022","Dallas, Texas, USA"
2,http://www.ufcstats.com/event-details/319c15b8...,UFC Fight Night: Blaydes vs. Aspinall,"July 23, 2022","London, England, United Kingdom"
3,http://www.ufcstats.com/event-details/8fd76e1b...,UFC Fight Night: Ortega vs. Rodriguez,"July 16, 2022","Elmont, New York, USA"
4,http://www.ufcstats.com/event-details/31da66df...,UFC Fight Night: Dos Anjos vs. Fiziev,"July 09, 2022","Las Vegas, Nevada, USA"
...,...,...,...,...
610,http://www.ufcstats.com/event-details/1c3f5e85...,UFC 6: Clash of the Titans,"July 14, 1995","Casper, Wyoming, USA"
611,http://www.ufcstats.com/event-details/dedc3bb4...,UFC 5: The Return of the Beast,"April 07, 1995","Charlotte, North Carolina, USA"
612,http://www.ufcstats.com/event-details/b60391da...,UFC 4: Revenge of the Warriors,"December 16, 1994","Tulsa, Oklahoma, USA"
613,http://www.ufcstats.com/event-details/1a49e067...,UFC 3: The American Dream,"September 09, 1994","Charlotte, North Carolina, USA"


In [8]:
# Collect fight details for the first 50 event URLs only; the cell is
# indexed by event URL later, so the result is a mapping keyed by URL.
fights_info_dict = get_fights_info(
    fights_urls=fights_df['event_url'].iloc[:50].tolist(),
)
print(len(fights_info_dict))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:41<00:00,  1.19it/s]

50





In [9]:
# Show the fights recorded for the second event in the table (position 1).
fights_info_dict[fights_df['event_url'].iloc[1]]

[{'w_l': 'win',
  'fighter': ['Amanda Nunes', 'Julianna Pena'],
  'kd': ['3', '0'],
  'str': ['85', '60'],
  'td': ['6', '0'],
  'sub': ['1', '1'],
  'weight_class': "Women's Bantamweight",
  'method': ['U-DEC', ''],
  'round': '5',
  'time': '5:00'},
 {'w_l': 'win',
  'fighter': ['Brandon Moreno', 'Kai Kara-France'],
  'kd': ['1', '0'],
  'str': ['58', '53'],
  'td': ['0', '1'],
  'sub': ['0', '0'],
  'weight_class': 'Flyweight',
  'method': ['KO/TKO', 'Kick'],
  'round': '3',
  'time': '4:34'},
 {'w_l': 'win',
  'fighter': ['Sergei Pavlovich', 'Derrick Lewis'],
  'kd': ['1', '0'],
  'str': ['15', '4'],
  'td': ['0', '0'],
  'sub': ['0', '0'],
  'weight_class': 'Heavyweight',
  'method': ['KO/TKO', 'Punches'],
  'round': '1',
  'time': '0:55'},
 {'w_l': 'win',
  'fighter': ['Alexandre Pantoja', 'Alex Perez'],
  'kd': ['0', '0'],
  'str': ['8', '10'],
  'td': ['1', '0'],
  'sub': ['1', '0'],
  'weight_class': 'Flyweight',
  'method': ['SUB', 'Neck Crank'],
  'round': '1',
  'time': '1:

In [10]:
from src.parse_utils import get_fighters_info

In [11]:
# Fetch the fighters list via the project helper; it returns the list and a
# flag (presumably whether the fetch succeeded; confirm in src.parse_utils).
overall_fighters_list, status_ok = get_fighters_info()
# Report the flag and the number of fighter records, one per line.
print(f"status_ok: {status_ok}", len(overall_fighters_list), sep='\n')

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:30<00:00,  1.18s/it]

status_ok: True
476





In [400]:
# Are there namesakes among fighters?
# Count every "First_Last" key; if the most common key occurs once, all full
# names in the list are unique.
# `Counter` comes from `collections`, imported in the notebook's top imports
# cell (it was previously missing, so this cell failed on a fresh kernel).
res = Counter(f"{dct['First']}_{dct['Last']}" for dct in overall_fighters_list)
res.most_common(1)

[('Tom_Aaron', 1)]

In [403]:
# Preview only the first event's entry: echoing the entire mapping (every
# scraped event with all of its fights) floods the notebook output.
dict(list(fights_info_dict.items())[:1])

{'http://www.ufcstats.com/event-details/b0a6124751a56bc4': [{'w_l': 'win',
   'fighter': ['Amanda Nunes', 'Julianna Pena'],
   'kd': ['3', '0'],
   'str': ['85', '60'],
   'td': ['6', '0'],
   'sub': ['1', '1'],
   'weight_class': "Women's Bantamweight",
   'method': ['U-DEC', ''],
   'round': '5',
   'time': '5:00'},
  {'w_l': 'win',
   'fighter': ['Brandon Moreno', 'Kai Kara-France'],
   'kd': ['1', '0'],
   'str': ['58', '53'],
   'td': ['0', '1'],
   'sub': ['0', '0'],
   'weight_class': 'Flyweight',
   'method': ['KO/TKO', 'Kick'],
   'round': '3',
   'time': '4:34'},
  {'w_l': 'win',
   'fighter': ['Sergei Pavlovich', 'Derrick Lewis'],
   'kd': ['1', '0'],
   'str': ['15', '4'],
   'td': ['0', '0'],
   'sub': ['0', '0'],
   'weight_class': 'Heavyweight',
   'method': ['KO/TKO', 'Punches'],
   'round': '1',
   'time': '0:55'},
  {'w_l': 'win',
   'fighter': ['Alexandre Pantoja', 'Alex Perez'],
   'kd': ['0', '0'],
   'str': ['8', '10'],
   'td': ['1', '0'],
   'sub': ['1', '0'],
 