### Scraping Fighters Data

In [2]:
import requests,bs4
import helpers

In [3]:
# Fighters listing page; the scraping loop appends "?char=<letter>" and
# "&page=<n>" query parameters to this address.
BASE_URL = "http://ufcstats.com/statistics/fighters"

In [4]:
import string

# The fighters listing is organised by the first letter of the name, and each
# letter is split into numbered pages, so walk every letter and every page.
alphabets = list(string.ascii_lowercase)

# Filled from the first listing page that exposes table headers, so the setup
# does not depend on which letter happens to be processed first.
headers = []
fighters_data = {}

for letter in alphabets:
    fighters_DOM_per_letter = helpers.cached_request(f"{BASE_URL}?char={letter}")
    soup = bs4.BeautifulSoup(fighters_DOM_per_letter, 'html.parser')

    if not headers:
        headers = [th.get_text(strip=True) for th in soup.select("table thead th")]
        fighters_data = {header.title(): [] for header in headers}

    # The pager has one <li> per numbered page plus a link for "all", hence the
    # "- 1". find() returns None when the pager is missing; in that case, or
    # when only the "all" link is present, still scrape page 1 instead of
    # crashing or silently dropping the letter.
    pagination = soup.find('ul', class_='b-statistics__paginate')
    n_items = len(pagination.find_all('li')) if pagination is not None else 0
    n_pages = max(n_items - 1, 1)

    for i in range(1, n_pages + 1):
        current_page = helpers.cached_request(f"{BASE_URL}?char={letter}&page={i}")
        soup_1 = bs4.BeautifulSoup(current_page, 'html.parser')

        # Scraping fighters tabular data
        for row in soup_1.select("table tbody tr"):
            cells = row.find_all('td', class_='b-statistics__table-col')

            # Rows without data cells carry no fighter
            if not cells:
                continue

            # Pad short rows with None so every column receives exactly one
            # value per fighter and the columns stay the same length.
            values = [cell.get_text(strip=True) for cell in cells]
            values += [None] * (len(headers) - len(values))

            for header, value in zip(headers, values):
                fighters_data[header.title()].append(value)

In [31]:
import pandas as pd

# Turn the per-column lists collected by the scraping loop into a table
fighters_df = pd.DataFrame.from_dict(fighters_data)

In [32]:
# Preview the first rows of the fighters table
fighters_df.head()

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt
0,Tom,Aaron,,--,155 lbs.,--,,5,3,0,
1,Danny,Abbadi,The Assassin,"5' 11""",155 lbs.,--,Orthodox,4,6,0,
2,Nariman,Abbasov,Bayraktar,"5' 8""",155 lbs.,"66.0""",Orthodox,28,4,0,
3,Darion,Abbey,,"6' 2""",265 lbs.,"80.0""",Orthodox,9,5,0,
4,David,Abbott,Tank,"6' 0""",265 lbs.,--,Switch,10,15,0,


In [33]:
# Save the fighters table; index=False leaves the row index out of the file
fighters_df.to_csv('fighters.csv',index=False)

### Scraping Events

In [7]:
# Completed-events listing, requested with page=all.
# NOTE(review): this rebinds BASE_URL, which the fighters section uses as the
# prefix for its per-letter URLs; re-running that section after this cell would
# build its URLs from the events address. Consider a section-specific name.
BASE_URL = "http://ufcstats.com/statistics/events/completed?page=all"

In [8]:
# Fetch the completed-events listing HTML via helpers.cached_request.
# NOTE(review): the URL is spelled out again here instead of reusing the
# BASE_URL value assigned in the previous cell.
events_html = helpers.cached_request("http://ufcstats.com/statistics/events/completed?page=all")

In [9]:
events_soup = bs4.BeautifulSoup(events_html, 'html.parser')

# Get table headers
events_table_headers = [th.get_text(strip=True) for th in events_soup.select('table thead th')]
# The first header is Name/Date, so split it into separate name & date columns
events_table_headers = events_table_headers[0].split('/') + events_table_headers[1:]
events_dict = {header.title(): [] for header in events_table_headers}


def _event_link(row):
    """Return the event <a> tag in the row's first data cell, or None if absent."""
    first_td = row.select_one('td.b-statistics__table-col')
    return first_td.select_one('a.b-link') if first_td is not None else None


# The listing starts with an empty row. Rather than blindly dropping index 0,
# keep exactly the rows that link to an event: this removes the empty row
# wherever it appears and never discards a real event.
events_rows = [
    row
    for row in events_soup.select('table tbody tr.b-statistics__table-row')
    if _event_link(row) is not None
]

for row in events_rows:
    first_td = row.select_one('td.b-statistics__table-col')
    event_name = _event_link(row).get_text(strip=True)

    # select_one() returns None when nothing matches; store None for a missing
    # date or location instead of raising AttributeError mid-scrape.
    date_tag = first_td.select_one('span.b-statistics__date')
    event_date = date_tag.get_text(strip=True) if date_tag is not None else None

    location_tag = row.select_one('td.b-statistics__table-col_style_big-top-padding')
    location = location_tag.get_text(strip=True) if location_tag is not None else None

    events_info = [event_name, event_date, location]

    for header, cell in zip(events_table_headers, events_info):
        events_dict[header.title()].append(cell)
        

In [10]:
# Build the events table from the collected column lists and preview it
events_df = pd.DataFrame.from_dict(events_dict)
events_df.head()

Unnamed: 0,Name,Date,Location
0,UFC Fight Night: Walker vs. Zhang,"August 23, 2025","Shanghai, Hebei, China"
1,UFC 319: Du Plessis vs. Chimaev,"August 16, 2025","Chicago, Illinois, USA"
2,UFC Fight Night: Dolidze vs. Hernandez,"August 09, 2025","Las Vegas, Nevada, USA"
3,UFC Fight Night: Taira vs. Park,"August 02, 2025","Las Vegas, Nevada, USA"
4,UFC Fight Night: Whittaker vs. De Ridder,"July 26, 2025","Abu Dhabi, Abu Dhabi, United Arab Emirates"


In [34]:
# Save the events table; index=False leaves the row index out of the file
events_df.to_csv('events.csv',index=False)

### Scraping Events Details (Fights)

In [None]:
# Column order of the fights table. Per-fighter values are stored as *_1 / *_2
# pairs, following the order in which they appear inside each table cell.
fights_headers = ["Win/No Contest/Draw",
                  "Fighter_1",
                  "Fighter_2",
                  "KD_1",
                  "KD_2",
                  "STR_1",
                  "STR_2",
                  "TD_1",
                  "TD_2",
                  "SUB_1",
                  "SUB_2",
                  "Weight_Class",
                  "Method",
                  "Round",
                  "Fight_Time"
                  ]
fights_dict = {header: [] for header in fights_headers}

# Number of <td> cells read from each fight row (indices 0-9 below)
N_FIGHT_CELLS = 10


def _as_pair(values):
    """Return exactly two items: pad a short list with None, drop any extras."""
    return (list(values) + [None, None])[:2]


# events_rows was built in the events section: one table row per event, each
# holding a link to that event's page.
for row in events_rows:
    first_td = row.select_one('td.b-statistics__table-col')
    event_link = first_td.select_one('a.b-link').get('href')
    # Visit each event link
    event_html = helpers.cached_request(event_link)
    event_soup = bs4.BeautifulSoup(event_html, "html.parser")

    # For each event, fights are arranged in tables
    fight_rows = event_soup.select(
        "table.b-fight-details__table tbody tr.b-fight-details__table-row")
    for fight_row in fight_rows:
        cells = fight_row.select('td')

        # A row with fewer cells than expected cannot be mapped onto the
        # columns; skip it rather than fail with IndexError.
        if len(cells) < N_FIGHT_CELLS:
            continue

        # Get fights data
        result_flag = cells[0].get_text(strip=True)

        fighters = [a.get_text(strip=True) for a in cells[1].select("a.b-link")]
        kd = [p.get_text(strip=True) for p in cells[2].select("p")]
        strikes = [p.get_text(strip=True) for p in cells[3].select("p")]
        td = [p.get_text(strip=True) for p in cells[4].select("p")]
        sub = [p.get_text(strip=True) for p in cells[5].select("p")]
        weight_class = cells[6].get_text(strip=True)
        method_parts = [p.get_text(strip=True) for p in cells[7].select("p")]
        method = " ".join(part for part in method_parts if part)
        round_num = cells[8].get_text(strip=True)
        fight_time = cells[9].get_text(strip=True)

        # Every per-fighter list is forced to exactly two entries. Splatting a
        # list of any other length would shift all later values into the wrong
        # columns and leave the columns with unequal lengths.
        fight_info = [
            result_flag,
            *_as_pair(fighters),
            *_as_pair(kd),
            *_as_pair(strikes),
            *_as_pair(td),
            *_as_pair(sub),
            weight_class,
            method,
            round_num,
            fight_time,
        ]

        for header, val in zip(fights_headers, fight_info):
            fights_dict[header].append(val)

In [35]:
# Build the fights table from the collected column lists and preview it
fights_df = pd.DataFrame.from_dict(fights_dict)
fights_df.head()

Unnamed: 0,Win/No Contest/Draw,Fighter_1,Fighter_2,KD_1,KD_2,STR_1,STR_2,TD_1,TD_2,SUB_1,SUB_2,Weight_Class,Method,Round,Fight_Time
0,win,Johnny Walker,Zhang Mingyang,1,0,50,20,0,0,0,0,Light Heavyweight,KO/TKO Punches,2,2:37
1,win,Aljamain Sterling,Brian Ortega,0,0,124,55,3,0,0,0,Catch Weight,U-DEC,5,5:00
2,win,Sergei Pavlovich,Waldo Cortes-Acosta,0,0,61,45,0,0,0,0,Heavyweight,U-DEC,3,5:00
3,win,Sumudaerji,Kevin Borjas,0,0,73,16,0,0,0,0,Flyweight,U-DEC,3,5:00
4,win,Taiyilake Nueraji,Kiefer Crosbie,0,0,27,2,1,0,0,0,Welterweight,KO/TKO Elbows,1,3:33


In [36]:
# Save the fights table; index=False leaves the row index out of the file
fights_df.to_csv('fights.csv',index=False)

More details about each fight are available at the "/fight-details" route.

### Scrape Fights Details