In [1]:
# pip install requests beautifulsoup4 pandas

In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [3]:

# Function to get HTML response
def get_html(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        return None
        
# Function to parse event URLs from a single events page
def parse_event_urls(html):
    soup = BeautifulSoup(html, 'html.parser')
    event_links = []

    for link in soup.find_all('a', class_='b-link b-link_style_black'):
        href = link.get('href')
        if 'event-details' in href:
            event_links.append(href)
    
    return event_links

# Function to parse fight night details from an event page
def parse_fight_night_details(html):
    soup = BeautifulSoup(html, 'html.parser')
    details = {}

    details['Event Name'] = soup.find('h2', class_='b-content__title').text.strip()
    details['Date'] = soup.find('li', class_='b-list__box-list-item').text.strip().split(':')[-1].strip()
    details['Location'] = soup.find_all('li', class_='b-list__box-list-item')[1].text.strip().split(':')[-1].strip()
    details['URL'] = soup.find('a', class_='b-link b-link_style_black', href=True).get('href', '').strip('"')

    return details

# Function to get all event URLs from all pages (Pagination)
def get_all_event_urls(base_url):
    event_urls = []
    page_number = 1

    while True:
        url = f"{base_url}&page={page_number}"
        html = get_html(url)
        if not html:
            break

        urls = parse_event_urls(html)
        if not urls:
            break

        event_urls.extend(urls)
        page_number += 1

    return event_urls

In [4]:
# URL of the UFC stats completed events page
base_url = 'http://www.ufcstats.com/statistics/events/completed?'

# Get all event URLs
event_urls = get_all_event_urls(base_url)

if event_urls:
    fight_nights = []
    for event_url in event_urls:
        event_html = get_html(event_url)
        if event_html:
            details = parse_fight_night_details(event_html)
            fight_nights.append(details)

    # Create a DataFrame and save to a CSV file
    df = pd.DataFrame(fight_nights)
    df.to_csv('ufc_fight_nights.csv', index=False)
    print("Data has been saved to ufc_fight_nights.csv")
else:
    print("Failed to retrieve any event URLs")

Data has been saved to ufc_fight_nights.csv


In [5]:
df

Unnamed: 0,Event Name,Date,Location,URL
0,UFC Fight Night: Barboza vs. Murphy,"May 18, 2024","Las Vegas, Nevada, USA",http://www.ufcstats.com/fighter-details/396fe8...
1,UFC Fight Night: Lewis vs. Nascimento,"May 11, 2024","St. Louis, Missouri, USA",http://www.ufcstats.com/fighter-details/d3df1a...
2,UFC 301: Pantoja vs. Erceg,"May 04, 2024","Rio de Janeiro, Rio de Janeiro, Brazil",http://www.ufcstats.com/fighter-details/a0f000...
3,UFC Fight Night: Nicolau vs. Perez,"April 27, 2024","Las Vegas, Nevada, USA",http://www.ufcstats.com/fighter-details/ab2b4f...
4,UFC 300: Pereira vs. Hill,"April 13, 2024","Las Vegas, Nevada, USA",http://www.ufcstats.com/fighter-details/e5549c...
...,...,...,...,...
686,UFC 5: The Return of the Beast,"April 07, 1995","Charlotte, North Carolina, USA",http://www.ufcstats.com/fighter-details/c670aa...
687,UFC 4: Revenge of the Warriors,"December 16, 1994","Tulsa, Oklahoma, USA",http://www.ufcstats.com/fighter-details/429e7d...
688,UFC 3: The American Dream,"September 09, 1994","Charlotte, North Carolina, USA",http://www.ufcstats.com/fighter-details/ad047e...
689,UFC 2: No Way Out,"March 11, 1994","Denver, Colorado, USA",http://www.ufcstats.com/fighter-details/429e7d...
