In [1]:
from datetime import datetime
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [10]:
# One listing page exists per calendar year; build the URL for every year
# from 1994 through the year in which the notebook is executed.
currentyear = datetime.now().year

urls_yearly_events = [
    f'https://mmadecisions.com/decisions-by-event/{year}/'
    for year in range(1994, currentyear + 1)
]

In [11]:
# Scrape every yearly listing page and collect one record per event row:
# the stripped text of each <td> cell followed by the absolute event URL.
all_event_data = []

for url in urls_yearly_events:
    # A timeout keeps a single stalled connection from hanging the whole crawl.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # An error page carries no event rows; report it and move on.
        print(f'Skipping {url}: HTTP {response.status_code}')
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    for row in soup.find_all('tr', class_='decision'):
        row_data = [cell.text.strip() for cell in row.find_all('td')]

        # First <a> tag in the row, if any. `.get` also covers an anchor
        # that has no href attribute.
        a_tag = row.find('a')
        href = a_tag.get('href') if a_tag else None

        # Only build the absolute URL when a link exists; concatenating a
        # string with None would raise TypeError. Rows without a link keep
        # None in the url column.
        row_data.append('https://mmadecisions.com/' + href if href else None)

        all_event_data.append(row_data)

# Create DataFrame with an extra column for the href
df_event = pd.DataFrame(all_event_data, columns=['Date', 'Event', 'NumFights', 'url'])

# Change date column to date format
df_event['Date'] = pd.to_datetime(df_event['Date']).dt.date

# Sort descending by date
df_event = df_event.sort_values(by='Date', ascending=False)

df_event.head(10)

Unnamed: 0,Date,Event,NumFights,url
1490,2025-03-15,UFC on ESPN+ 112: Vettori vs. Dolidze 2,4,https://mmadecisions.com/event/1530/UFC-on-ESP...
1489,2025-03-15,CW 185: Cage Warriors 185,5,https://mmadecisions.com/event/1532/CW-185-Cag...
1491,2025-03-14,CW 184: Unplugged,3,https://mmadecisions.com/event/1531/CW-184-Unp...
1493,2025-03-08,UFC 313: Pereira vs. Ankalaev,6,https://mmadecisions.com/event/1529/UFC-313-Pe...
1492,2025-03-08,CW 183: Cage Warriors 183,8,https://mmadecisions.com/event/1528/CW-183-Cag...
1494,2025-03-01,UFC on ESPN+ 111: Kape vs. Almabayev,6,https://mmadecisions.com/event/1527/UFC-on-ESP...
1495,2025-02-22,UFC on ESPN+ 110: Cejudo vs. Song,4,https://mmadecisions.com/event/1526/UFC-on-ESP...
1496,2025-02-15,UFC on ESPN+ 109: Cannonier vs. Rodrigues,6,https://mmadecisions.com/event/1525/UFC-on-ESP...
1497,2025-02-09,UFC 312: du Plessis vs. Strickland 2,9,https://mmadecisions.com/event/1524/UFC-312-du...
1498,2025-02-01,UFC on ESPN+ 108: Adesanya vs. Imavov,7,https://mmadecisions.com/event/1523/UFC-on-ESP...


In [None]:
# Persist the event table to MMA_Events.csv; the DataFrame index is not written.
df_event.to_csv(path_or_buf='MMA_Events.csv', index=False)


            Date                                    Event  NumFights  \
0     2025-03-15  UFC on ESPN+ 112: Vettori vs. Dolidze 2          4   
1     2025-03-15                CW 185: Cage Warriors 185          5   
2     2025-03-14                        CW 184: Unplugged          3   
3     2025-03-08            UFC 313: Pereira vs. Ankalaev          6   
4     2025-03-08                CW 183: Cage Warriors 183          8   
...          ...                                      ...        ...   
1498  1996-05-17                UFC 9: Motor City Madness          1   
1499  1996-02-16                 UFC 8: David vs. Goliath          1   
1500  1995-12-16              UU 95: Ultimate Ultimate 95          3   
1501  1995-09-08              UFC 7: The Brawl in Buffalo          1   
1502  1995-04-07           UFC 5: The Return of the Beast          1   

                                                    url  
0     https://mmadecisions.com/event/1530/UFC-on-ESP...  
1     https://mmade

In [None]:
# Visit every event page and collect one record per fight cell:
# [fight URL (or None), *event header text fragments].
all_fight_data = []

# dropna() skips events that have no URL to fetch.
for url in df_event['url'].dropna():
    # A timeout keeps a single stalled connection from hanging the whole crawl.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # Nothing to parse on an error page; report it and move on.
        print(f'Skipping {url}: HTTP {response.status_code}')
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Event info: the text fragments of the first 'decision-top2' cell.
    # stripped_strings yields each whitespace-stripped fragment directly, so
    # no join/split round trip through a sentinel delimiter is needed.
    event_info_cell = soup.find('td', class_='decision-top2')
    if event_info_cell is None:
        # Page has no event header; keep the fights, just without event columns.
        current_event = []
    else:
        current_event = list(event_info_cell.stripped_strings)

    for row in soup.find_all('td', class_='list2'):
        # First <a> tag in the cell, if any. `.get` also covers an anchor
        # that has no href attribute.
        a_tag = row.find('a')
        href = a_tag.get('href') if a_tag else None

        # Absolute fight URL, or None when the cell carries no link.
        fight_record = ['https://mmadecisions.com/' + href if href else None]
        fight_record.extend(current_event)

        all_fight_data.append(fight_record)


df_fights = pd.DataFrame(all_fight_data)
df_fights.head(10)


