In [1]:
from datetime import datetime
import requests
import pandas as pd
from bs4 import BeautifulSoup
import UFC_Scrape_Library as LIB


In [2]:
# Build one "decisions by event" listing URL per year, starting at 1994 and
# running through the current calendar year (inclusive).
currentyear = datetime.now().year

urls_yearly_events = [
    'https://mmadecisions.com/decisions-by-event/' + str(year) + '/'
    for year in range(1994, currentyear + 1)
]

In [3]:
# Scrape every yearly listing page and collect one record per event row:
# the stripped text of each <td> followed by the absolute event URL.
all_event_data = []

for url in urls_yearly_events:
    # A timeout keeps a single stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    decision_rows = soup.find_all('tr', class_='decision')

    for row in decision_rows:
        row_data = [cell.text.strip() for cell in row.find_all('td')]

        # Find the first <a> tag (if it exists) within the row
        a_tag = row.find('a')

        # .get() returns None for an anchor without an href instead of raising KeyError
        href = a_tag.get('href') if a_tag else None

        # Only build the absolute URL when a link was found; concatenating a
        # string with None would raise TypeError and abort the whole scrape.
        row_data.append(('https://mmadecisions.com/' + href) if href else None)

        all_event_data.append(row_data)

# Create DataFrame with an extra column for the href
df_event = pd.DataFrame(all_event_data, columns=['Date', 'Event', 'NumFights', 'url'])

# Change date column to date format
df_event['Date'] = pd.to_datetime(df_event['Date']).dt.date

# Sort descending by date
df_event = df_event.sort_values(by='Date', ascending=False)

df_event.head(10)

Unnamed: 0,Date,Event,NumFights,url
1489,2025-04-26,CW 188: Cage Warriors 188,8,https://mmadecisions.com/event/1542/CW-188-Cag...
1490,2025-04-26,UFC on ESPN 66: Machado Garry vs. Prates,4,https://mmadecisions.com/event/1543/UFC-on-ESP...
1491,2025-04-19,CW 187: Glasgow,3,https://mmadecisions.com/event/1541/CW-187-Gla...
1492,2025-04-18,PFL 3: 2025 Season,2,https://mmadecisions.com/event/1540/PFL-3-2025...
1493,2025-04-12,UFC 314: Volkanovski vs. Lopes,5,https://mmadecisions.com/event/1538/UFC-314-Vo...
1494,2025-04-11,PFL 2: 2025 Season,3,https://mmadecisions.com/event/1539/PFL-2-2025...
1495,2025-04-05,UFC on ESPN 65: Emmett vs. Murphy,8,https://mmadecisions.com/event/1536/UFC-on-ESP...
1496,2025-04-03,PFL 1: 2025 Season,2,https://mmadecisions.com/event/1537/PFL-1-2025...
1497,2025-03-29,UFC on ESPN 64: Moreno vs. Erceg,7,https://mmadecisions.com/event/1535/UFC-on-ESP...
1498,2025-03-22,UFC on ESPN+ 113: Edwards vs. Brady,9,https://mmadecisions.com/event/1534/UFC-on-ESP...


In [4]:
# Write the event table to MMA_Events.csv, leaving out the DataFrame index
df_event.to_csv(path_or_buf='MMA_Events.csv', index=False)

In [5]:
# Visit every event page and collect one record per fight cell:
# [fight url, event name, venue, location].
all_fight_data = []

# Events without a link have nothing to fetch, so missing URLs are skipped.
for url in df_event['url'].dropna():
    # A timeout keeps a single stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Get event info from the first 'decision-top2' cell. Its text fragments
    # are joined with '||| ' and split again so each fragment becomes one field.
    event_cell = soup.find('td', class_='decision-top2')
    if event_cell is None:
        # Page without the expected header: keep the fights, leave the event fields empty.
        current_event = []
    else:
        current_event = event_cell.get_text(strip=True, separator='||| ').split('||| ')

    # Force exactly three fields (Event, Venue, Location) so one odd page cannot
    # make the DataFrame construction below fail on a column-count mismatch.
    # NOTE(review): extra fragments are dropped and missing ones become None;
    # confirm against a page whose header has a different number of fragments.
    current_event = (current_event + [None] * 3)[:3]

    for cell in soup.find_all('td', class_='list2'):

        # Find the first <a> tag (if it exists) within the cell
        a_tag = cell.find('a')

        # Build the absolute fight URL if a link was found, otherwise keep None
        href = a_tag.get('href') if a_tag else None
        fight_url = ('https://mmadecisions.com/' + href) if href else None

        all_fight_data.append([fight_url, *current_event])

# Create dataframe
df_fights = pd.DataFrame(all_fight_data, columns=['url', 'Event', 'Venue', 'Location'])
df_fights['url'] = df_fights['url'].str.strip()

# Reorder columns
df_fights = df_fights[['Event', 'Location', 'Venue', 'url']]
df_fights.head(10)


Unnamed: 0,Event,Location,Venue,url
0,CW 188: Cage Warriors 188,"Dublin, Ireland",The RDS,https://mmadecisions.com/decision/15380/Marcus...
1,CW 188: Cage Warriors 188,"Dublin, Ireland",The RDS,https://mmadecisions.com/decision/15379/Adam-D...
2,CW 188: Cage Warriors 188,"Dublin, Ireland",The RDS,https://mmadecisions.com/decision/15378/Conor-...
3,CW 188: Cage Warriors 188,"Dublin, Ireland",The RDS,https://mmadecisions.com/decision/15377/Alexan...
4,CW 188: Cage Warriors 188,"Dublin, Ireland",The RDS,https://mmadecisions.com/decision/15376/Scott-...
5,CW 188: Cage Warriors 188,"Dublin, Ireland",The RDS,https://mmadecisions.com/decision/15375/Ciaran...
6,CW 188: Cage Warriors 188,"Dublin, Ireland",The RDS,https://mmadecisions.com/decision/15374/Matthe...
7,CW 188: Cage Warriors 188,"Dublin, Ireland",The RDS,https://mmadecisions.com/decision/15373/Jamie-...
8,UFC on ESPN 66: Machado Garry vs. Prates,"Kansas City, Missouri, USA",T-Mobile Center,https://mmadecisions.com/decision/15388/Matt-S...
9,UFC on ESPN 66: Machado Garry vs. Prates,"Kansas City, Missouri, USA",T-Mobile Center,https://mmadecisions.com/decision/15387/Evan-E...


In [6]:
# Save the fight list to MMA_Decisions.csv, sorted descending by url.
# The original cell sorted df_event here, which had no effect on the saved file;
# the frame being written is df_fights, so that is the one to sort.
# The sort is applied to a temporary result so df_fights itself is left unchanged
# and re-running this cell always produces the same file.
# NOTE(review): 'url' is a string column, so this ordering is lexicographic.
df_fights.sort_values(by='url', ascending=False).to_csv('MMA_Decisions.csv', index=False)

In [None]:
# Scrape one scorecard DataFrame per fight URL.
# Fight cells without a link carry a missing url, so those are dropped before
# calling the scraper.
# NOTE(review): assumes LIB.scorecard_scrape needs a real URL string and returns
# a DataFrame — confirm against UFC_Scrape_Library.
list_scorecard = [LIB.scorecard_scrape(url) for url in df_fights['url'].dropna()]

# Concatenate all the DataFrames into one.
# pd.concat raises ValueError on an empty list, so fall back to an empty frame;
# it keeps a 'url' column because the save cell sorts by that column.
if list_scorecard:
    df_scorecards = pd.concat(list_scorecard, ignore_index=True)
else:
    df_scorecards = pd.DataFrame(columns=['url'])

In [None]:
# Order the scorecards by url, descending
df_scorecards = df_scorecards.sort_values('url', ascending=False)

# Write them to MMA_Scorecards.csv, leaving out the DataFrame index
df_scorecards.to_csv(path_or_buf='MMA_Scorecards.csv', index=False)