In [2]:
from datetime import datetime
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [3]:
# Build one "decisions by event" listing url per calendar year,
# starting at 1994 and ending with the current year (inclusive).
currentyear = datetime.now().year

urls_yearly_events = [
    f'https://mmadecisions.com/decisions-by-event/{year}/'
    for year in range(1994, currentyear + 1)
]

In [4]:
# Scrape every yearly listing page and collect one record per event row:
# the stripped text of each <td> in the row plus the absolute url of the row's first link.
all_event_data = []

for url in urls_yearly_events:
    # A timeout keeps a single unresponsive page from hanging the whole scrape.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    for row in soup.find_all('tr', class_='decision'):
        row_data = [cell.text.strip() for cell in row.find_all('td')]

        # First <a> tag in the row, if there is one.
        a_tag = row.find('a')
        href = a_tag.get('href') if a_tag else None

        # Only build the absolute url when a link exists; concatenating a
        # missing href (None) onto the site prefix would raise TypeError.
        row_data.append('https://mmadecisions.com/' + href if href else None)

        all_event_data.append(row_data)

# Create DataFrame with an extra column for the href
df_event = pd.DataFrame(all_event_data, columns=['Date', 'Event', 'NumFights', 'url'])

# Change the Date column from text to datetime.date objects
df_event['Date'] = pd.to_datetime(df_event['Date']).dt.date

# Sort descending by date
df_event = df_event.sort_values(by='Date', ascending=False)

df_event.head(10)

Unnamed: 0,Date,Event,NumFights,url
1489,2025-05-10,UFC 315: Muhammad vs. Della Maddalena,7,https://mmadecisions.com/event/1546/UFC-315-Mu...
1490,2025-05-03,UFC on ESPN 67: Sandhagen vs. Figueiredo,6,https://mmadecisions.com/event/1545/UFC-on-ESP...
1491,2025-05-02,PFL 4: 2025 Season,4,https://mmadecisions.com/event/1544/PFL-4-2025...
1492,2025-04-26,CW 188: Cage Warriors 188,8,https://mmadecisions.com/event/1542/CW-188-Cag...
1493,2025-04-26,UFC on ESPN 66: Machado Garry vs. Prates,7,https://mmadecisions.com/event/1543/UFC-on-ESP...
1494,2025-04-19,CW 187: Glasgow,3,https://mmadecisions.com/event/1541/CW-187-Gla...
1495,2025-04-18,PFL 3: 2025 Season,2,https://mmadecisions.com/event/1540/PFL-3-2025...
1496,2025-04-12,UFC 314: Volkanovski vs. Lopes,5,https://mmadecisions.com/event/1538/UFC-314-Vo...
1497,2025-04-11,PFL 2: 2025 Season,3,https://mmadecisions.com/event/1539/PFL-2-2025...
1498,2025-04-05,UFC on ESPN 65: Emmett vs. Murphy,8,https://mmadecisions.com/event/1536/UFC-on-ESP...


In [5]:
# Write the event table to MMA_Events.csv, leaving out the DataFrame index.
events_csv_path = 'MMA_Events.csv'
df_event.to_csv(events_csv_path, index=False)

In [6]:
# Visit every event page and collect one record per fight link found on it:
# the absolute decision url followed by the event header fields.
all_fight_data = []

# Events without a url have no page to visit, so missing values are skipped.
for url in df_event['url'].dropna():
    # A timeout keeps a single unresponsive page from hanging the whole scrape.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Event info comes from the first <td class="decision-top2"> cell. Its text
    # fragments are joined with '||| ' and split again, one field per fragment.
    event_info_cell = soup.find('td', class_='decision-top2')
    if event_info_cell is None:
        # Page without an event header: keep the fight links, leave the fields empty.
        current_event = []
    else:
        current_event = event_info_cell.get_text(strip=True, separator='||| ').split('||| ')

    # Pad or truncate to exactly three fields (Event, Venue, Location) so every
    # record matches the four columns the DataFrame below is built with.
    current_event = (current_event + [None] * 3)[:3]

    for cell in soup.find_all('td', class_='list2'):
        # First <a> tag in the cell, if there is one.
        a_tag = cell.find('a')
        href = a_tag.get('href') if a_tag else None

        # A cell without a link cannot be fetched later, so it is not recorded.
        if not href:
            continue

        all_fight_data.append(['https://mmadecisions.com/' + href] + current_event)

# Create dataframe
df_fights = pd.DataFrame(all_fight_data, columns=['url', 'Event', 'Venue', 'Location'])
df_fights['url'] = df_fights['url'].str.strip()

# Reorder columns
df_fights = df_fights[['Event', 'Location', 'Venue', 'url']]
df_fights.head(10)


Unnamed: 0,Event,Location,Venue,url
0,UFC 315: Muhammad vs. Della Maddalena,"Montreal, Quebec, Canada",Bell Centre,https://mmadecisions.com/decision/15422/Jack-D...
1,UFC 315: Muhammad vs. Della Maddalena,"Montreal, Quebec, Canada",Bell Centre,https://mmadecisions.com/decision/15421/Valent...
2,UFC 315: Muhammad vs. Della Maddalena,"Montreal, Quebec, Canada",Bell Centre,https://mmadecisions.com/decision/15420/Aieman...
3,UFC 315: Muhammad vs. Della Maddalena,"Montreal, Quebec, Canada",Bell Centre,https://mmadecisions.com/decision/15419/Natali...
4,UFC 315: Muhammad vs. Della Maddalena,"Montreal, Quebec, Canada",Bell Centre,https://mmadecisions.com/decision/15415/Modest...
5,UFC 315: Muhammad vs. Della Maddalena,"Montreal, Quebec, Canada",Bell Centre,https://mmadecisions.com/decision/15414/Navajo...
6,UFC 315: Muhammad vs. Della Maddalena,"Montreal, Quebec, Canada",Bell Centre,https://mmadecisions.com/decision/15412/Daniel...
7,UFC on ESPN 67: Sandhagen vs. Figueiredo,"Des Moines, Iowa, USA",Wells Fargo Arena,https://mmadecisions.com/decision/15407/Montel...
8,UFC on ESPN 67: Sandhagen vs. Figueiredo,"Des Moines, Iowa, USA",Wells Fargo Arena,https://mmadecisions.com/decision/15406/Serhiy...
9,UFC on ESPN 67: Sandhagen vs. Figueiredo,"Des Moines, Iowa, USA",Wells Fargo Arena,https://mmadecisions.com/decision/15405/Mason-...


In [7]:
# Save the fight-level decision links to MMA_Decisions.csv, ordered by url (descending).
# The frame written here is df_fights, so the sort is applied to df_fights rather than
# to df_event. The ordering is applied only to the exported copy: df_fights itself
# keeps the order in which the fights were scraped.
df_fights.sort_values(by='url', ascending=False).to_csv('MMA_Decisions.csv', index=False)

In [9]:
# Fetch every decision page and turn its scorecard table into tidy rows:
# one row per (judge, round) with the score given to each fighter.
# Pages that cannot be parsed are recorded in error_urls instead of stopping the run.
list_scorecard = []
error_urls = []

scorecard_columns = ['Fight', 'Judge', 'Round', 'FighterOne', 'FighterTwo', 'ScoreOne', 'ScoreTwo', 'url']

# Missing urls cannot be fetched, so they are skipped.
for url in df_fights['url'].dropna():
    # Fighter names come from the last url segment, e.g. "Name-One-vs-Name-Two".
    fight = url.rsplit('/', 1)[-1].replace('-', ' ').strip()

    # Split on the standalone " vs " token. Splitting on the bare letters "vs"
    # would also match them inside a name (e.g. "Movsar") and cut it in two.
    fighter_one, separator, fighter_two = fight.rpartition(' vs ')
    if not separator:
        error_urls.append(url)
        continue
    fighter_one = fighter_one.strip()
    fighter_two = fighter_two.strip()

    # A timeout keeps a single unresponsive page from hanging the whole scrape.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Use the third table on the page, falling back to the second one.
    tables = soup.find_all('table')
    if len(tables) >= 3:
        score_table = tables[2]
    elif len(tables) == 2:
        score_table = tables[1]
    else:
        error_urls.append(url)
        continue

    # Keep "top-row"/"decision" rows, except "decision-bottom2" rows and the
    # "TALE OF THE TAPE" row, and split each row's text into its lines.
    rows = score_table.find_all('tr', class_=['top-row', 'decision'])
    data = [
        row.get_text(strip=False).splitlines()
        for row in rows
        if 'decision-bottom2' not in row.get('class', [])
        and "TALE OF THE TAPE" not in row.text
    ]

    fight_scores = pd.DataFrame(data)

    # Columns 1-3 only exist when at least one row produced four text lines;
    # without them there is nothing to parse on this page.
    if not {1, 2, 3}.issubset(fight_scores.columns):
        error_urls.append(url)
        continue

    fight_scores = fight_scores[[1, 2, 3]]
    # na=False: shorter rows are padded with None, which must not break the filter.
    fight_scores = fight_scores[~fight_scores[1].str.contains("LEGEND", na=False)]
    fight_scores = fight_scores.reset_index(drop=True)

    # The first row whose columns 1 and 3 are both blank marks where parsing starts;
    # everything above it is discarded.
    blank_rows = (fight_scores[1] == '') & (fight_scores[3] == '')
    if not blank_rows.any():
        error_urls.append(url)
        continue
    blank_row_index = blank_rows.idxmax()

    fight_scores = fight_scores.iloc[blank_row_index:].reset_index(drop=True)

    # Drop the "ROUND" header rows; copy so the new column below is added to an
    # independent frame rather than to a filtered view.
    fight_scores = fight_scores[~fight_scores[1].str.contains("ROUND", na=False)].copy()

    # Column 2 holds either a score or, when it contains a letter, a judge's name.
    # Carry the most recent name forward onto every following row.
    judges = []
    current_judge = None
    for value in fight_scores[2]:
        if isinstance(value, str) and any(char.isalpha() for char in value):
            current_judge = value
        judges.append(current_judge)
    fight_scores['Judge'] = judges

    # Rows with a blank column 1 are not score rows and are dropped.
    fight_scores = (
        fight_scores[fight_scores[1] != '']
        .rename(columns={1: 'Round', 2: 'ScoreOne', 3: 'ScoreTwo'})
        .assign(FighterOne=fighter_one, FighterTwo=fighter_two, Fight=fight, url=url)
    )

    list_scorecard.append(fight_scores[scorecard_columns])


# pd.concat raises on an empty list, so fall back to an empty frame with the same columns.
if list_scorecard:
    df_scorecards = pd.concat(list_scorecard, ignore_index=True)
else:
    df_scorecards = pd.DataFrame(columns=scorecard_columns)

print("Errors:", len(error_urls))

df_scorecards.head(10)

Errors: 373


Unnamed: 0,Fight,Judge,Round,FighterOne,FighterTwo,ScoreOne,ScoreTwo,url
0,Jack Della Maddalena vs Belal Muhammad,Michael Bell,1,Jack Della Maddalena,Belal Muhammad,9,10,https://mmadecisions.com/decision/15422/Jack-D...
1,Jack Della Maddalena vs Belal Muhammad,Michael Bell,2,Jack Della Maddalena,Belal Muhammad,10,9,https://mmadecisions.com/decision/15422/Jack-D...
2,Jack Della Maddalena vs Belal Muhammad,Michael Bell,3,Jack Della Maddalena,Belal Muhammad,10,9,https://mmadecisions.com/decision/15422/Jack-D...
3,Jack Della Maddalena vs Belal Muhammad,Michael Bell,4,Jack Della Maddalena,Belal Muhammad,9,10,https://mmadecisions.com/decision/15422/Jack-D...
4,Jack Della Maddalena vs Belal Muhammad,Michael Bell,5,Jack Della Maddalena,Belal Muhammad,10,9,https://mmadecisions.com/decision/15422/Jack-D...
5,Jack Della Maddalena vs Belal Muhammad,Eric Colón,1,Jack Della Maddalena,Belal Muhammad,10,9,https://mmadecisions.com/decision/15422/Jack-D...
6,Jack Della Maddalena vs Belal Muhammad,Eric Colón,2,Jack Della Maddalena,Belal Muhammad,10,9,https://mmadecisions.com/decision/15422/Jack-D...
7,Jack Della Maddalena vs Belal Muhammad,Eric Colón,3,Jack Della Maddalena,Belal Muhammad,10,9,https://mmadecisions.com/decision/15422/Jack-D...
8,Jack Della Maddalena vs Belal Muhammad,Eric Colón,4,Jack Della Maddalena,Belal Muhammad,9,10,https://mmadecisions.com/decision/15422/Jack-D...
9,Jack Della Maddalena vs Belal Muhammad,Eric Colón,5,Jack Della Maddalena,Belal Muhammad,10,9,https://mmadecisions.com/decision/15422/Jack-D...


In [10]:
# Order the scorecard rows by their url value, highest first.
df_scorecards = df_scorecards.sort_values('url', ascending=False)

# Write the sorted scorecards to MMA_Scorecards.csv, leaving out the DataFrame index.
df_scorecards.to_csv('MMA_Scorecards.csv', index=False)