In [4]:
from datetime import datetime
import requests
import pandas as pd
from bs4 import BeautifulSoup
import UFC_Scrape_Library as LIB


In [5]:
# Build one listing-page URL per calendar year, from 1994 up to and
# including the year in which the notebook is executed.
currentyear = datetime.now().year

urls_yearly_events = [
    'https://mmadecisions.com/decisions-by-event/' + str(year) + '/'
    for year in range(1994, currentyear + 1)
]

In [6]:
# Scrape every yearly listing page and collect one record per event row:
# the stripped text of each <td> followed by the absolute event URL.
all_event_data = []

for url in urls_yearly_events:
    # A timeout keeps a single stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    for row in soup.find_all('tr', class_='decision'):
        row_data = [cell.text.strip() for cell in row.find_all('td')]

        # First <a> tag in the row, if any; its href is joined onto the site root below.
        a_tag = row.find('a')
        href = a_tag.get('href') if a_tag else None

        # Only build the absolute URL when a link is present. Concatenating
        # the site root with None would raise TypeError, so a row without a
        # link keeps None in its url column instead.
        event_url = ('https://mmadecisions.com/' + href) if href else None

        row_data.append(event_url)
        all_event_data.append(row_data)

# Create DataFrame with an extra column for the event URL
df_event = pd.DataFrame(all_event_data, columns=['Date', 'Event', 'NumFights', 'url'])

# Convert the date column from text to datetime.date objects
df_event['Date'] = pd.to_datetime(df_event['Date']).dt.date

# Sort descending by date (most recent events first)
df_event = df_event.sort_values(by='Date', ascending=False)

df_event.head(10)

Unnamed: 0,Date,Event,NumFights,url
1489,2025-04-19,CW 187: Glasgow,3,https://mmadecisions.com/event/1541/CW-187-Gla...
1490,2025-04-18,PFL 3: 2025 Season,2,https://mmadecisions.com/event/1540/PFL-3-2025...
1491,2025-04-12,UFC 314: Volkanovski vs. Lopes,5,https://mmadecisions.com/event/1538/UFC-314-Vo...
1492,2025-04-11,PFL 2: 2025 Season,3,https://mmadecisions.com/event/1539/PFL-2-2025...
1493,2025-04-05,UFC on ESPN 65: Emmett vs. Murphy,8,https://mmadecisions.com/event/1536/UFC-on-ESP...
1494,2025-04-03,PFL 1: 2025 Season,2,https://mmadecisions.com/event/1537/PFL-1-2025...
1495,2025-03-29,UFC on ESPN 64: Moreno vs. Erceg,7,https://mmadecisions.com/event/1535/UFC-on-ESP...
1496,2025-03-22,UFC on ESPN+ 113: Edwards vs. Brady,9,https://mmadecisions.com/event/1534/UFC-on-ESP...
1497,2025-03-21,CW 186: Cage Warriors 186,7,https://mmadecisions.com/event/1533/CW-186-Cag...
1498,2025-03-15,CW 185: Cage Warriors 185,5,https://mmadecisions.com/event/1532/CW-185-Cag...


In [7]:
# Persist the event table to MMA_Events.csv; the DataFrame index is not written.
df_event.to_csv(path_or_buf='MMA_Events.csv', index=False)


In [8]:
# Visit every event page and collect one record per 'list2' cell:
# [decision url, event-header field 1, field 2, field 3].
all_fight_data = []

# Skip events whose URL is missing; requests.get cannot fetch a null URL.
for url in df_event['url'].dropna():
    # A timeout keeps a single stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Get event info: only the first 'decision-top2' cell is used. Its text
    # fragments are joined with '||| ' and split again so that each fragment
    # becomes its own list item.
    event_info_soup = soup.find_all('td', class_='decision-top2')
    if event_info_soup:
        current_event = event_info_soup[0].get_text(strip=True, separator='||| ').split('||| ')
    else:
        # No header cell on this page: indexing [0] would raise IndexError.
        current_event = []

    # Normalise to exactly three fields (mapped to Event, Venue, Location
    # below) so a page with a short or long header cannot make the
    # DataFrame construction fail after all pages have been scraped.
    # NOTE(review): any fields beyond the third are dropped; confirm that
    # is acceptable.
    current_event = (current_event + [None] * 3)[:3]

    for row in soup.find_all('td', class_='list2'):
        # Find the <a> tag (if it exists) within the cell
        a_tag = row.find('a')
        link = a_tag.get('href') if a_tag else None

        # Absolute decision URL if a link was found, otherwise None
        href = [('https://mmadecisions.com/' + link) if link else None]
        href.extend(current_event)

        all_fight_data.append(href)

# Create dataframe
df_fights = pd.DataFrame(all_fight_data, columns=['url', 'Event', 'Venue', 'Location'])

# Reorder columns
df_fights = df_fights[['Event', 'Location', 'Venue', 'url']]
df_fights.head(10)


Unnamed: 0,Event,Location,Venue,url
0,CW 187: Glasgow,"Glasgow, Scotland",Braehead Arena,https://mmadecisions.com/decision/15372/Alexan...
1,CW 187: Glasgow,"Glasgow, Scotland",Braehead Arena,https://mmadecisions.com/decision/15371/Paull-...
2,CW 187: Glasgow,"Glasgow, Scotland",Braehead Arena,https://mmadecisions.com/decision/15370/Redgie...
3,PFL 3: 2025 Season,"Orlando, Florida, USA",Universal Studios Florida,https://mmadecisions.com/decision/15369/Joshua...
4,PFL 3: 2025 Season,"Orlando, Florida, USA",Universal Studios Florida,https://mmadecisions.com/decision/15368/Aaron-...
5,UFC 314: Volkanovski vs. Lopes,"Miami, Florida, USA",Kaseya Center,https://mmadecisions.com/decision/15364/Alexan...
6,UFC 314: Volkanovski vs. Lopes,"Miami, Florida, USA",Kaseya Center,https://mmadecisions.com/decision/15362/Yair-R...
7,UFC 314: Volkanovski vs. Lopes,"Miami, Florida, USA",Kaseya Center,https://mmadecisions.com/decision/15358/Virna-...
8,UFC 314: Volkanovski vs. Lopes,"Miami, Florida, USA",Kaseya Center,https://mmadecisions.com/decision/15357/Chase-...
9,UFC 314: Volkanovski vs. Lopes,"Miami, Florida, USA",Kaseya Center,https://mmadecisions.com/decision/15354/Su-Mud...


In [9]:
# Re-sort the event table by its 'url' column, descending. The values are
# strings, so the ordering is lexicographic.
# NOTE(review): this re-sorted df_event is not used by the save below;
# confirm whether the sort is still needed.
df_event = df_event.sort_values('url', ascending=False)

# Write the per-fight table to MMA_Decisions.csv; the DataFrame index is not written.
df_fights.to_csv(path_or_buf='MMA_Decisions.csv', index=False)

In [11]:
# Scrape the scorecard for every decision page listed in df_fights.
list_scorecard = []

# Iterate the url column directly and skip rows with no decision link.
for url_scorecard in df_fights['url'].dropna():
    # Pass this row's own URL. The original passed the stale global `url`
    # left over from the event loop, so every call scraped the same page.
    # presumably scorecard_scrape returns a DataFrame, since the results are
    # handed to pd.concat below; verify against UFC_Scrape_Library.
    fight_df = LIB.scorecard_scrape(url_scorecard)
    list_scorecard.append(fight_df)

# Concatenate all the per-fight results into one DataFrame
df_scorecards = pd.concat(list_scorecard, ignore_index=True)

KeyError: 1