In [17]:
#Import libraries for web-scraping and saving to CSV file.
import requests
import bs4
import os
import pandas as pd
from datetime import datetime
import time
import logging

### get event urls

In [18]:


main_url = requests.get('http://ufcstats.com/statistics/events/completed?page=all')
main_event_soup = bs4.BeautifulSoup(main_url.text, 'lxml')


#Adds href to list if href contains a link with keyword 'event-details'
all_event_urls = [item.get('href') for item in  main_event_soup.find_all('a') 
                    if type(item.get('href')) == str 
                    and 'event-details' in item.get('href')]


### get events from event urls

In [19]:
event = all_event_urls[0]
event_request = requests.get(event)
event_soup = bs4.BeautifulSoup(event_request.text,'lxml')

In [20]:
event_full_location = event_soup.select('li')[4].text.split(':')[1].strip().split(',')

In [21]:
location_tag = [i.text for i in event_soup.select('li.b-list__box-list-item') if 'location' in str.lower(i.text)]
date_tag = [i.text for i in event_soup.select('li.b-list__box-list-item') if 'date' in str.lower(i.text)]


In [22]:
event_full_location = location_tag[0].split(':')[1].strip().split(',')

In [23]:
try:
    
    event_name = event_soup.select('h2')[0].text
    event_date = str(datetime.strptime(date_tag[0].split(':')[-1].strip(), '%B %d, %Y'))
    event_city = event_full_location[0]
    event_country = event_full_location[-1]

    if len(event_full_location)>2:
        event_state = event_full_location[1]
    else:
        event_state = 'NULL'    

except (IndexError, AttributeError, ValueError) as e:
    logging.warning(f"Known error scraping event page: {event} — {type(e).__name__}: {e}")
except Exception as e:
    logging.error(f"Unexpected error scraping event page: {event} — {type(e).__name__}: {e}")

In [24]:
{'event_url':event,
 'event_name':event_name.strip(),
 'event_date': event_date,
  'event_state': event_state,
  'event_city': event_city,
   'event_country': event_country}

{'event_url': 'http://ufcstats.com/event-details/de20ffb3fc2e7629',
 'event_name': 'UFC Fight Night: Sandhagen vs. Figueiredo',
 'event_date': '2025-05-03 00:00:00',
 'event_state': ' Iowa',
 'event_city': 'Des Moines',
 'event_country': ' USA'}

### get upcomingevents 

In [25]:
request_url = 'http://ufcstats.com/statistics/events/upcoming?page=all'
response = requests.get(request_url)
upcoming_event_soup = bs4.BeautifulSoup(response.text,'lxml')

upcoming_event_urls = [item.get('href') for item in upcoming_event_soup.find_all('a') if type(item.get('href')) == type('a') and 'event-details' in item.get('href')]

names_and_dates = [i.text.strip() for i in upcoming_event_soup.select('i.b-statistics__table-content')]
locations = [i.text.strip() for i in upcoming_event_soup.select('td.b-statistics__table-col_style_big-top-padding')]

event_names_df = pd.DataFrame(columns = ['event_url','event_name','event_date','event_location'])
for i,j,k in zip(names_and_dates,locations,upcoming_event_urls):
    temp = i.split('\n')
    name = temp[0]
    date = temp[-1]
    event_names_df.loc[len(event_names_df.index)] = [k, name, date, j]
    

In [26]:
event_names_df.head()

Unnamed: 0,event_url,event_name,event_date,event_location
0,http://ufcstats.com/event-details/de20ffb3fc2e...,UFC Fight Night: Sandhagen vs. Figueiredo,"May 03, 2025","Des Moines, Iowa, USA"
1,http://ufcstats.com/event-details/118463dd8db1...,UFC 315: Muhammad vs. Della Maddalena,"May 10, 2025","Montreal, Quebec, Canada"
2,http://ufcstats.com/event-details/8ad022dd8122...,UFC Fight Night: Burns vs. Morales,"May 17, 2025","Las Vegas, Nevada, USA"
3,http://ufcstats.com/event-details/2a898bf9fb77...,UFC Fight Night: Blanchfield vs. Barber,"May 31, 2025","Las Vegas, Nevada, USA"
4,http://ufcstats.com/event-details/18c49685296c...,UFC 316: Dvalishvili vs. O'Malley 2,"June 07, 2025","Newark, New Jersey, USA"


In [27]:
upcoming_event_url = upcoming_event_urls[0]
reponse_upcoming_event = requests.get(upcoming_event_url)
fight_card_soup = bs4.BeautifulSoup(reponse_upcoming_event.text,'lxml')

df = pd.DataFrame(columns = ['event_url','fighter_1','fighter_2','weight_class'])
set_info = [i.text.strip() for i in fight_card_soup.select("p.b-fight-details__table-text")
  if 'View Matchup' not in i.text and len(i.text.strip()) > 0]
for set_idx in range(len(set_info)//3):
    df.loc[len(df.index)] = [upcoming_event_url,set_info[set_idx*3],set_info[set_idx*3+1],set_info[set_idx*3+2]]

In [29]:
df.set_index('event_url').head()

Unnamed: 0_level_0,fighter_1,fighter_2,weight_class
event_url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
http://ufcstats.com/event-details/de20ffb3fc2e7629,Cory Sandhagen,Deiveson Figueiredo,Bantamweight
http://ufcstats.com/event-details/de20ffb3fc2e7629,Reinier de Ridder,Bo Nickal,Middleweight
http://ufcstats.com/event-details/de20ffb3fc2e7629,Santiago Ponzinibbio,Daniel Rodriguez,Welterweight
http://ufcstats.com/event-details/de20ffb3fc2e7629,Montel Jackson,Daniel Marcos,Bantamweight
http://ufcstats.com/event-details/de20ffb3fc2e7629,Cameron Smotherman,Serhiy Sidey,Bantamweight
