# **1 - Scraping**

## Importing Libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
pd.set_option('display.max_columns', None)

## Scraping Fighters
**Fighters**: http://www.ufcstats.com/statistics/fighters?char=a&page=all

**Fighter**: http://www.ufcstats.com/fighter-details/93fe7332d16c6ad9

### Fetching

In [2]:
# Download the fighter listing page for every last-name initial and keep the
# parsed HTML of each page in `fighters_soups` (one BeautifulSoup per letter).
fighters_soups = []
# Fighters' Pages Sorted by Last Initial
for letter in 'abcdefghijklmnopqrstuvwxyz':
    fighters_url = f'http://www.ufcstats.com/statistics/fighters?char={letter}&page=all'
    response = requests.get(fighters_url)
    # raise_for_status must be *called*; referencing the bare attribute does
    # nothing, which let 4xx/5xx error pages be parsed as if they were listings.
    response.raise_for_status()
    page = response.text
    soup = BeautifulSoup(page, 'html.parser')
    fighters_soups.append(soup)

### Scraping

In [3]:
# Column order of the fighters table; every scraped row follows this order.
FIGHTER_COLUMNS = ['Name', 'Record', 'Height', 'Weight', 'Reach', 'Stance', 'Birthday',
                   'SLPM', 'SACC', 'SAPM', 'SD', 'TDAVG', 'TDACC', 'TDD', 'SAVG']

# Lower-case label fragments used to locate each profile field on a fighter
# page, listed in the same order as FIGHTER_COLUMNS[2:].
FIGHTER_FIELD_LABELS = ['height', 'weight', 'reach', 'stance', 'dob',
                        'slpm', 'str. acc', 'sapm', 'str. def',
                        'td avg', 'td acc', 'td def', 'sub. avg']


def _fighter_field(fighter_soup, label):
    """Return the stripped text that directly follows a labelled <i> tag.

    Parameters
    ----------
    fighter_soup : BeautifulSoup
        Parsed fighter-details page.
    label : str
        Lower-case fragment that the <i> tag's text must contain
        (the comparison lower-cases the tag text first).

    Returns
    -------
    str
        The label tag's next sibling with surrounding whitespace removed.

    Raises
    ------
    AttributeError
        If no matching <i> tag exists on the page.
    """
    label_tag = fighter_soup.find('i', string=lambda text: text and label in text.lower())
    return label_tag.next_sibling.strip()


# Rows are collected in a plain list and turned into a DataFrame once at the
# end; growing a DataFrame one row at a time re-allocates it on every insert.
fighter_rows = []

# Loop Through Fighters' Pages (Last Initial)
for listing_soup in fighters_soups:
    fighters_table = listing_soup.find('table')
    fighters_hyperlinks = fighters_table.find_all('a')
    # Loop Through Fighters w/ Same Last Initial
    # NOTE(review): the step of 3 presumably reflects three anchors per table
    # row that all point at the same fighter page — confirm against the site.
    for fighter_hyperlink in fighters_hyperlinks[::3]:
        fighter_response = requests.get(fighter_hyperlink['href'])
        fighter_response.raise_for_status()
        fighter_soup = BeautifulSoup(fighter_response.text, 'html.parser')

        # Name
        name = fighter_soup.find('span', class_='b-content__title-highlight').text.strip()
        # Record (the page text carries a 'Record: ' prefix that is dropped)
        record = fighter_soup.find('span', class_='b-content__title-record').text.strip().replace('Record: ', '')
        # Height ... SAVG, all read the same way from their labelled <i> tags
        profile_values = [_fighter_field(fighter_soup, label) for label in FIGHTER_FIELD_LABELS]

        fighter_rows.append([name, record] + profile_values)

# Create DataFrame
fighters = pd.DataFrame(fighter_rows, columns=FIGHTER_COLUMNS)

display(fighters)

Unnamed: 0,Name,Record,Height,Weight,Reach,Stance,Birthday,SLPM,SACC,SAPM,SD,TDAVG,TDACC,TDD,SAVG
0,Tom Aaron,5-3-0,--,155 lbs.,--,,"Jul 13, 1978",0.00,0%,0.00,0%,0.00,0%,0%,0.0
1,Danny Abbadi,4-6-0,"5' 11""",155 lbs.,--,Orthodox,"Jul 03, 1983",3.29,38%,4.41,57%,0.00,0%,77%,0.0
2,Nariman Abbasov,28-4-0,"5' 8""",155 lbs.,"66""",Orthodox,"Feb 01, 1994",3.00,20%,5.67,46%,0.00,0%,66%,0.0
3,David Abbott,10-15-0,"6' 0""",265 lbs.,--,Switch,--,1.35,30%,3.55,38%,1.07,33%,66%,0.0
4,Hamdy Abdelwahab,5-0-0 (1 NC),"6' 2""",264 lbs.,"72""",Southpaw,"Jan 22, 1993",3.87,52%,3.13,59%,3.00,75%,0%,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4237,Dave Zitanick,5-7-0 (1 NC),--,170 lbs.,--,,"Mar 05, 1980",0.00,0%,0.00,0%,0.00,0%,0%,0.0
4238,Alex Zuniga,6-3-0,--,145 lbs.,--,,--,0.00,0%,0.00,0%,0.00,0%,0%,0.0
4239,George Zuniga,3-1-0,"5' 9""",185 lbs.,--,,--,7.64,38%,5.45,37%,0.00,0%,100%,0.0
4240,Allan Zuniga,13-1-0,"5' 7""",155 lbs.,"70""",Orthodox,"Apr 04, 1992",3.93,52%,1.80,61%,0.00,0%,57%,1.0


### Exporting

In [4]:
# Persist the scraped fighters table as CSV, leaving out the row index
fighters.to_csv(path_or_buf='fighters.csv', index=False)

## Scraping Fights

**Events**: http://www.ufcstats.com/statistics/events/completed?page=all

**Fights** (listed on each event's page): http://www.ufcstats.com/event-details/be8ad887e4d674b0

**Fight**: http://www.ufcstats.com/fight-details/15805ae1eea3343e

### Fetching

In [5]:
# Download and parse the listing of completed events; the scraping cell reads
# the resulting `soup` to find each event's page.
fights_url = 'http://www.ufcstats.com/statistics/events/completed?page=all'
response = requests.get(fights_url)
# raise_for_status must be *called*; the bare attribute reference was a no-op,
# so an HTTP error page would have been parsed as if it were the event list.
response.raise_for_status()
page = response.text
soup = BeautifulSoup(page, 'html.parser')

### Scraping

In [7]:
# Column order of the fights table: bout metadata, then fighter A's statistics,
# then fighter B's statistics.
FIGHT_COLUMNS = ['A', 'B', 'Winner', 'Event', 'Date', 'Location', 'Division', 'Method', 'Round', 'Time', 'Format', 'Referee',
'A_KD', 'A_SS', 'A_SSP', 'A_TS', 'A_TD', 'A_TDP', 'A_SA', 'A_REV', 'A_CTRL', 'A_H', 'A_B', 'A_L', 'A_D', 'A_C', 'A_G',
'B_KD', 'B_SS', 'B_SSP', 'B_TS', 'B_TD', 'B_TDP', 'B_SA', 'B_REV', 'B_CTRL', 'B_H', 'B_B', 'B_L', 'B_D', 'B_C', 'B_G'
]

# Slice of the events listing that is scraped.
# NOTE(review): the first two rows are presumably header/blank rows, and the
# cap of 100 events looks like a deliberate limit — confirm both.
FIRST_EVENT_ROW = 2
MAX_EVENTS = 100


def _labelled_tag(fight_soup, label):
    """Return the first <i> tag whose text contains `label` (case-insensitive)."""
    return fight_soup.find('i', string=lambda text: text and label in text.lower())


def _winner_name(fight_soup):
    """Return the name of the fighter marked as the winner, or None.

    NOTE(review): assumes each fighter sits in a
    <div class="b-fight-details__person"> that contains an
    <i class="b-fight-details__person-status"> whose text is 'W' for the
    winner — confirm against the live page. When no fighter is marked 'W'
    (presumably draws / no contests) None is returned instead of guessing.
    """
    for person in fight_soup.find_all('div', class_='b-fight-details__person'):
        status = person.find('i', class_='b-fight-details__person-status')
        link = person.find('a', class_='b-link b-fight-details__person-link')
        if status is not None and link is not None and status.text.strip() == 'W':
            return link.text.strip()
    return None


def _paired_stats(table_body, skip, n_stats):
    """Split a statistics table body into (fighter A values, fighter B values).

    The text cells alternate A, B, A, B, ...; the first `skip` cells are
    ignored and the following `2 * n_stats` cells are read.

    Raises
    ------
    IndexError
        If the table holds fewer cells than expected.
    """
    cells = table_body.find_all('p', class_='b-fight-details__table-text')
    wanted = cells[skip:skip + 2 * n_stats]
    if len(wanted) != 2 * n_stats:
        raise IndexError(f'expected {2 * n_stats} stat cells after skipping {skip}, found {len(wanted)}')
    values = [cell.text.strip() for cell in wanted]
    return values[0::2], values[1::2]


# Rows are collected in a list and converted to a DataFrame once at the end
# instead of enlarging the DataFrame on every fight.
fight_rows = []

events_table = soup.find('table')
events_table_rows = events_table.find_all('tr', class_='b-statistics__table-row')
# Loop Through Events
for events_table_row in events_table_rows[FIRST_EVENT_ROW:FIRST_EVENT_ROW + MAX_EVENTS]:
    event_anchor = events_table_row.find('a')
    event_name = event_anchor.text.strip() # Event
    #! ERROR CHECKING
    print(f'EVENT: {event_name}')
    print('******************************************************')
    event_date = events_table_row.find('span').text.strip() # Date
    event_location = events_table_row.find('td', class_='b-statistics__table-col b-statistics__table-col_style_big-top-padding').text.strip() # Location

    # Click Event Hyperlink (e.g., http://www.ufcstats.com/event-details/be8ad887e4d674b0)
    event_response = requests.get(event_anchor['href'])
    event_response.raise_for_status()
    # Name the parser explicitly (same one the fetch cells use); the bare
    # feature 'html' lets BeautifulSoup pick whichever parser is installed.
    event_soup = BeautifulSoup(event_response.text, 'html.parser')

    # Loop Through Fights in Event
    fight_table_rows = event_soup.find_all('tr', class_='b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click')
    for fight_table_row in fight_table_rows:
        fight_link = fight_table_row['data-link'] # Get Fight Hyperlink
        # Click Fight Hyperlink (e.g., http://www.ufcstats.com/fight-details/15805ae1eea3343e)
        fight_response = requests.get(fight_link)
        fight_response.raise_for_status()
        fight_soup = BeautifulSoup(fight_response.text, 'html.parser')

        # Header
        person_links = fight_soup.find_all('a', class_='b-link b-fight-details__person-link')
        a = person_links[0].text.strip() # A
        b = person_links[1].text.strip() # B
        #! ERROR CHECKING
        print(f'FIGHT: {a} vs. {b}')
        print('----------------------------------------')
        # Previously hard-coded to `b`, which labelled fighter B the winner of every fight.
        winner = _winner_name(fight_soup) # Winner
        division = fight_soup.find('i', class_='b-fight-details__fight-title').text.strip() # Division
        method = _labelled_tag(fight_soup, 'method').find_next('i').text.strip() # Method
        # Local names avoid shadowing the builtins `round` and `format`.
        fight_round = _labelled_tag(fight_soup, 'round').next_sibling.strip() # Round
        fight_time = _labelled_tag(fight_soup, 'time').next_sibling.strip() # Time
        fight_format = _labelled_tag(fight_soup, 'format').next_sibling.strip() # Format
        referee = _labelled_tag(fight_soup, 'referee').find_next('span').text.strip() # Referee

        stat_bodies = fight_soup.find_all('tbody', class_='b-fight-details__table-body')
        # Table 1 (body 0): KD, SS, SSP, TS, TD, TDP, SA, REV, CTRL after 2 leading cells
        a_totals, b_totals = _paired_stats(stat_bodies[0], skip=2, n_stats=9)
        # Table 2 (body 2): H, B, L, D, C, G after 6 leading cells
        a_targets, b_targets = _paired_stats(stat_bodies[2], skip=6, n_stats=6)

        # Same layout as FIGHT_COLUMNS: metadata, all A stats, then all B stats
        fight_rows.append(
            [a, b, winner, event_name, event_date, event_location, division, method,
             fight_round, fight_time, fight_format, referee]
            + a_totals + a_targets
            + b_totals + b_targets
        )

# Create DataFrame
fights = pd.DataFrame(fight_rows, columns=FIGHT_COLUMNS)

display(fights.head())
display(fights.tail())

EVENT: UFC Fight Night: Cannonier vs. Borralho
******************************************************
FIGHT: Jared Cannonier vs. Caio Borralho
----------------------------------------
FIGHT: Angela Hill vs. Tabatha Ricci
----------------------------------------
FIGHT: Robert Valentin vs. Ryan Loder
----------------------------------------
FIGHT: Kaan Ofli vs. Mairon Santos
----------------------------------------
FIGHT: Neil Magny vs. Michael Morales
----------------------------------------
FIGHT: Edmen Shahbazyan vs. Gerald Meerschaert
----------------------------------------
FIGHT: Dennis Buzukja vs. Francis Marshall
----------------------------------------
FIGHT: Zachary Reese vs. Jose Daniel Medina
----------------------------------------
FIGHT: Viacheslav Borshchev vs. James Llontop
----------------------------------------
FIGHT: Jacqueline Cavalcanti vs. Josiane Nunes
----------------------------------------
FIGHT: Wang Cong vs. Victoria Leonardo
---------------------------------

Unnamed: 0,A,B,Winner,Event,Date,Location,Division,Method,Round,Time,Format,Referee,A_KD,A_SS,A_SSP,A_TS,A_TD,A_TDP,A_SA,A_REV,A_CTRL,A_H,A_B,A_L,A_D,A_C,A_G,B_KD,B_SS,B_SSP,B_TS,B_TD,B_TDP,B_SA,B_REV,B_CTRL,B_H,B_B,B_L,B_D,B_C,B_G
0,Jared Cannonier,Caio Borralho,Caio Borralho,UFC Fight Night: Cannonier vs. Borralho,"August 24, 2024","Las Vegas, Nevada, USA",Middleweight Bout,Decision - Unanimous,5,5:00,5 Rnd (5-5-5-5-5),Dan Miragliotta,0,83 of 254,32%,83 of 254,0 of 0,---,0,0,0:00,57 of 206,11 of 24,15 of 24,82 of 252,1 of 2,0 of 0,1,153 of 255,60%,156 of 258,0 of 1,0%,1,0,1:59,106 of 197,26 of 37,21 of 21,129 of 224,2 of 3,22 of 28
1,Angela Hill,Tabatha Ricci,Tabatha Ricci,UFC Fight Night: Cannonier vs. Borralho,"August 24, 2024","Las Vegas, Nevada, USA",Women's Strawweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Keith Peterson,0,112 of 216,51%,140 of 245,0 of 2,0%,0,0,0:10,66 of 152,43 of 61,3 of 3,88 of 182,19 of 26,5 of 8,0,101 of 239,42%,140 of 280,3 of 4,75%,0,0,5:04,82 of 215,8 of 13,11 of 11,88 of 219,10 of 14,3 of 6
2,Robert Valentin,Ryan Loder,Ryan Loder,UFC Fight Night: Cannonier vs. Borralho,"August 24, 2024","Las Vegas, Nevada, USA",Middleweight Bout,KO/TKO,2,1:49,3 Rnd + OT (5-5-5-5),Mark Smith,0,1 of 2,50%,5 of 6,0 of 1,0%,2,1,1:38,1 of 2,0 of 0,0 of 0,1 of 2,0 of 0,0 of 0,0,42 of 52,80%,63 of 73,1 of 3,33%,1,1,4:45,41 of 51,1 of 1,0 of 0,1 of 2,0 of 0,41 of 50
3,Kaan Ofli,Mairon Santos,Mairon Santos,UFC Fight Night: Cannonier vs. Borralho,"August 24, 2024","Las Vegas, Nevada, USA",Featherweight Bout,KO/TKO,2,1:30,3 Rnd + OT (5-5-5-5),Chris Tognoni,0,12 of 28,42%,19 of 35,0 of 0,---,0,0,1:33,10 of 22,1 of 2,1 of 4,11 of 27,1 of 1,0 of 0,1,31 of 67,46%,32 of 68,0 of 0,---,0,0,0:02,15 of 48,9 of 12,7 of 7,27 of 62,1 of 2,3 of 3
4,Neil Magny,Michael Morales,Michael Morales,UFC Fight Night: Cannonier vs. Borralho,"August 24, 2024","Las Vegas, Nevada, USA",Welterweight Bout,KO/TKO,1,4:39,3 Rnd (5-5-5),Dan Miragliotta,0,5 of 11,45%,13 of 19,0 of 2,0%,0,0,2:05,1 of 7,2 of 2,2 of 2,3 of 8,1 of 2,1 of 1,1,34 of 40,85%,39 of 45,0 of 0,---,0,0,1:16,30 of 35,0 of 0,4 of 5,4 of 5,0 of 0,30 of 35


Unnamed: 0,A,B,Winner,Event,Date,Location,Division,Method,Round,Time,Format,Referee,A_KD,A_SS,A_SSP,A_TS,A_TD,A_TDP,A_SA,A_REV,A_CTRL,A_H,A_B,A_L,A_D,A_C,A_G,B_KD,B_SS,B_SSP,B_TS,B_TD,B_TDP,B_SA,B_REV,B_CTRL,B_H,B_B,B_L,B_D,B_C,B_G
1213,Andre Fialho,Cameron VanCamp,Cameron VanCamp,UFC 274: Oliveira vs. Gaethje,"May 07, 2022","Phoenix, Arizona, USA",Welterweight Bout,KO/TKO,1,2:35,3 Rnd (5-5-5),Marc Goddard,1,17 of 24,70%,17 of 24,0 of 0,---,0,0,0:02,12 of 19,2 of 2,3 of 3,16 of 23,0 of 0,1 of 1,0,19 of 31,61%,19 of 31,0 of 0,---,0,0,0:00,9 of 19,4 of 6,6 of 6,19 of 31,0 of 0,0 of 0
1214,Tracy Cortez,Melissa Gatto,Melissa Gatto,UFC 274: Oliveira vs. Gaethje,"May 07, 2022","Phoenix, Arizona, USA",Women's Flyweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Jason Herzog,0,36 of 66,54%,70 of 102,2 of 3,66%,1,1,7:54,25 of 51,6 of 9,5 of 6,14 of 37,2 of 2,20 of 27,0,30 of 73,41%,49 of 95,3 of 6,50%,1,1,3:17,25 of 61,3 of 7,2 of 5,27 of 69,0 of 1,3 of 3
1215,Kleydson Rodrigues,CJ Vergara,CJ Vergara,UFC 274: Oliveira vs. Gaethje,"May 07, 2022","Phoenix, Arizona, USA",Flyweight Bout,Decision - Split,3,5:00,3 Rnd (5-5-5),Keith Peterson,0,81 of 143,56%,93 of 156,2 of 7,28%,1,2,3:14,39 of 91,32 of 40,10 of 12,70 of 131,6 of 7,5 of 5,0,71 of 127,55%,155 of 221,0 of 0,---,0,0,4:20,42 of 89,18 of 25,11 of 13,45 of 92,7 of 10,19 of 25
1216,Ariane Carnelossi,Loopy Godinez,Loopy Godinez,UFC 274: Oliveira vs. Gaethje,"May 07, 2022","Phoenix, Arizona, USA",Women's Strawweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Jason Herzog,0,9 of 20,45%,23 of 35,0 of 1,0%,0,0,0:00,7 of 18,1 of 1,1 of 1,4 of 13,3 of 5,2 of 2,0,46 of 61,75%,130 of 173,8 of 8,100%,2,0,13:26,36 of 49,10 of 12,0 of 0,11 of 19,8 of 12,27 of 30
1217,Journey Newson,Fernie Garcia,Fernie Garcia,UFC 274: Oliveira vs. Gaethje,"May 07, 2022","Phoenix, Arizona, USA",Bantamweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Ryan Brueggeman,0,57 of 143,39%,63 of 150,2 of 3,66%,0,0,0:42,23 of 92,21 of 30,13 of 21,52 of 138,1 of 1,4 of 4,0,37 of 103,35%,38 of 104,0 of 0,---,0,0,0:02,24 of 84,6 of 12,7 of 7,37 of 101,0 of 2,0 of 0


### Exporting

In [None]:
# Persist the scraped fights table as CSV, leaving out the row index
fights.to_csv(path_or_buf='fights.csv', index=False)