In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [2]:
def get_html(url, timeout=10):
    """Fetch a page and return its body as text.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up; passed straight to
        ``requests.get``. Without it a stalled connection would block forever.

    Returns
    -------
    str or None
        The decoded response body when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, timeout, or any other transport
        error raised by ``requests``).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection problems are reported the same way as a bad status code,
        # so callers only have to check for None.
        return None

    if response.status_code == 200:
        return response.text
    return None

In [3]:
def scrape_fighter_data(url, timeout=10):
    """Download one fighter page and extract its name, record and career stats.

    Parameters
    ----------
    url : str
        Address of the fighter page.
    timeout : float or tuple, optional
        Seconds to wait for the server; passed to ``requests.get`` so that a
        stalled connection cannot hang a long scraping run.

    Returns
    -------
    dict or None
        ``None`` when the page could not be fetched (transport error or a
        status code other than 200). Otherwise a dictionary with the keys
        ``'name'`` and ``'record'`` followed by one entry per career-stat list
        item, keyed by the item's title text. ``'name'`` / ``'record'`` are
        ``None`` when the corresponding element is not present on the page.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Treat network failures like a bad status: the caller skips the URL.
        return None
    if response.status_code != 200:
        return None

    # Raw bytes are handed over so BeautifulSoup does its own encoding detection.
    soup = BeautifulSoup(response.content, 'html.parser')

    def _stripped_text(tag):
        # find() returns None when nothing matches; avoid AttributeError then.
        return tag.text.strip() if tag is not None else None

    # Extract fighter name
    name = _stripped_text(soup.find('span', class_='b-content__title-highlight'))

    # Extract fighter record
    record = _stripped_text(soup.find('span', class_='b-content__title-record'))

    # Extract career statistics using specific selectors
    career_stats = {}
    stats_items = soup.select('li.b-list__box-list-item.b-list__box-list-item_type_block')
    for item in stats_items:
        title_tag = item.select_one('i.b-list__box-item-title')
        if title_tag is None:
            # A list item without a title element has no label to store it under.
            continue
        title = title_tag.get_text(strip=True)
        # The item's text contains the title as well; remove it to leave the value.
        value = item.get_text(strip=True, separator=' ').replace(title, '').strip()
        career_stats[title] = value

    # Combine extracted data
    fighter_data = {
        'name': name,
        'record': record,
        **career_stats  # Unpack the career statistics into the dictionary
    }

    return fighter_data

In [4]:
# Read the CSV of fighter page addresses; the URLs are taken from the column labelled '0'
df = pd.read_csv('all_fighter_url.csv')
urls = df['0'].tolist()
# Last expression of the cell: shows how many URLs will be scraped
len(urls)

2498

In [5]:
# Scrape every fighter page, keeping only the URLs that yielded data
fighters_data = []
for url in urls:
    data = scrape_fighter_data(url)
    if not data:
        # Nothing usable came back for this URL; move on to the next one
        continue
    # Remember which page each record came from
    data['url'] = url
    fighters_data.append(data)

In [6]:
# Build one table from the scraped records (one row per fighter dictionary)
fighters_df = pd.DataFrame(data=fighters_data)

# Write the table to disk without the row index
fighters_df.to_csv(path_or_buf='ufc_fighter_info.csv', index=False)