In [15]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# CSS class strings used to locate elements on the fight-details page.
_FIGHT_SECTION_CLASS = 'b-fight-details__section js-fight-section'
_FIGHT_TABLE_CLASS = 'b-fight-details__table js-fight-table'
_SECTION_MARKER_CLASS = 'b-fight-details__collapse-link_tot'


def _column_titles(table):
    """Return 'Round' followed by the text of every <th> in the table's first <thead>."""
    thead = table.find('thead')
    if thead is None:
        raise ValueError("Stats table has no <thead> header row.")
    return ['Round'] + [th.get_text(strip=True) for th in thead.find_all('th')]


def _split_row_by_fighter(row, round_label):
    """Split one <tr> into two value lists (one per fighter), each prefixed with round_label.

    Every <td> is expected to hold one <p> per fighter. A cell with a single
    <p> yields '' for the second fighter; a cell with any other number of <p>
    tags yields 'N/A' for both so that later values stay in the right column.
    """
    first, second = [round_label], [round_label]
    for cell in row.find_all('td'):
        stats = [p.get_text(strip=True) for p in cell.find_all('p')]
        if len(stats) == 2:
            first.append(stats[0])
            second.append(stats[1])
        elif len(stats) == 1:
            first.append(stats[0])
            second.append('')
        else:
            first.append('N/A')
            second.append('N/A')
    return first, second


def get_data_from_link(url, timeout=30):
    """Scrape per-round fight statistics from a fight-details page.

    Two tables are read from the page: the first per-round table found inside
    a 'js-fight-section' section, and the per-round table that follows the
    "Significant Strikes" marker. Both are reshaped to one row per fighter
    per round and then inner-joined.

    Parameters
    ----------
    url : str
        Address of the fight-details page.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled server cannot hang the
        notebook. Defaults to 30 seconds.

    Returns
    -------
    pandas.DataFrame
        The merge of both tables on round, fighter, significant strikes and
        significant-strike percentage, plus a 'URL' column holding ``url``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If an expected section, marker or table is missing from the page.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # --- First per-round table -------------------------------------------
    # Explicit `is None` checks: a parsed tag's truthiness is not a reliable
    # "was it found" signal.
    totals_table = None
    for section in soup.find_all('section', class_=_FIGHT_SECTION_CLASS):
        totals_table = section.find('table', class_=_FIGHT_TABLE_CLASS)
        if totals_table is not None:
            break
    if totals_table is None:
        raise ValueError(f"Fight section with the desired table not found: {url}")
    print("Fight section with table found.")

    totals_rows = []
    round_label = ''
    for tbody in totals_table.find_all('tbody'):
        # <th> nodes inside the body carry the round caption (e.g. "Round 1")
        # that applies to the data rows following them.
        for node in tbody.find_all(['tr', 'th']):
            if node.name == 'th':
                round_label = node.get_text(strip=True)
                continue
            totals_rows.extend(_split_row_by_fighter(node, round_label))

    df_total_data_by_round = pd.DataFrame(totals_rows, columns=_column_titles(totals_table))
    # "Round 3" -> 3, so the column can be joined against the integer round
    # counter used for the significant-strikes table.
    df_total_data_by_round['Round'] = pd.to_numeric(
        df_total_data_by_round['Round'].str.replace('Round ', '', regex=False)
    )

    # --- Significant-strikes per-round table -----------------------------
    marker = soup.find(
        'p',
        class_=_SECTION_MARKER_CLASS,
        string=lambda text: 'Significant Strikes' in text if text else False,
    )
    if marker is None:
        raise ValueError(f"Significant Strikes section not found: {url}")
    print("Significant Strikes section found.")

    sig_table = marker.parent.find_next('table', class_=_FIGHT_TABLE_CLASS)
    if sig_table is None:
        raise ValueError(f"Significant Strikes table not found: {url}")

    sig_rows = []
    round_number = 1
    for tbody in sig_table.find_all('tbody'):
        for row in tbody.find_all('tr'):
            sig_rows.extend(_split_row_by_fighter(row, round_number))
            # The counter advances once per <tr>; this assumes the table has
            # exactly one data row per round -- TODO confirm for other pages.
            round_number += 1

    df_sig_strikes = pd.DataFrame(sig_rows, columns=_column_titles(sig_table))

    # The two tables spell the strikes column differently ('Sig. str.' vs
    # 'Sig. str'), hence the separate left/right key lists.
    df_final_total = pd.merge(
        df_total_data_by_round,
        df_sig_strikes,
        left_on=['Round', 'Fighter', 'Sig. str.', 'Sig. str. %'],
        right_on=['Round', 'Fighter', 'Sig. str', 'Sig. str. %'],
    )
    df_final_total['URL'] = url
    return df_final_total

In [16]:
# Scrape one fight-details page (performs a live HTTP request) and keep the
# merged per-round frame for inspection in the next cell.
df_sig = get_data_from_link(url = 'http://www.ufcstats.com/fight-details/14e53999507c76a7')

Fight section with table found.
Significant Strikes section found.


In [17]:
# Show the merged frame: one row per fighter per round.
df_sig

Unnamed: 0,Round,Fighter,KD,Sig. str.,Sig. str. %,Total str.,Td %,Td %.1,Sub. att,Rev.,Ctrl,Sig. str,Head,Body,Leg,Distance,Clinch,Ground,URL
0,1,Sean Strickland,0,34 of 75,45%,39 of 80,0 of 0,---,0,0,0:00,34 of 75,28 of 67,2 of 4,4 of 4,34 of 75,0 of 0,0 of 0,http://www.ufcstats.com/fight-details/14e53999...
1,1,Dricus Du Plessis,0,18 of 51,35%,19 of 52,2 of 3,66%,0,0,0:28,18 of 51,8 of 38,3 of 3,7 of 10,18 of 51,0 of 0,0 of 0,http://www.ufcstats.com/fight-details/14e53999...
2,2,Sean Strickland,0,22 of 62,35%,26 of 66,0 of 0,---,0,0,0:00,22 of 62,20 of 59,1 of 2,1 of 1,22 of 62,0 of 0,0 of 0,http://www.ufcstats.com/fight-details/14e53999...
3,2,Dricus Du Plessis,0,26 of 60,43%,26 of 60,1 of 1,100%,0,0,0:22,26 of 60,11 of 42,9 of 9,6 of 9,25 of 59,1 of 1,0 of 0,http://www.ufcstats.com/fight-details/14e53999...
4,3,Sean Strickland,0,31 of 87,35%,31 of 87,0 of 0,---,0,0,0:00,31 of 87,29 of 84,2 of 3,0 of 0,31 of 87,0 of 0,0 of 0,http://www.ufcstats.com/fight-details/14e53999...
5,3,Dricus Du Plessis,0,29 of 84,34%,29 of 85,0 of 0,---,0,0,0:00,29 of 84,13 of 64,10 of 12,6 of 8,28 of 83,1 of 1,0 of 0,http://www.ufcstats.com/fight-details/14e53999...
6,4,Sean Strickland,0,33 of 85,38%,34 of 87,0 of 0,---,0,0,0:00,33 of 85,32 of 84,1 of 1,0 of 0,33 of 85,0 of 0,0 of 0,http://www.ufcstats.com/fight-details/14e53999...
7,4,Dricus Du Plessis,0,29 of 73,39%,31 of 75,3 of 5,60%,0,0,1:18,29 of 73,22 of 62,4 of 6,3 of 5,28 of 69,1 of 3,0 of 1,http://www.ufcstats.com/fight-details/14e53999...
8,5,Sean Strickland,0,53 of 99,53%,53 of 99,0 of 0,---,0,0,0:00,53 of 99,48 of 93,4 of 5,1 of 1,53 of 99,0 of 0,0 of 0,http://www.ufcstats.com/fight-details/14e53999...
9,5,Dricus Du Plessis,0,35 of 86,40%,35 of 86,0 of 2,0%,0,0,0:00,35 of 86,27 of 77,6 of 7,2 of 2,35 of 86,0 of 0,0 of 0,http://www.ufcstats.com/fight-details/14e53999...
