In [1]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import regex as re

In [2]:
def scrape_data(url_list):
    all_dfs = []  # List to store DataFrames from all URLs
    
    for url in url_list:
        response = requests.get(url)
        if response.status_code == 200:
            soup = bs(response.content, 'html.parser')

            # Initialize a list to store DataFrames
            dfs = []

            # Find all game divs
            game_divs = soup.find_all('div', class_='vm-stats-game')

            for game_div in game_divs:
                # Find the table within each game div
                table = game_div.find('table', class_='wf-table-inset mod-adv-stats')

                if table:
                    # Extract table data into a DataFrame
                    table_data = []
                    rows = table.find_all('tr')
                    for row in rows:
                        row_data = [cell.text.strip() for cell in row.find_all(['td', 'th'])]
                        table_data.append(row_data)

                    # Convert table_data into a DataFrame and append to dfs list
                    df = pd.DataFrame(table_data[1:], columns=table_data[0])  # Assuming first row is header
                    dfs.append(df)

            # Drop the first DataFrame (if available)
            if dfs:
                dfs.pop(0)  # Remove the first DataFrame
                all_dfs.extend(dfs)
            else:
                print('No DataFrames found for URL:', url)

        else:
            print('Failed to retrieve the webpage. Status code:', response.status_code)

    return all_dfs

def process_data_frames(data_frames):
    processed_dfs = {}  # Dictionary to store processed DataFrames

    for i, df in enumerate(data_frames):
        df_copy = df.copy()

        # Rename the columns
        df_copy.columns = ['name', 'blank', '2K', '3K', '4K', '5K', '1v1', '1v2', '1v3', '1v4', '1v5', 'ECON', 'PL', 'DE']

        # Clean the 'name' column
        df_copy['name'] = df_copy['name'].str.strip().str.replace('\t', '').str.replace('\n', '')

        # Clean the team names
        team_names = ['MIBR', 'LEV', 'SEN', 'NRG', 'FUR', '100T', 'LOUD', 'EG', 'G2', 'C9', 'KRÜ']
        for team in team_names:
            df_copy['name'] = df_copy['name'].str.replace(team, '').str.strip()

        # Drop all unneeded columns
        df_copy = df_copy.drop(columns=['blank', '2K', '3K', '4K', '5K', 'ECON', 'PL', 'DE'])

        # Extract the first character from each cell in 'col1'
        df_copy['1v1'] = df_copy['1v1'].str.extract(r'^(\d+)')
        df_copy['1v2'] = df_copy['1v2'].str.extract(r'^(\d+)')
        df_copy['1v3'] = df_copy['1v3'].str.extract(r'^(\d+)')
        df_copy['1v4'] = df_copy['1v4'].str.extract(r'^(\d+)')
        df_copy['1v5'] = df_copy['1v5'].str.extract(r'^(\d+)')

        df_copy = df_copy.fillna(0)

        # Convert all columns except the first one to integers
        for column in df_copy.columns[1:]:
            df_copy[column] = df_copy[column].astype(int)

        # Calculate the total clutches and create a new column
        df_copy['clutches'] = df_copy[['1v1', '1v2', '1v3', '1v4', '1v5']].sum(axis=1)

        # Drop all unneeded columns
        df_copy = df_copy.drop(columns=['1v1', '1v2', '1v3', '1v4', '1v5'])

        # Store the processed DataFrame with a unique name
        processed_dfs[f'df_{i+1}'] = df_copy
    
    return processed_dfs

In [3]:
# Example usage:
url_list = [
 'https://www.vlr.gg/353177/mibr-vs-leviat-n-champions-tour-2024-americas-stage-2-w1/?game=all&tab=performance',
 'https://www.vlr.gg/353178/sentinels-vs-nrg-esports-champions-tour-2024-americas-stage-2-w1/?game=all&tab=performance',
 'https://www.vlr.gg/353179/furia-vs-100-thieves-champions-tour-2024-americas-stage-2-w1/?game=all&tab=performance',
 'https://www.vlr.gg/353180/loud-vs-evil-geniuses-champions-tour-2024-americas-stage-2-w1/?game=all&tab=performance',
 'https://www.vlr.gg/353181/g2-esports-vs-cloud9-champions-tour-2024-americas-stage-2-w1/?game=all&tab=performance',
 'https://www.vlr.gg/353182/evil-geniuses-vs-furia-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
 'https://www.vlr.gg/353183/sentinels-vs-kr-esports-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
 'https://www.vlr.gg/353184/loud-vs-mibr-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
 'https://www.vlr.gg/353185/leviat-n-vs-100-thieves-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
 'https://www.vlr.gg/353186/nrg-esports-vs-g2-esports-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
 'https://www.vlr.gg/353187/sentinels-vs-cloud9-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
 'https://www.vlr.gg/353188/evil-geniuses-vs-leviat-n-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
 'https://www.vlr.gg/353189/loud-vs-furia-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
 'https://www.vlr.gg/353190/mibr-vs-100-thieves-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
 'https://www.vlr.gg/353191/nrg-esports-vs-kr-esports-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
 'https://www.vlr.gg/353192/g2-esports-vs-kr-esports-champions-tour-2024-americas-stage-2-w3/?game=all&tab=performance',
 'https://www.vlr.gg/353193/loud-vs-leviat-n-champions-tour-2024-americas-stage-2-w3/?game=all&tab=performance',
 'https://www.vlr.gg/353194/mibr-vs-furia-champions-tour-2024-americas-stage-2-w3/?game=all&tab=performance',
 'https://www.vlr.gg/353195/nrg-esports-vs-cloud9-champions-tour-2024-americas-stage-2-w3/?game=all&tab=performance',
 'https://www.vlr.gg/353196/evil-geniuses-vs-100-thieves-champions-tour-2024-americas-stage-2-w3/?game=all&tab=performance',
 'https://www.vlr.gg/353197/cloud9-vs-kr-esports-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance',
 'https://www.vlr.gg/353198/evil-geniuses-vs-mibr-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance',
 'https://www.vlr.gg/353199/leviat-n-vs-furia-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance',
 'https://www.vlr.gg/353200/loud-vs-100-thieves-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance',
 'https://www.vlr.gg/353201/sentinels-vs-g2-esports-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance'
]

data_frames = scrape_data(url_list)
processed_dfs = process_data_frames(data_frames)

No DataFrames found for URL: https://www.vlr.gg/353197/cloud9-vs-kr-esports-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance
No DataFrames found for URL: https://www.vlr.gg/353198/evil-geniuses-vs-mibr-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance
No DataFrames found for URL: https://www.vlr.gg/353199/leviat-n-vs-furia-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance
No DataFrames found for URL: https://www.vlr.gg/353200/loud-vs-100-thieves-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance
No DataFrames found for URL: https://www.vlr.gg/353201/sentinels-vs-g2-esports-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance


In [4]:
# Using list conversion and index
entry_key = list(processed_dfs.keys())[0]
df = processed_dfs[entry_key]
df

Unnamed: 0,name,clutches
0,ShahZaM,0
1,liazzi,0
2,artzin,0
3,mazin,1
4,Pa1nt,0
5,C0M,0
6,tex,1
7,Mazino,0
8,aspas,0
9,kiNgg,0


In [5]:
# Using list conversion and index
entry_key = list(processed_dfs.keys())[1]
df = processed_dfs[entry_key]
df

Unnamed: 0,name,clutches
0,ShahZaM,1
1,liazzi,0
2,artzin,0
3,mazin,1
4,Pa1nt,0
5,C0M,1
6,tex,0
7,Mazino,0
8,aspas,1
9,kiNgg,0


In [6]:
# Using list conversion and index
entry_key = list(processed_dfs.keys())[2]
df = processed_dfs[entry_key]
df

Unnamed: 0,name,clutches
0,TenZ,0
1,Sacy,0
2,Zellsis,0
3,johnqt,0
4,zekken,0
5,crashies,0
6,Victor,0
7,FiNESRG,0
8,s0m,1
9,Ethan,0
