In [1]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import regex as re

In [2]:
def clean_dataframe(df, team_names):
    if df.empty:
        return None
    
    df_copy = df.copy()

    # Rename the columns
    df_copy.columns = ['name', 'blank', '2K', '3K', '4K', '5K', '1v1', '1v2', '1v3', '1v4', '1v5', 'ECON', 'PL', 'DE']
        
    # Clean the 'name' column
    df_copy['name'] = df_copy['name'].str.strip().str.replace('\t', '').str.replace('\n', '')
    
#     # Clean the team names
#     for team in team_names:
#         df_copy['name'] = df_copy['name'].str.replace(team, '').str.strip()

    # Extract team abbreviation and concatenate with player name
    df_copy['name'] = df_copy['name'].apply(lambda x: x.replace(' ', '') + x.split()[-1])

    # Drop all unneeded columns
    df_copy = df_copy.drop(columns=['blank', '2K', '3K', '4K', '5K', 'ECON', 'PL', 'DE'])
    
    # Apply a lambda function to extract the first number from each cell
    df_copy['1v1'] = df_copy['1v1'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['1v2'] = df_copy['1v2'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['1v3'] = df_copy['1v3'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['1v4'] = df_copy['1v4'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['1v5'] = df_copy['1v5'].apply(lambda x: x.split('\n')[0] if x else None)
    
    df_copy = df_copy.fillna(0)
    
     # Convert all columns except the first one to integers
    for column in df_copy.columns[1:]:
        df_copy[column] = df_copy[column].astype(int)

    # Calculate the total clutches and create a new column
    df_copy['clutches'] = df_copy[['1v1', '1v2', '1v3', '1v4', '1v5']].sum(axis=1)

    # Drop all unneeded columns
    df_copy = df_copy.drop(columns=['1v1', '1v2', '1v3', '1v4', '1v5'])

    return df_copy

def scrape_data(url_list):
    all_dfs = {}  # Dictionary to store processed DataFrames for each URL

    team_names = ['MIBR', 'LEV', 'SEN', 'NRG', 'FUR', '100T', 'LOUD', 'EG', 'G2', 'C9', 'KRÜ']

    for url in url_list:
        response = requests.get(url)
        if response.status_code == 200:
            soup = bs(response.content, 'html.parser')

            # Initialize lists to store DataFrames for each pass
            first_pass_dfs = []

            # Find all game divs
            game_divs = soup.find_all('div', class_='vm-stats-game')

            # First pass: Find initial tables
            for game_div in game_divs:
                table = game_div.find('table', class_='wf-table-inset mod-adv-stats')

                if table:
                    # Extract table data into a DataFrame
                    table_data = []
                    rows = table.find_all('tr')
                    for row in rows:
                        row_data = [cell.text.strip() for cell in row.find_all(['td', 'th'])]
                        table_data.append(row_data)

                    # Convert table_data into a DataFrame and append to first_pass_dfs list
                    df = pd.DataFrame(table_data[1:], columns=table_data[0])  # Assuming first row is header
                    first_pass_dfs.append(df)

            # Process and clean DataFrames from both passes
            first_pass_cleaned = [clean_dataframe(df, team_names) for df in first_pass_dfs if not df.empty]

            # Combine corresponding DataFrames from both passes
            combined_dfs = []
            min_length = min(len(first_pass_cleaned), len(first_pass_cleaned))
            for i in range(min_length):
                if first_pass_cleaned[i] is not None:
                    combined_df = pd.concat([first_pass_cleaned[i]], axis=0)
                    combined_dfs.append(combined_df)
                    combined_df.reset_index(inplace=True, drop=True)

            all_dfs[url] = combined_dfs

        else:
            print('Failed to retrieve the webpage. Status code:', response.status_code)

    return all_dfs

In [3]:
# Example usage:
url_list = [
 'https://www.vlr.gg/353177/mibr-vs-leviat-n-champions-tour-2024-americas-stage-2-w1/?game=all&tab=performance' #,
#  'https://www.vlr.gg/353178/sentinels-vs-nrg-esports-champions-tour-2024-americas-stage-2-w1/?game=all&tab=performance',
#  'https://www.vlr.gg/353179/furia-vs-100-thieves-champions-tour-2024-americas-stage-2-w1/?game=all&tab=performance',
#  'https://www.vlr.gg/353180/loud-vs-evil-geniuses-champions-tour-2024-americas-stage-2-w1/?game=all&tab=performance',
#  'https://www.vlr.gg/353181/g2-esports-vs-cloud9-champions-tour-2024-americas-stage-2-w1/?game=all&tab=performance',
#  'https://www.vlr.gg/353182/evil-geniuses-vs-furia-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
#  'https://www.vlr.gg/353183/sentinels-vs-kr-esports-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
#  'https://www.vlr.gg/353184/loud-vs-mibr-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
#  'https://www.vlr.gg/353185/leviat-n-vs-100-thieves-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
#  'https://www.vlr.gg/353186/nrg-esports-vs-g2-esports-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
#  'https://www.vlr.gg/353187/sentinels-vs-cloud9-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
#  'https://www.vlr.gg/353188/evil-geniuses-vs-leviat-n-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
#  'https://www.vlr.gg/353189/loud-vs-furia-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
#  'https://www.vlr.gg/353190/mibr-vs-100-thieves-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
#  'https://www.vlr.gg/353191/nrg-esports-vs-kr-esports-champions-tour-2024-americas-stage-2-w2/?game=all&tab=performance',
#  'https://www.vlr.gg/353192/g2-esports-vs-kr-esports-champions-tour-2024-americas-stage-2-w3/?game=all&tab=performance',
#  'https://www.vlr.gg/353193/loud-vs-leviat-n-champions-tour-2024-americas-stage-2-w3/?game=all&tab=performance',
#  'https://www.vlr.gg/353194/mibr-vs-furia-champions-tour-2024-americas-stage-2-w3/?game=all&tab=performance',
#  'https://www.vlr.gg/353195/nrg-esports-vs-cloud9-champions-tour-2024-americas-stage-2-w3/?game=all&tab=performance',
#  'https://www.vlr.gg/353196/evil-geniuses-vs-100-thieves-champions-tour-2024-americas-stage-2-w3/?game=all&tab=performance',
#  'https://www.vlr.gg/353197/cloud9-vs-kr-esports-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance',
#  'https://www.vlr.gg/353198/evil-geniuses-vs-mibr-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance',
#  'https://www.vlr.gg/353199/leviat-n-vs-furia-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance',
#  'https://www.vlr.gg/353200/loud-vs-100-thieves-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance',
#  'https://www.vlr.gg/353201/sentinels-vs-g2-esports-champions-tour-2024-americas-stage-2-w4/?game=all&tab=performance'
]

data_frames = scrape_data(url_list)

In [4]:
# Using list conversion and index
entry_key = list(data_frames.keys())[0]
df = data_frames[entry_key]
df

[                     name  clutches
 0  ShahZaMMIBRShahZaMMIBR         1
 1    liazziMIBRliazziMIBR         0
 2    artzinMIBRartzinMIBR         0
 3      mazinMIBRmazinMIBR         2
 4      Pa1ntMIBRPa1ntMIBR         0
 5            C0MLEVC0MLEV         1
 6            texLEVtexLEV         1
 7      MazinoLEVMazinoLEV         0
 8        aspasLEVaspasLEV         1
 9        kiNggLEVkiNggLEV         0,
                      name  clutches
 0  ShahZaMMIBRShahZaMMIBR         0
 1    liazziMIBRliazziMIBR         0
 2    artzinMIBRartzinMIBR         0
 3      mazinMIBRmazinMIBR         1
 4      Pa1ntMIBRPa1ntMIBR         0
 5            C0MLEVC0MLEV         0
 6            texLEVtexLEV         1
 7      MazinoLEVMazinoLEV         0
 8        aspasLEVaspasLEV         0
 9        kiNggLEVkiNggLEV         0,
                      name  clutches
 0  ShahZaMMIBRShahZaMMIBR         1
 1    liazziMIBRliazziMIBR         0
 2    artzinMIBRartzinMIBR         0
 3      mazinMIBRmazinMIBR         1

In [5]:
# Iterate through the dictionary and remove the first item from each list value
for key in data_frames:
    if len(data_frames[key]) > 1:
        del data_frames[key][0]  # Delete the second item (index 1)

In [6]:
# Using list conversion and index
entry_key = list(data_frames.keys())[1]
df = data_frames[entry_key]
df

IndexError: list index out of range

In [8]:
# Your original table
data = {
    'name': ['mazin MIBR', 'artzin MIBR', 'ShahZaM MIBR', 'liazzi MIBR', 'Pa1nt MIBR',
             'tex LEV', 'Mazino LEV', 'kiNgg LEV', 'C0M LEV', 'aspas LEV'],
    'kills': [20, 18, 12, 13, 11, 20, 22, 12, 10, 11],
    'deaths': [15, 15, 14, 14, 17, 15, 15, 14, 14, 16],
    'assists': [10, 9, 9, 5, 6, 2, 9, 8, 7, 4],
    'adr': [161, 139, 122, 126, 98, 179, 175, 111, 106, 113],
    'fk': [3, 7, 2, 0, 2, 3, 1, 2, 0, 2],
    'fd': [1, 0, 1, 2, 4, 3, 3, 3, 1, 4]
}

df = pd.DataFrame(data)

# Extract team abbreviation and concatenate with player name without any spaces
df['name'] = df.apply(lambda row: row['name'].replace(' ', '').replace(row['name'].split()[-1], ''), axis=1)

print(df)

      name  kills  deaths  assists  adr  fk  fd
0    mazin     20      15       10  161   3   1
1   artzin     18      15        9  139   7   0
2  ShahZaM     12      14        9  122   2   1
3   liazzi     13      14        5  126   0   2
4    Pa1nt     11      17        6   98   2   4
5      tex     20      15        2  179   3   3
6   Mazino     22      15        9  175   1   3
7    kiNgg     12      14        8  111   2   3
8      C0M     10      14        7  106   0   1
9    aspas     11      16        4  113   2   4
