In [1]:
# import dependencies
from bs4 import BeautifulSoup as bs
import requests
import re
import timeit
import pandas as pd

# Find Links

In [2]:
# url to the matches tab
url = 'https://www.vlr.gg/event/matches/2095/champions-tour-2024-americas-stage-2/?series_id=4031'

In [3]:
# scrape all the urls from the matches tab
print('Beginning URL Retrieval')
print('------------------------')

# get the html file using request
html_txt = requests.get(url)
soup = bs(html_txt.text, 'lxml')

try:
    # map to the correct location in the html file
    body = soup.find('body')
    div_container = body.find('div', class_='col-container')
    div_card = div_container.find_all('div', class_='wf-card')
    url_list = []
    perf_list = []
    over_list = []

    for idx, x in enumerate(div_card):
        if idx != 0:
            # create a base_url
            base_link = []
            base_url = f'https://www.vlr.gg'
            for link in div_card[idx].find_all('a'):
                base_link.append(link.get('href'))

            # fill the df with base urls
            for url in base_link:
                url_list.append(f'{base_url}{url}')
                
            # fill the df with the urls + /?game=all&tab=performance 
            # brings you to the performance tab 
            for url in base_link:
                perf_list.append(f'{base_url}{url}{"/?game=all&tab=performance"}')
                
            # fill the df with the urls + /?game=all&tab=overview 
            # brings you to the overview tab
            for url in base_link:
                over_list.append(f'{base_url}{url}{"/?game=all&tab=overview"}')

except AttributeError:
        print('There was a missing URL')

print('------------------------')
print(f'Found {len(url_list)} games!\n')

Beginning URL Retrieval
------------------------
------------------------
Found 25 games!



# Performance

In [4]:
def clean_dataframe(df, team_names):
    if df.empty:
        return None
    
    df_copy = df.copy()

    # Rename the columns
    df_copy.columns = ['name', 'blank', '2K', '3K', '4K', '5K', '1v1', '1v2', '1v3', '1v4', '1v5', 'ECON', 'PL', 'DE']
        
    # Clean the 'name' column
    df_copy['name'] = df_copy['name'].str.strip().str.replace('\t', '').str.replace('\n', '')
    
#     # Clean the team names
#     for team in team_names:
#         df_copy['name'] = df_copy['name'].str.replace(team, '').str.strip()

    # Drop all unneeded columns
    df_copy = df_copy.drop(columns=['blank', '2K', '3K', '4K', '5K', 'ECON', 'PL', 'DE'])
    
    # Apply a lambda function to extract the first number from each cell
    df_copy['1v1'] = df_copy['1v1'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['1v2'] = df_copy['1v2'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['1v3'] = df_copy['1v3'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['1v4'] = df_copy['1v4'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['1v5'] = df_copy['1v5'].apply(lambda x: x.split('\n')[0] if x else None)
    
    df_copy = df_copy.fillna(0)
    
     # Convert all columns except the first one to integers
    for column in df_copy.columns[1:]:
        df_copy[column] = df_copy[column].astype(int)

    # Calculate the total clutches and create a new column
    df_copy['clutches'] = df_copy[['1v1', '1v2', '1v3', '1v4', '1v5']].sum(axis=1)

    # Drop all unneeded columns
    df_copy = df_copy.drop(columns=['1v1', '1v2', '1v3', '1v4', '1v5'])

    return df_copy

def scrape_data(url_list):
    all_dfs = {}  # Dictionary to store processed DataFrames for each URL

    team_names = ['MIBR', 'LEV', 'SEN', 'NRG', 'FUR', '100T', 'LOUD', 'EG', 'G2', 'C9', 'KRÜ']

    for url in url_list:
        response = requests.get(url)
        if response.status_code == 200:
            soup = bs(response.content, 'html.parser')

            # Initialize lists to store DataFrames for each pass
            first_pass_dfs = []

            # Find all game divs
            game_divs = soup.find_all('div', class_='vm-stats-game')

            # First pass: Find initial tables
            for game_div in game_divs:
                table = game_div.find('table', class_='wf-table-inset mod-adv-stats')

                if table:
                    # Extract table data into a DataFrame
                    table_data = []
                    rows = table.find_all('tr')
                    for row in rows:
                        row_data = [cell.text.strip() for cell in row.find_all(['td', 'th'])]
                        table_data.append(row_data)

                    # Convert table_data into a DataFrame and append to first_pass_dfs list
                    df = pd.DataFrame(table_data[1:], columns=table_data[0])  # Assuming first row is header
                    first_pass_dfs.append(df)

            # Process and clean DataFrames from both passes
            first_pass_cleaned = [clean_dataframe(df, team_names) for df in first_pass_dfs if not df.empty]

            # Combine corresponding DataFrames from both passes
            combined_dfs = []
            min_length = min(len(first_pass_cleaned), len(first_pass_cleaned))
            for i in range(min_length):
                if first_pass_cleaned[i] is not None:
                    combined_df = pd.concat([first_pass_cleaned[i]], axis=0)
                    combined_dfs.append(combined_df)
                    combined_df.reset_index(inplace=True, drop=True)

            all_dfs[url] = combined_dfs

        else:
            print('Failed to retrieve the webpage. Status code:', response.status_code)

    return all_dfs

In [5]:
# Scrape the perf_list
perf_data = scrape_data(perf_list)

In [6]:
# remove the second df from each series

# Create a list of new keys
new_keys = [f'Series {i+1}' for i in range(len(perf_data))]

# Create a new dictionary with updated keys
perf_data = dict(zip(new_keys, perf_data.values()))

In [7]:
# check the first df
# Using list conversion and index
entry_key = list(perf_data.keys())[0]
df = perf_data[entry_key]
df

[          name  clutches
 0  ShahZaMMIBR         1
 1   liazziMIBR         0
 2   artzinMIBR         0
 3    mazinMIBR         2
 4    Pa1ntMIBR         0
 5       C0MLEV         1
 6       texLEV         1
 7    MazinoLEV         0
 8     aspasLEV         1
 9     kiNggLEV         0,
           name  clutches
 0  ShahZaMMIBR         0
 1   liazziMIBR         0
 2   artzinMIBR         0
 3    mazinMIBR         1
 4    Pa1ntMIBR         0
 5       C0MLEV         0
 6       texLEV         1
 7    MazinoLEV         0
 8     aspasLEV         0
 9     kiNggLEV         0,
           name  clutches
 0  ShahZaMMIBR         1
 1   liazziMIBR         0
 2   artzinMIBR         0
 3    mazinMIBR         1
 4    Pa1ntMIBR         0
 5       C0MLEV         1
 6       texLEV         0
 7    MazinoLEV         0
 8     aspasLEV         1
 9     kiNggLEV         0]

In [8]:
# Iterate through the dictionary and remove the first item from each list value
for key in perf_data:
    if len(perf_data[key]) > 1:
        del perf_data[key][0]  # Delete the second item (index 1)

In [9]:
# check the first df
# Using list conversion and index
entry_key = list(perf_data.keys())[0]
df = perf_data[entry_key]
df

[          name  clutches
 0  ShahZaMMIBR         0
 1   liazziMIBR         0
 2   artzinMIBR         0
 3    mazinMIBR         1
 4    Pa1ntMIBR         0
 5       C0MLEV         0
 6       texLEV         1
 7    MazinoLEV         0
 8     aspasLEV         0
 9     kiNggLEV         0,
           name  clutches
 0  ShahZaMMIBR         1
 1   liazziMIBR         0
 2   artzinMIBR         0
 3    mazinMIBR         1
 4    Pa1ntMIBR         0
 5       C0MLEV         1
 6       texLEV         0
 7    MazinoLEV         0
 8     aspasLEV         1
 9     kiNggLEV         0]

In [10]:
perf_data

{'Series 1': [          name  clutches
  0  ShahZaMMIBR         0
  1   liazziMIBR         0
  2   artzinMIBR         0
  3    mazinMIBR         1
  4    Pa1ntMIBR         0
  5       C0MLEV         0
  6       texLEV         1
  7    MazinoLEV         0
  8     aspasLEV         0
  9     kiNggLEV         0,
            name  clutches
  0  ShahZaMMIBR         1
  1   liazziMIBR         0
  2   artzinMIBR         0
  3    mazinMIBR         1
  4    Pa1ntMIBR         0
  5       C0MLEV         1
  6       texLEV         0
  7    MazinoLEV         0
  8     aspasLEV         1
  9     kiNggLEV         0],
 'Series 2': [          name  clutches
  0      TenZSEN         0
  1      SacySEN         0
  2   ZellsisSEN         0
  3    johnqtSEN         0
  4    zekkenSEN         0
  5  crashiesNRG         0
  6    VictorNRG         0
  7   FiNESSENRG         0
  8       s0mNRG         1
  9     EthanNRG         0,
            name  clutches
  0      TenZSEN         0
  1      SacySEN         0


# Overview

In [11]:
def clean_dataframe(df, team_names):
    if df.empty:
        return None
    
    df_copy = df.copy()

    # Rename the columns
    df_copy.columns = ['name', 'blank', 'rating', 'acs', 'kills', 'deaths', 'assists', 'k/d', 'KAST', 'adr', 'hs', 'fk', 'fd', 'fk/fd']

    # Clean the 'name' column
    df_copy['name'] = df_copy['name'].str.strip().str.replace('\t', '').str.replace('\n', '')
    
#     # Clean the team names
#     for team in team_names:
#         df_copy['name'] = df_copy['name'].str.replace(team, '').str.strip()

    # Drop all unneeded columns
    df_copy = df_copy.drop(columns=['blank', 'rating', 'acs', 'k/d', 'KAST', 'hs', 'fk/fd'])
    
    # Apply a lambda function to extract the first number from each cell
    df_copy['kills'] = df_copy['kills'].apply(lambda x: x.split('\n')[0] if x else None)

    # Use a try-except block to handle potential errors in 'deaths' column processing
    try:
        df_copy['deaths'] = df_copy['deaths'].apply(lambda x: int(re.findall(r'\d+', x)[0]) if x else None)
    except IndexError:
        df_copy['deaths'] = None  # Handle the error by assigning a default value

    df_copy['assists'] = df_copy['assists'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['adr'] = df_copy['adr'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['fk'] = df_copy['fk'].apply(lambda x: x.split('\n')[0] if x else None)
    df_copy['fd'] = df_copy['fd'].apply(lambda x: x.split('\n')[0] if x else None)

    return df_copy

def scrape_data(url_list):
    all_dfs = {}  # Dictionary to store processed DataFrames for each URL

    team_names = ['MIBR', 'LEV', 'SEN', 'NRG', 'FUR', '100T', 'LOUD', 'EG', 'G2', 'C9', 'KRÜ']

    for url in url_list:
        response = requests.get(url)
        if response.status_code == 200:
            soup = bs(response.content, 'html.parser')

            # Initialize lists to store DataFrames for each pass
            first_pass_dfs = []
            second_pass_dfs = []

            # Find all game divs
            game_divs = soup.find_all('div', class_='vm-stats-game')

            # First pass: Find initial tables
            for game_div in game_divs:
                table = game_div.find('table', class_='wf-table-inset mod-overview')

                if table:
                    # Extract table data into a DataFrame
                    table_data = []
                    rows = table.find_all('tr')
                    for row in rows:
                        row_data = [cell.text.strip() for cell in row.find_all(['td', 'th'])]
                        table_data.append(row_data)

                    # Convert table_data into a DataFrame and append to first_pass_dfs list
                    df = pd.DataFrame(table_data[1:], columns=table_data[0])  # Assuming first row is header
                    first_pass_dfs.append(df)

            # Second pass: Find the next tables
            for game_div in game_divs:
                table = game_div.find('table', class_='wf-table-inset mod-overview')
                if table:
                    next_table = table.find_next('table', class_='wf-table-inset mod-overview')
                    if next_table:
                        # Extract table data into a DataFrame
                        table_data = []
                        rows = next_table.find_all('tr')
                        for row in rows:
                            row_data = [cell.text.strip() for cell in row.find_all(['td', 'th'])]
                            table_data.append(row_data)

                        # Convert table_data into a DataFrame and append to second_pass_dfs list
                        df = pd.DataFrame(table_data[1:], columns=table_data[0])  # Assuming first row is header
                        second_pass_dfs.append(df)

            # Process and clean DataFrames from both passes
            first_pass_cleaned = [clean_dataframe(df, team_names) for df in first_pass_dfs if not df.empty]
            second_pass_cleaned = [clean_dataframe(df, team_names) for df in second_pass_dfs if not df.empty]

            # Combine corresponding DataFrames from both passes
            combined_dfs = []
            min_length = min(len(first_pass_cleaned), len(second_pass_cleaned))
            for i in range(min_length):
                if first_pass_cleaned[i] is not None and second_pass_cleaned[i] is not None:
                    combined_df = pd.concat([first_pass_cleaned[i], second_pass_cleaned[i]], axis=0)
                    combined_dfs.append(combined_df)
                    combined_df.reset_index(inplace=True, drop=True)

            all_dfs[url] = combined_dfs

        else:
            print('Failed to retrieve the webpage. Status code:', response.status_code)

    return all_dfs

In [12]:
over_data = scrape_data(over_list)

In [13]:
# remove the second df from each series

# Create a list of new keys
new_keys = [f'Series {i+1}' for i in range(len(over_data))]

# Create a new dictionary with updated keys
over_data = dict(zip(new_keys, over_data.values()))

In [14]:
over_data['Series 2']

[           name kills  deaths assists  adr fk fd
 0    zekken SEN    22      14       7  222  7  4
 1      Sacy SEN    19      10       9  161  0  2
 2   Zellsis SEN    16      10       6  131  1  0
 3      TenZ SEN    13      13      14  127  1  1
 4    johnqt SEN    10      11       5   96  2  2
 5       s0m NRG    14      17       6  142  4  4
 6     Ethan NRG    14      15      10  121  3  0
 7    Victor NRG    11      17       5  116  2  6
 8   FiNESSE NRG     9      16       4   79  1  0
 9  crashies NRG    10      16       4   92  0  2,
            name kills  deaths assists  adr  fk  fd
 0    zekken SEN    46      33      21  213  12   8
 1   Zellsis SEN    37      24      22  135   2   1
 2      TenZ SEN    36      29      24  134   5   3
 3      Sacy SEN    38      26      17  145   3   5
 4    johnqt SEN    25      28       9  102   3   4
 5       s0m NRG    30      35      21  131   4   6
 6     Ethan NRG    32      34      23  118   5   3
 7    Victor NRG    34      41   

In [15]:
# Iterate through the dictionary and remove the second item from each list value
for key in over_data:
    if len(over_data[key]) > 1:
        del over_data[key][1]  # Delete the second item (index 1)

In [16]:
over_data['Series 2'] 

[           name kills  deaths assists  adr fk fd
 0    zekken SEN    22      14       7  222  7  4
 1      Sacy SEN    19      10       9  161  0  2
 2   Zellsis SEN    16      10       6  131  1  0
 3      TenZ SEN    13      13      14  127  1  1
 4    johnqt SEN    10      11       5   96  2  2
 5       s0m NRG    14      17       6  142  4  4
 6     Ethan NRG    14      15      10  121  3  0
 7    Victor NRG    11      17       5  116  2  6
 8   FiNESSE NRG     9      16       4   79  1  0
 9  crashies NRG    10      16       4   92  0  2,
            name kills  deaths assists  adr fk fd
 0      TenZ SEN    23      16      10  140  4  2
 1    zekken SEN    24      19      14  206  5  4
 2   Zellsis SEN    21      14      16  138  1  1
 3      Sacy SEN    19      16       8  132  3  3
 4    johnqt SEN    15      17       4  108  1  2
 5     Ethan NRG    18      19      13  117  2  3
 6       s0m NRG    16      18      15  123  0  2
 7    Victor NRG    23      24       5  177  7  7

# Combining DFS

In [85]:
# Initialize an empty list to store all DataFrames
over_dfs = []

# Iterate through each series in re_dfs
for dfs_list in over_data.values():
    # Extend all_dfs with the list of DataFrames for the current series
    over_dfs.extend(dfs_list)

In [110]:
# Combine all dataframes into one
comb_over = pd.concat(over_dfs, ignore_index=True)

names_over = comb_over['name'].unique()

# Sort the array alphabetically
sorted_names_over = sorted(names_over)

In [87]:
# Convert numeric columns to numeric types if necessary
comb_over['kills'] = pd.to_numeric(comb_over['kills'], errors='coerce')
comb_over['deaths'] = pd.to_numeric(comb_over['deaths'], errors='coerce')
comb_over['assists'] = pd.to_numeric(comb_over['assists'], errors='coerce')
comb_over['adr'] = pd.to_numeric(comb_over['adr'], errors='coerce')
comb_over['fk'] = pd.to_numeric(comb_over['fk'], errors='coerce')
comb_over['fd'] = pd.to_numeric(comb_over['fd'], errors='coerce')
# combined_all['clutches'] = pd.to_numeric(combined_all['clutches'], errors='coerce')

# Example of summing each individual stat for each player
player_stats = comb_over.groupby('name').agg({
    'kills': 'sum',
    'deaths': 'sum',
    'assists': 'sum',
    'adr': 'mean',
    'fk': 'sum',
    'fd': 'sum'
#     'clutches': 'sum'
}).reset_index()

In [103]:
# Assuming player_stats is your DataFrame containing summed statistics for each player
# Convert 'name' column to lowercase for case-insensitive sorting
player_stats['name_lower'] = player_stats['name'].str.lower()

# Sort the DataFrame alphabetically by 'name_lower' column
player_stats_sorted = player_stats.sort_values(by='name_lower')

# Drop the 'name_lower' column if you don't need it anymore
player_stats_sorted = player_stats_sorted.drop(columns=['name_lower'])

# Print the sorted DataFrame
player_stats_sorted.reset_index(drop=True, inplace=True)

player_stats_sorted.head(2)

Unnamed: 0,name,kills,deaths,assists,adr,fk,fd
0,Apoth EG,113.0,150.0,58.0,112.1,16.0,25.0
1,artzin MIBR,140.0,142.0,43.0,136.444444,22.0,19.0


In [123]:
# Load the CSV file into a DataFrame
vlr_df = pd.read_csv('stats_vlr.csv')
vlr_df = vlr_df.drop('CL', axis = 1)

new_names = ['name', 'adr', 'kills', 'deaths', 'assists', 'fk', 'fd']

# Assign new column names
vlr_df.columns = new_names

new_order = ['name', 'kills', 'deaths', 'assists', 'adr', 'fk', 'fd']

vlr_df = vlr_df[new_order]

vlr_df

Unnamed: 0,name,kills,deaths,assists,adr,fk,fd
0,Apoth\nEG,113,150,58,111.3,16,25
1,artzin\nMIBR,140,142,43,138.7,22,19
2,aspas\nLEV,159,102,31,171.4,37,23
3,Asuna\n100T,138,140,95,134.1,29,27
4,bang\n100T,134,138,67,124.0,24,29
5,Boostio\n100T,137,146,68,138.5,14,15
6,C0M\nLEV,96,100,64,115.2,15,9
7,cauanzin\nLOUD,194,175,63,150.0,32,24
8,crashies\nNRG,126,137,70,113.4,9,13
9,Cryocells\n100T,149,148,45,129.1,26,22


In [101]:
# Compare the DataFrames
comparison = player_stats_sorted.compare(vlr_df)

In [105]:
comparison.head(2)

Unnamed: 0_level_0,name,name,adr,adr
Unnamed: 0_level_1,self,other,self,other
0,Apoth EG,Apoth\nEG,112.1,111.3
1,artzin MIBR,artzin\nMIBR,136.444444,138.7


In [63]:
# Initialize an empty list to store all DataFrames
perf_dfs = []

# Iterate through each series in re_dfs
for dfs_list in perf_data.values():
    # Extend all_dfs with the list of DataFrames for the current series
    perf_dfs.extend(dfs_list)

In [116]:
# Combine all dataframes into one
comb_perf = pd.concat(perf_dfs, ignore_index=True)

names_perf = comb_perf['name'].unique()

# Sort the array alphabetically
sorted_names_perf = sorted(names_perf)

In [65]:
comb_perf

Unnamed: 0,name,clutches
0,ShahZaMMIBR,0
1,liazziMIBR,0
2,artzinMIBR,0
3,mazinMIBR,1
4,Pa1ntMIBR,0
...,...,...
475,Asuna100T,1
476,Boostio100T,0
477,eeiu100T,0
478,bang100T,0


In [66]:
# Load the CSV file into a DataFrame
names_df = pd.read_csv('name_map.csv')

In [67]:
names_df

Unnamed: 0,game_id,player_name,agent,team_abrev,map
0,0,mazin,Omen,MIBR,Ascent
1,0,artzin,Yoru,MIBR,Ascent
2,0,ShahZaM,Sova,MIBR,Ascent
3,0,liazzi,Killjoy,MIBR,Ascent
4,0,Pa1nt,Iso,MIBR,Ascent
...,...,...,...,...,...
625,62,trent,not found,G2,TBD
626,62,valyn,not found,G2,TBD
627,62,JonahP,not found,G2,TBD
628,62,leaf,not found,G2,TBD


In [111]:
names_final = names_df['player_name'].unique()

In [112]:
# Sort the array alphabetically
sorted_names_final = sorted(names_final)

In [114]:
# Create a mapping dictionary
mapping = dict(zip(sorted_names_over, sorted_names_final))

# Replace values in 'name' column using the mapping
comb_over['name'] = comb_over['name'].replace(mapping)

In [115]:
comb_over

Unnamed: 0,name,kills,deaths,assists,adr,fk,fd
0,mazin,20,15,10,161,3,1
1,artzin,18,15,9,139,7,0
2,ShahZaM,12,14,9,122,2,1
3,liazzi,13,14,5,126,0,2
4,Pa1nt,11,17,6,98,2,4
...,...,...,...,...,...,...,...
625,trent,,,,,,
626,valyn,,,,,,
627,JonahP,,,,,,
628,leaf,,,,,,


In [117]:
# Create a mapping dictionary
mapping = dict(zip(sorted_names_perf, sorted_names_final))

# Replace values in 'name' column using the mapping
comb_perf['name'] = comb_perf['name'].replace(mapping)

In [118]:
comb_perf

Unnamed: 0,name,clutches
0,ShahZaM,0
1,liazzi,0
2,artzin,0
3,mazin,1
4,Pa1nt,0
...,...,...
475,Asuna,1
476,Boostio,0
477,eeiu,0
478,bang,0


In [48]:
# Left join
df_test = names_df.join(comb_over, how='left')

In [49]:
df_test.head(20)

Unnamed: 0,game_id,player_name,agent,team_abrev,map,kills,deaths,assists,adr,fk,fd
0,0,mazin,Omen,MIBR,Ascent,20,15,10,161,3,1
1,0,artzin,Yoru,MIBR,Ascent,18,15,9,139,7,0
2,0,ShahZaM,Sova,MIBR,Ascent,12,14,9,122,2,1
3,0,liazzi,Killjoy,MIBR,Ascent,13,14,5,126,0,2
4,0,Pa1nt,Iso,MIBR,Ascent,11,17,6,98,2,4
5,0,tex,Killjoy,LEV,Ascent,20,15,2,179,3,3
6,0,Mazino,Kayo,LEV,Ascent,22,15,9,175,1,3
7,0,kiNgg,Omen,LEV,Ascent,12,14,8,111,2,3
8,0,C0M,Sova,LEV,Ascent,10,14,7,106,0,1
9,0,aspas,Jett,LEV,Ascent,11,16,4,113,2,4


In [50]:
# Get the length of comb_perf
length_comb_perf = len(comb_perf)

# Select only the rows in df_test that are within the range of length_comb_perf
df_test_trimmed = df_test.iloc[:length_comb_perf]

In [29]:
# Remove the 'name' column from comb_perf
comb_perf = comb_perf.drop(columns=['name'])

# Combine comb_perf and df_test_trimmed on the index
combined_all = df_test_trimmed.join(comb_perf, how='left')

In [51]:
combined_all = df_test_trimmed

In [52]:
# Assuming combined_all is your DataFrame
# Save to CSV
# combined_all.to_csv('database.csv', index=False)

In [57]:
# Convert numeric columns to numeric types if necessary
combined_all['kills'] = pd.to_numeric(combined_all['kills'], errors='coerce')
combined_all['deaths'] = pd.to_numeric(combined_all['deaths'], errors='coerce')
combined_all['assists'] = pd.to_numeric(combined_all['assists'], errors='coerce')
combined_all['adr'] = pd.to_numeric(combined_all['adr'], errors='coerce')
combined_all['fk'] = pd.to_numeric(combined_all['fk'], errors='coerce')
combined_all['fd'] = pd.to_numeric(combined_all['fd'], errors='coerce')
# combined_all['clutches'] = pd.to_numeric(combined_all['clutches'], errors='coerce')

# Example of summing each individual stat for each player
player_stats = combined_all.groupby('player_name').agg({
    'kills': 'sum',
    'deaths': 'sum',
    'assists': 'sum',
    'adr': 'mean',
    'fk': 'sum',
    'fd': 'sum'
#     'clutches': 'sum'
}).reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

In [58]:
# Assuming player_stats is your DataFrame containing summed statistics for each player
# Convert 'name' column to lowercase for case-insensitive sorting
player_stats['name_lower'] = player_stats['player_name'].str.lower()

# Sort the DataFrame alphabetically by 'name_lower' column
player_stats_sorted = player_stats.sort_values(by='name_lower')

# Drop the 'name_lower' column if you don't need it anymore
player_stats_sorted = player_stats_sorted.drop(columns=['name_lower'])

# Print the sorted DataFrame
player_stats_sorted

Unnamed: 0,player_name,kills,deaths,assists,adr,fk,fd
0,Apoth,110,153,62,108.9,14,23
25,artzin,133,131,46,131.888889,20,14
26,aspas,120,118,32,137.0,22,18
1,Asuna,146,135,79,140.0,21,21
27,bang,115,140,67,110.555556,27,24
2,Boostio,144,140,56,146.444444,16,23
3,C0M,105,114,55,123.75,15,20
28,cauanzin,206,159,61,155.181818,28,25
29,crashies,125,145,65,109.8,14,16
4,Cryocells,146,141,51,125.111111,19,19


In [119]:
# Initialize an empty list to store all DataFrames
over_dfs = []

# Iterate through each series in re_dfs
for dfs_list in over_data.values():
    # Extend all_dfs with the list of DataFrames for the current series
    over_dfs.extend(dfs_list)

In [120]:
# Initialize an empty list to store all DataFrames
perf_dfs = []

# Iterate through each series in re_dfs
for dfs_list in perf_data.values():
    # Extend all_dfs with the list of DataFrames for the current series
    perf_dfs.extend(dfs_list)

In [None]:
# Combine all dataframes into one
comb_over = pd.concat(over_dfs, ignore_index=True)

names_over = comb_over['name'].unique()

# Sort the array alphabetically
sorted_names_over = sorted(names_over)

In [None]:
# Combine all dataframes into one
comb_perf = pd.concat(perf_dfs, ignore_index=True)

names_perf = comb_perf['name'].unique()

# Sort the array alphabetically
sorted_names_perf = sorted(names_perf)

In [121]:
over_dfs

[           name kills  deaths assists  adr fk fd
 0    mazin MIBR    20      15      10  161  3  1
 1   artzin MIBR    18      15       9  139  7  0
 2  ShahZaM MIBR    12      14       9  122  2  1
 3   liazzi MIBR    13      14       5  126  0  2
 4    Pa1nt MIBR    11      17       6   98  2  4
 5       tex LEV    20      15       2  179  3  3
 6    Mazino LEV    22      15       9  175  1  3
 7     kiNgg LEV    12      14       8  111  2  3
 8       C0M LEV    10      14       7  106  0  1
 9     aspas LEV    11      16       4  113  2  4,
            name kills  deaths assists  adr fk fd
 0    mazin MIBR    15      14       5  150  3  1
 1   artzin MIBR    16      15       4  150  0  3
 2  ShahZaM MIBR    11      15       5  127  1  3
 3    Pa1nt MIBR     8      18       6  105  2  5
 4   liazzi MIBR     8      16       1   80  1  1
 5     kiNgg LEV    22      10       8  210  4  0
 6     aspas LEV    21       8       4  179  5  1
 7       C0M LEV    17      13       5  156  2  0