In [19]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Function to scrape data for a specific event
def scrape_event_data(event_name, event_id):
    url = f'https://www.vlr.gg/stats/?event_group_id={event_id}&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all'
    print(url)
    
    # Send an HTTP request to the webpage
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find the table with player stats
    table = soup.find('table')
    if table is None:
        print(f"No table found for {event_name}. Check the page structure or event ID.")
        return None  # Return None if no table is found
    
    # Extract the table headers
    headers = [th.text for th in table.find_all('th')]
    headers.append('Agent Image URLs')  # Add a column for agent image URLs
    headers.append('Player URL')  # Add a column for player URLs
    
    # Extract table rows
    rows = []
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        row_data = [cell.text.strip() for cell in cells]
        
        # Find the 'mod-agents' column that contains multiple images
        agent_td = row.find('td', class_='mod-agents')
        if agent_td:
            agent_imgs = agent_td.find_all('img')
            img_urls = [img['src'].replace('/img/vlr/game/agents/', '').replace('.png', '') for img in agent_imgs] if agent_imgs else ['N/A']
        else:
            img_urls = ['N/A']
        
        row_data.append(', '.join(img_urls))  # Append the list of image URLs to the row
        
        # Find the player's profile link
        player_link = row.find('a')
        if player_link and 'href' in player_link.attrs:
            player_url = 'https://www.vlr.gg' + player_link['href']+ '?timespan=all'
        else:
            player_url = 'N/A'
        
        
        row_data.append(player_url)  # Append the player URL to the row
        rows.append(row_data)
    
    # Convert the data into a DataFrame
    df = pd.DataFrame(rows, columns=headers)
    #print(df)
    
    return df  # Return the DataFrame

# List of events with correct event IDs
events = {
    'Valorant Game Changers 2023': '38',
    'Valorant Game Changers 2024': '62',
    'Challengers League 2023': '31',
    'Challengers League 2024': '59',
    'Valorant Champions Tour 2023': '45',
    'Valorant Champions Tour 2024': '61'
}

# List to hold DataFrames
all_dataframes = []

# Loop through each event and scrape data
for event_name, event_id in events.items():
    event_df = scrape_event_data(event_name, event_id)
    if event_df is not None:
        all_dataframes.append(event_df)  # Add the DataFrame to the list

# Concatenate all DataFrames and save as a single CSV
if all_dataframes:
    final_df = pd.concat(all_dataframes, ignore_index=True)
    final_df.to_csv('all_events_data.csv', index=False)
    print("All event data saved as all_events_data.csv")



# Replace 'Player Name' with the actual column name for player names in your dataset

final_df = final_df.drop_duplicates(subset=['Player'], keep='first')

# Save the cleaned DataFrame to a new CSV file
final_df.to_csv('cleaned_player_data.csv', index=False)

print("Duplicates removed and saved to cleaned_player_data.csv")


import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Function to scrape player profiles for agents and team name
def scrape_player_profiles(player_df):
    # Create a DataFrame to hold agent data for all players
    all_agent_data = []

    # Iterate through each player's URL in the DataFrame
    for index, row in player_df.iterrows():
        player_url = row['Player URL']
        print(f"Fetching data for player: {player_url}")

        # Initialize retry variables
        retries = 3
        for attempt in range(retries):
            try:
                # Fetch the player's profile page
                player_response = requests.get(player_url)
                player_response.raise_for_status()  # Check for HTTP errors
                player_soup = BeautifulSoup(player_response.text, 'html.parser')

                # Extract agent data
                agent_table = player_soup.find('table', {'class': 'wf-table'})  # Use class to identify the table
                agent_data_list = []  # Temporary list to store agent data for the current player
                
                if agent_table:
                    print("Agent Table Found. Extracting agent names and images...")
                    agent_rows = agent_table.find_all('tr')[1:]  # Skip header
                    if not agent_rows:
                        print("No agent rows found in the table.")
                    for agent_row in agent_rows:
                        # Find agent image and name
                        agent_img_tag = agent_row.find('img')
                        agent_img_url = 'https://www.vlr.gg' + agent_img_tag['src'] if agent_img_tag else 'N/A'
                        
                        agent_name = agent_row.find('td', style="padding-left: 8px; padding-right: 8px;").text.strip() if agent_row.find('td', style="padding-left: 8px; padding-right: 8px;") else 'N/A'

                        agent_data_list.append({'Agent Name': agent_name, 'Image URL': agent_img_url})

                else:
                    print("No agent table found.")

                # Extract current team data
                team_section = player_soup.find('a', {'class': 'wf-module-item mod-first'})
                if team_section:
                    team_name = team_section.find('div', style="font-weight: 500;").text.strip()
                else:
                    team_name = 'N/A'

                # Store player URL and team name in all_agent_data list
                all_agent_data.append({'Player URL': player_url, 'All Agents': agent_data_list, 'Current Team': team_name})

                # Wait a bit between requests
                time.sleep(1)  # Adjust the delay as needed
                break  # Exit retry loop if successful

            except requests.exceptions.RequestException as e:
                print(f"Attempt {attempt + 1} failed: {e}")
                if attempt == retries - 1:
                    print(f"Giving up on {player_url}")
                else:
                    time.sleep(2)  # Wait a bit before retrying

    # Create a DataFrame for all agent data
    agents_df = pd.DataFrame(all_agent_data)

    # Print the entire DataFrame
    pd.set_option('display.max_columns', None)  # Show all columns
    pd.set_option('display.max_rows', None)  # Show all rows
    pd.set_option('display.width', None)  # No limit on width

    print(agents_df)

    # Save the agent data DataFrame to CSV
    agents_df.to_csv('all_players_agents_data.csv', index=False)
    print("Agent data saved as all_players_agents_data.csv")

# Assuming the final DataFrame from the previous scraping step is named final_df
# You should replace this with the DataFrame you created earlier
# Scrape player profiles based on the DataFrame
scrape_player_profiles(final_df)
         


https://www.vlr.gg/stats/?event_group_id=38&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
https://www.vlr.gg/stats/?event_group_id=62&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
https://www.vlr.gg/stats/?event_group_id=31&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
https://www.vlr.gg/stats/?event_group_id=59&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
https://www.vlr.gg/stats/?event_group_id=45&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
https://www.vlr.gg/stats/?event_group_id=61&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
All event data saved as all_events_data.csv
Duplicates removed and saved to cleaned_player_data.csv
Fetching data for player: https://www.vlr.gg/player/17976/florescent?timespan=all
Agent Table Found. E

                                          Player URL  \
0  https://www.vlr.gg/player/17976/florescent?tim...   
1  https://www.vlr.gg/player/10653/suzu?timespan=all   
2  https://www.vlr.gg/player/30123/bakkushaw?time...   
3  https://www.vlr.gg/player/21265/keenc?timespan...   
4  https://www.vlr.gg/player/18215/rachell?timesp...   

                                          All Agents  \
0  [{'Agent Name': '', 'Image URL': 'https://www....   
1  [{'Agent Name': '', 'Image URL': 'https://www....   
2  [{'Agent Name': '', 'Image URL': 'https://www....   
3  [{'Agent Name': '', 'Image URL': 'https://www....   
4  [{'Agent Name': '', 'Image URL': 'https://www....   

                          Current Team  
0                    Shopify Rebellion  
1                     ZETA DIVISION GC  
2  Anorthosis Famagusta Esports Female  
3                     Evil Geniuses GC  
4                          Exalty Gaïa  
                                          Player URL  \
0  https://www.vlr.gg/pl

                                          Player URL  \
0  https://www.vlr.gg/player/17976/florescent?tim...   
1  https://www.vlr.gg/player/10653/suzu?timespan=all   
2  https://www.vlr.gg/player/30123/bakkushaw?time...   
3  https://www.vlr.gg/player/21265/keenc?timespan...   
4  https://www.vlr.gg/player/18215/rachell?timesp...   

                                          All Agents  \
0  jett, raze, neon, reyna, yoru, chamber, kayo, ...   
1  raze, jett, sova, omen, sage, chamber, killjoy...   
2  breach, skye, kayo, sova, sage, fade, chamber,...   
3  jett, chamber, raze, killjoy, sova, kayo, brea...   
4  sova, cypher, skye, sage, kayo, reyna, breach,...   

                          Current Team             Player Agents  Rnd  R2.0  \
0                    Shopify Rebellion     florescent\nSR   (+1)  986  1.48   
1                     ZETA DIVISION GC        suzu\nFL.GC    NaN  590  1.38   
2  Anorthosis Famagusta Esports Female  bakkushaw\nQLSH.S   (+1)  233  1.37   
3         

In [3]:
import requests
import pandas as pd

# Base API URL to fetch player list
api_url = 'https://vlr.orlandomm.net/api/v1/players'

# Parameters for the request to fetch all players
params = {
    'limit': 'all',  # Fetch all players
    'minrounds': 200,
    'minrating': 1550,
    'timespan': '400'
}

# Send a GET request to the API to fetch all players
response = requests.get(api_url, params=params)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON data
    data = response.json()

    # Extract player data
    players = data['data']

    # Initialize a list to store detailed player data
    detailed_players = []

    # Loop through each player to get detailed stats using their playerid
    for player in players:
        playerid = player['id']  # Extract player ID from the general list

        # API URL to get player stats using playerid
        player_stats_url = f'https://vlr.orlandomm.net/api/v1/players/{playerid}'

        # Send a GET request to fetch player stats
        stats_response = requests.get(player_stats_url)

        if stats_response.status_code == 200:
            # Parse the player's detailed data
            player_stats_data = stats_response.json()

            # Extract detailed player info
            info = player_stats_data['data']['info']
            team = player_stats_data['data'].get('team', {})
            results = player_stats_data['data'].get('results', [])
            past_teams = player_stats_data['data'].get('pastTeams', [])
            socials = player_stats_data['data'].get('socials', {})

            # Create a dictionary with the player's data
            player_info = {
                'Player ID': info.get('id', 'N/A'),
                'Username': info.get('user', 'N/A'),
                'Name': info.get('name', 'N/A'),
                'Country': info.get('country', 'N/A'),
                'Flag': info.get('flag', 'N/A'),
                'Player Image URL': info.get('img', 'N/A'),
                'Player Profile URL': info.get('url', 'N/A'),
                'Team ID': team.get('id', 'N/A'),
                'Team Name': team.get('name', 'N/A'),
                'Team Logo URL': team.get('logo', 'N/A'),
                'Team Joined': team.get('joined', 'N/A'),
                'Matches Played': len(results),
                'Past Teams': ', '.join([team['name'] for team in past_teams]) if past_teams else 'N/A',
                'Twitter': socials.get('twitter', 'N/A'),
                'Twitter URL': socials.get('twitter_url', 'N/A'),
                'Twitch': socials.get('twitch', 'N/A'),
                'Twitch URL': socials.get('twitch_url', 'N/A')
            }

            # Add the merged info to the list
            detailed_players.append(player_info)

        else:
            print(f"Failed to fetch stats for player {playerid}. Status code: {stats_response.status_code}")

    # Convert the detailed player data into a DataFrame
    df = pd.DataFrame(detailed_players)

    # Save to CSV
    df.to_csv('vlr_detailed_player_data.csv', index=False)
    print("CSV with detailed player data saved successfully!")

else:
    print(f"Failed to fetch player data. Status code: {response.status_code}")


Failed to fetch player data. Status code: 523


https://www.vlr.gg/stats/?event_group_id=38&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
CSV for Valorant Game Changers 2023 saved as valorant_game_changers_2023.csv
https://www.vlr.gg/stats/?event_group_id=62&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
CSV for Valorant Game Changers 2024 saved as valorant_game_changers_2024.csv
https://www.vlr.gg/stats/?event_group_id=31&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
CSV for Challengers League 2023 saved as challengers_league_2023.csv
https://www.vlr.gg/stats/?event_group_id=59&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
CSV for Challengers League 2024 saved as challengers_league_2024.csv
https://www.vlr.gg/stats/?event_group_id=45&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
CSV for Valorant Champions Tour 2023 saved 

TypeError: scrape_event_data() missing 1 required positional argument: 'filename'

https://www.vlr.gg/stats/?event_group_id=38&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
                Player Agents  Rnd  R2.0    ACS   K:D KAST    ADR   KPR   APR  \
0       florescent\nSR   (+1)  986  1.48  304.4  1.74  78%  197.1  1.09  0.21   
1          suzu\nFL.GC         590  1.38  306.6  1.55  76%  195.4  1.05  0.24   
2    bakkushaw\nQLSH.S   (+1)  233  1.37  263.3  1.41  81%  168.0  0.97  0.43   
3         keenc\nEG.GC         884  1.37  291.8  1.53  77%  181.0  1.02  0.19   
4        rachell\nExal   (+1)  282  1.36  258.2  1.42  78%  164.2  0.90  0.59   
..                 ...    ...  ...   ...    ...   ...  ...    ...   ...   ...   
513          Skel\nSRN   (+6)  200  0.67  155.5  0.60  63%  110.4  0.52  0.29   
514          drea\nINF   (+2)  231  0.67  158.6  0.67  63%  105.9  0.54  0.23   
515         aley\nTama   (+1)  213  0.65  159.1  0.62  59%  107.9  0.54  0.24   
516              Ender   (+3)  231  0.65  154.7  0.60  62

https://www.vlr.gg/stats/?event_group_id=38&event_id=all&region=all&min_rounds=200&min_rating=1550&agent=all&map_id=all&timespan=all
                Player Agents  Rnd  R2.0    ACS   K:D KAST    ADR   KPR   APR  \
0       florescent\nSR   (+1)  986  1.48  304.4  1.74  78%  197.1  1.09  0.21   
1          suzu\nFL.GC         590  1.38  306.6  1.55  76%  195.4  1.05  0.24   
2    bakkushaw\nQLSH.S   (+1)  233  1.37  263.3  1.41  81%  168.0  0.97  0.43   
3         keenc\nEG.GC         884  1.37  291.8  1.53  77%  181.0  1.02  0.19   
4        rachell\nExal   (+1)  282  1.36  258.2  1.42  78%  164.2  0.90  0.59   
..                 ...    ...  ...   ...    ...   ...  ...    ...   ...   ...   
513          Skel\nSRN   (+6)  200  0.67  155.5  0.60  63%  110.4  0.52  0.29   
514          drea\nINF   (+2)  231  0.67  158.6  0.67  63%  105.9  0.54  0.23   
515         aley\nTama   (+1)  213  0.65  159.1  0.62  59%  107.9  0.54  0.24   
516              Ender   (+3)  231  0.65  154.7  0.60  62