In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
from sklearn.manifold import TSNE
import plotly.express as px
from sklearn.decomposition import PCA
from umap import UMAP
import pickle
import numpy as np
from valorant import *

In [4]:
# Prerequisite: save this vlr.gg stats page as "players.html" next to the notebook:
# https://www.vlr.gg/stats/?event_group_id=45&event_id=all&region=all&country=all&min_rounds=200&min_rating=1500&agent=all&map_id=all&timespan=all
#
# This cell pulls every absolute player-profile link out of the saved page and
# writes one URL per line to "players.txt".
# The encoding is pinned so decoding does not depend on the platform's default
# codec (assumes the page was saved as UTF-8 -- TODO confirm for your browser).
with open("players.html", "r", encoding="utf-8") as f:
    player_urls = re.findall(r'<a href="(https://www.vlr.gg/player/\d+/\w+)"', f.read())

# The output file is only opened after the HTML was read successfully, so a
# missing or unreadable "players.html" no longer truncates an existing list.
with open("players.txt", "w", encoding="utf-8") as f2:
    # dict.fromkeys drops repeated links while keeping first-seen order, so no
    # player page is listed (and later downloaded) twice.
    for i in dict.fromkeys(player_urls):
        f2.write(i + "\n")

In [5]:
def get_player_stats(url: str, timespan: str = "all", timeout: float = 30.0):
    """Download a player page and parse its agent statistics table.

    Args:
        url: Player profile URL. The text after the last "/" is used as the
            player name.
        timespan: One of "all", "90d", "60d", "30d"; sent as the ``timespan``
            query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a single stalled connection blocks forever.

    Returns:
        A tuple ``(df, team, name, rating)``:

        * ``df`` -- DataFrame indexed by "Agent" holding the scraped table
          columns plus "Usage count" (int) and "Usage perc" (fraction).
        * ``team`` -- stripped text of the first ``span.m-item-team-tag``.
        * ``name`` -- last path segment of ``url``.
        * ``rating`` -- sum over agents of ``Rating * Usage perc``; rows with
          an empty rating do not contribute.

    Raises:
        AssertionError: ``timespan`` is not one of the accepted values.
        requests.HTTPError: the server answered with an error status.
        requests.Timeout: no answer arrived within ``timeout`` seconds.
        AttributeError: the page has no ``table.wf-table`` or no team tag.
    """
    assert timespan in ["all", "90d", "60d", "30d"], f"unsupported timespan: {timespan!r}"
    response = requests.get(url, params={"timespan": timespan}, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find("table", class_="wf-table")
    team = soup.find("span", class_="m-item-team-tag").text.strip()
    name = url.split("/")[-1]

    # Column names come from the title attribute of the header cells.
    columns = [th["title"] for th in table.find_all("th", title=True)]

    # One list per <tr>: images contribute their alt text, cells their text.
    data = []
    for row in table.find_all("tr"):
        data.append(
            [
                cell["alt"] if cell.name == "img" else cell.text.strip()
                for cell in row.find_all(["td", "img"])
            ]
        )

    # Remove the header row from data
    data = data[1:]
    # The unnamed leading column presumably absorbs the text of the icon cell
    # that precedes the image alt text -- TODO confirm against the page markup.
    df = pd.DataFrame(data, columns=[""] + columns)
    # "Usage" carries both an absolute count in parentheses and a percentage.
    df["Usage count"] = df["Usage"].apply(lambda x: int(re.findall(r"\((\d+)\)", x)[0]))
    df["Usage perc"] = df["Usage"].apply(lambda x: float(re.findall(r"(\d+)\%", x)[0]) / 100)
    # Empty ratings become NaN, which .sum() skips.
    rating = (df["Rating"].replace("", np.nan).astype(float) * df["Usage perc"]).sum()
    return df.set_index("Agent"), team, name, rating


def get_player_row(url: str, timespan: str = "all") -> list:
    """Build one flat record for a player.

    The record starts with ``[name, team, rating]`` as returned by
    ``get_player_stats`` and continues with one "Usage count" entry per key of
    ``agent_roles`` (in that mapping's iteration order); agents missing from
    the player's table are recorded as 0.
    """
    player_df, team, name, rating = get_player_stats(url, timespan)
    played_agents = player_df.index
    usage_counts = [
        player_df.loc[agent, "Usage count"] if agent in played_agents else 0
        for agent in agent_roles.keys()
    ]
    return [name, team, rating] + usage_counts


def get_region(team):
    """Return the region label for a team.

    The team is looked up in ``emea``, ``amer`` and ``apac`` in that order and
    the first hit decides the label; a team found in none of them is "OTHER".
    """
    if team in emea:
        return "EMEA"
    if team in amer:
        return "AMERICAS"
    if team in apac:
        return "PACIFIC"
    return "OTHER"

In [None]:
# Scrape the 90-day statistics of every player listed in "players.txt" and
# pickle the results to "data-90d.pkl", keyed by player URL.
# Scraping stays best-effort: a page that cannot be fetched or parsed is
# skipped, but the failure is recorded and reported instead of vanishing.
data = {}
failed = {}
with open("players.txt", "r") as f:
    for line in f:
        url = line.strip()
        if not url:
            # blank line: nothing to download
            continue
        try:
            stats, team, name, rating = get_player_stats(url, "90d")
        except Exception as exc:
            # `Exception` (not a bare except) lets Ctrl-C / kernel interrupt
            # stop the loop instead of being swallowed.
            failed[url] = repr(exc)
            continue
        data[url] = {"stats": stats, "team": team, "name": name, "rating": rating}

if failed:
    print(f"Skipped {len(failed)} of {len(failed) + len(data)} players:")
    for failed_url, reason in failed.items():
        print(f"  {failed_url}: {reason}")

with open("data-90d.pkl", "wb") as f:
    pickle.dump(data, f)

In [7]:
# Reduce the `agents` columns of `df` to two dimensions with t-SNE
# (random_state is fixed so re-running the cell gives the same layout) and
# store both coordinates on `df`.
# NOTE(review): `df` and `agents` must already exist in the kernel; presumably
# `df` has one row per player -- confirm against the cell that builds it.
tsne = TSNE(n_components=2, random_state=0)
X = tsne.fit_transform(df[agents].values)
df["tsne-2d-one"], df["tsne-2d-two"] = X[:, 0], X[:, 1]

# Last expression of the cell, so the figure is rendered as the cell output.
px.scatter(
    df,
    x="tsne-2d-one",
    y="tsne-2d-two",
    color="region",
    hover_name="name",
    hover_data=["team"] + agents,
    width=1000,
    height=1000,
)



In [9]:
# Embed the agents (not the rows of `df`) with UMAP: `df[agents]` is
# transposed so that every row of `val` belongs to one agent.
# random_state is fixed, matching the t-SNE cell, because UMAP is stochastic
# and would otherwise produce a different layout on every run.
# NOTE: the variable keeps its historical name `tsne` in case other cells read
# it, although it holds a UMAP reducer.
tsne = UMAP(n_neighbors=5, min_dist=0.1, metric='correlation', random_state=0)
val = df[agents].T.values
# Dividing by the column sums rescales every original `df` row so that its
# entries across agents add up to 1 before the agents are compared.
X = tsne.fit_transform(val / val.sum(axis=0))
px.scatter(x=X[:,0], y=X[:,1], hover_name=agents, width=1000, height=1000)