In [247]:
# Goal: Predict the win rate of a team/combination of agents given minimal data
# such as statistics for the first few rounds (or just the team composition)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy import create_engine

engine = create_engine("sqlite:///data/valorant.sqlite")

scoreboard = pd.read_sql_table("Game_Scoreboard", con=engine)
rounds = pd.read_sql_table("Game_Rounds", con=engine)
games = pd.read_sql_table("Games", con=engine)

engine.dispose()

In [248]:
print("Scoreboard:")
print(scoreboard.head())
print("Rounds:")
print(rounds.head())
print("Games:")
print(games.head())

Scoreboard:
  GameID PlayerID PlayerName TeamAbbreviation    Agent    ACS  Kills  Deaths  \
0  60894     8419     Reduxx             Boos     jett  313.0   24.0    10.0   
1  60894      466     ChurmZ             Boos  chamber  227.0   16.0    10.0   
2  60894     3712   diaamond             Boos     sova  226.0   17.0     9.0   
3  60894     5099     Boltzy             Boos    viper  218.0   17.0    12.0   
4  60894     3983     Virtyy             Boos     skye   80.0    5.0    13.0   

   Assists  PlusMinus  ...  Num_4Ks  Num_5Ks  OnevOne  OnevTwo  OnevThree  \
0      3.0       14.0  ...      2.0      0.0      1.0      0.0        0.0   
1      7.0        6.0  ...      0.0      0.0      0.0      0.0        0.0   
2      8.0        8.0  ...      0.0      0.0      1.0      0.0        0.0   
3      2.0        5.0  ...      0.0      0.0      1.0      0.0        0.0   
4      3.0       -8.0  ...      0.0      0.0      0.0      0.0        0.0   

   OnevFour  OnevFive  Econ  Plants  Defuses

__Small-scale test for win-rate based on team composition:__

In [249]:
# Desired DataFrame structure:
# Index: GameID, Columns: Team1, Team1 agents, Team2, Team2 agents, Winner

print(len(scoreboard))
#scoreboard_grouped = scoreboard.copy().groupby('GameID')
game_ids_s = scoreboard['GameID'].unique()
print(len(game_ids_s))
#print(len(scoreboard_grouped))
print("------")
print(len(rounds))
print("------")

games = games[games['GameID'].isin(game_ids_s)]

game_ids_g = games['GameID'].unique()
scoreboard = scoreboard[scoreboard['GameID'].isin(game_ids_g)]

print(len(scoreboard))
print(len(games))
#print(scoreboard.head())

157939
15878
------
15531
------
157939
15878


In [250]:
# Step 1: Group agents by GameID and TeamAbbreviation into sets
team_compositions = pd.pivot_table(
    scoreboard,
    values='Agent',  # The column containing the agents
    index=['GameID', 'TeamAbbreviation'],  # Rows will be grouped by GameID and TeamID
    aggfunc=lambda x: set(x)  # Combine agent names into a set for each team
)

# Filter GameIDs with exactly two teams
team_counts = team_compositions.groupby(level='GameID').size()
valid_game_ids = team_counts[team_counts == 2].index
team_compositions = team_compositions.loc[valid_game_ids]

# Step 2: For each GameID, convert team abbreviation to full team name
teams = games[['GameID', 'Team1', 'Team2', 'Winner']]
team_map = games.set_index("GameID")[["Team1", "Team2"]]

merged = team_compositions.merge(games, how="left", on="GameID")

# Step 2: Assign positions in each group by GameID
merged["RowPosition"] = merged.groupby("GameID").cumcount()

# Step 3: Update TeamAbbreviation based on RowPosition
merged.loc[merged["RowPosition"] == 0, "TeamAbbreviation"] = merged["Team1"]
merged.loc[merged["RowPosition"] == 1, "TeamAbbreviation"] = merged["Team2"]

# Add column for winner
merged["Winner1"] = merged["TeamAbbreviation"] == merged["Winner"]

# Drop unnecessary columns like Team1, Team2, and RowPosition
team_compositions = merged[["GameID", "TeamAbbreviation", "Agent", "Winner1"]]


print(team_compositions.head(10))
print(teams[teams['GameID'] == '10004'])
print(len(team_compositions.index.get_level_values(0).unique()) / 2)
print(len(teams['GameID']))

  GameID   TeamAbbreviation                                  Agent  Winner1
0  10003       Bren Esports    {phoenix, omen, sova, cypher, raze}    False
1  10003  Attack All Around      {reyna, omen, sova, cypher, raze}     True
2  10004       Bren Esports      {reyna, omen, sova, cypher, jett}     True
3  10004  Attack All Around     {killjoy, reyna, omen, sova, jett}    False
4  10005       Bren Esports     {omen, cypher, jett, raze, breach}    False
5  10005  Attack All Around    {killjoy, omen, jett, raze, breach}     True
6  10006             HSDIRR     {phoenix, omen, viper, raze, sage}     True
7  10006           MJAOMODE  {killjoy, phoenix, omen, viper, raze}    False
8  10007             HSDIRR     {phoenix, omen, viper, jett, sage}    False
9  10007           MJAOMODE  {killjoy, phoenix, omen, viper, raze}     True
      GameID         Team1              Team2        Winner
12733  10004  Bren Esports  Attack All Around  Bren Esports
14771.0
15878


In [251]:
selection = {'phoenix', 'omen', 'sova', 'brimstone', 'raze'}
g = team_compositions[team_compositions['Agent'] == selection]
print(len(g))
s = g['Winner1'].sum()
print(s)
print("This comp's win rate is: %" + str((s/len(g)) * 100))

4
3
This comp's win rate is: %75.0


In [252]:
# Get all unique agents
all_agents = set().union(*merged["Agent"])
all_agents.remove('')

# One-hot encode agents
def encode_agents(row_agents, all_agents):
    return {agent: 1 if agent in row_agents else 0 for agent in all_agents}

# Create a DataFrame where columns represent agents
agent_columns = merged["Agent"].apply(lambda x: encode_agents(x, all_agents))

# Expand the one-hot encoding into separate columns
agent_df = pd.DataFrame(agent_columns.tolist(), index=merged.index)

# Combine with base DataFrame
merged = pd.concat([merged, agent_df], axis=1).drop(columns=["Agent"])  # Agents no longer needed
merged_clean = merged.drop(columns=['MatchID', 'Map', 'Team1ID', 'Team2ID', 'Team1_TotalRounds',
                                    'Team2_TotalRounds', 'Team1_SideFirstHalf', 'Team2_SideFirstHalf',
                                    'Team1_RoundsFirstHalf', 'Team1_RoundsSecondtHalf', 'Team1_RoundsOT',
                                    'Team2_RoundsFirstHalf', 'Team2_RoundsSecondtHalf', 'Team2_RoundsOT',
                                    'Team1_PistolWon', 'Team1_Eco', 'Team1_EcoWon', 'Team1_SemiEco',
                                    'Team1_SemiEcoWon', 'Team1_SemiBuy', 'Team1_SemiBuyWon',
                                    'Team1_FullBuy', 'Team1_FullBuyWon', 'Team2_PistolWon', 'Team2_Eco',
                                    'Team2_EcoWon', 'Team2_SemiEco', 'Team2_SemiEcoWon', 'Team2_SemiBuy',
                                    'Team2_SemiBuyWon', 'Team2_FullBuy', 'Team2_FullBuyWon'])

merged_clean.head()

Unnamed: 0,GameID,Team1,Team2,Winner,RowPosition,TeamAbbreviation,Winner1,killjoy,phoenix,omen,...,astra,chamber,yoru,reyna,viper,sova,kayo,brimstone,raze,breach
0,10003,Bren Esports,Attack All Around,Attack All Around,0,Bren Esports,False,0,1,1,...,0,0,0,0,0,1,0,0,1,0
1,10003,Bren Esports,Attack All Around,Attack All Around,1,Attack All Around,True,0,0,1,...,0,0,0,1,0,1,0,0,1,0
2,10004,Bren Esports,Attack All Around,Bren Esports,0,Bren Esports,True,0,0,1,...,0,0,0,1,0,1,0,0,0,0
3,10004,Bren Esports,Attack All Around,Bren Esports,1,Attack All Around,False,1,0,1,...,0,0,0,1,0,1,0,0,0,0
4,10005,Bren Esports,Attack All Around,Attack All Around,0,Bren Esports,False,0,0,1,...,0,0,0,0,0,0,0,0,1,1


In [254]:
# Split data into Team1 and Team2
team1 = merged_clean[merged_clean['Winner1'] == True]
team2 = merged_clean[merged_clean['Winner1'] == False]

# Rename columns to distinguish between Team1 and Team2.
team1 = team1.add_prefix("team1_")
team2 = team2.add_prefix("team2_")

# Combine both teams into a single row
final_df = pd.merge(team1, team2, left_on="team1_GameID", right_on="team2_GameID")

# Drop GameID columns if not required
columns_to_drop = [
    'team1_Team1', 'team1_Team2', 'team2_Team1',
    'team2_Team2', 'team1_RowPosition', 'team2_RowPosition',
    'team1_Winner', 'team2_Winner', 'team2_GameID'  # Redundant with Winner1 column
]

final_df = final_df.rename(columns={'team1_GameID': 'GameID', 'team1_TeamAbbreviation': 'Team1',
                                    'team2_TeamAbbreviation': 'Team2',})
final_df = final_df.drop(columns=columns_to_drop)
final_df['Winner1'] = final_df['team1_Winner1'] == True
final_df = final_df.drop(columns=['team1_Winner1', 'team2_Winner1'])

final_df.head()

Unnamed: 0,GameID,Team1,team1_killjoy,team1_phoenix,team1_omen,team1_cypher,team1_jett,team1_sage,team1_skye,team1_astra,...,team2_chamber,team2_yoru,team2_reyna,team2_viper,team2_sova,team2_kayo,team2_brimstone,team2_raze,team2_breach,Winner1
0,10003,Attack All Around,0,0,1,1,0,0,0,0,...,0,0,0,0,1,0,0,1,0,True
1,10004,Bren Esports,0,0,1,1,1,0,0,0,...,0,0,1,0,1,0,0,0,0,True
2,10005,Attack All Around,1,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,1,1,True
3,10006,HSDIRR,0,1,1,0,0,1,0,0,...,0,0,0,1,0,0,0,1,0,True
4,10007,MJAOMODE,1,1,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,True


In [255]:
# At this point, the winning team is always on the left, which must be fixed to prevent bias

# Randomly decide whether to swap Team1 and Team2 for each row
swap_teams = np.random.rand(len(final_df)) > 0.5

# Perform the swap where necessary
for idx in final_df.index[swap_teams]:
    # Swap columns for Team1 and Team2
    temp_team_columns = final_df.loc[idx, 'Team1':'team1_breach'].copy()
    final_df.loc[idx, 'Team1':'team1_breach'] = final_df.loc[idx, 'Team2':'team2_breach'].values
    final_df.loc[idx, 'Team2':'team2_breach'] = temp_team_columns.values

    # Flip the Winner1 column (because winner changes sides)
    final_df.loc[idx, 'Winner1'] = not final_df.loc[idx, 'Winner1']

final_df.head()

Unnamed: 0,GameID,Team1,team1_killjoy,team1_phoenix,team1_omen,team1_cypher,team1_jett,team1_sage,team1_skye,team1_astra,...,team2_chamber,team2_yoru,team2_reyna,team2_viper,team2_sova,team2_kayo,team2_brimstone,team2_raze,team2_breach,Winner1
0,10003,Attack All Around,0,0,1,1,0,0,0,0,...,0,0,0,0,1,0,0,1,0,True
1,10004,Bren Esports,0,0,1,1,1,0,0,0,...,0,0,1,0,1,0,0,0,0,True
2,10005,Attack All Around,1,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,1,1,True
3,10006,HSDIRR,0,1,1,0,0,1,0,0,...,0,0,0,1,0,0,0,1,0,True
4,10007,HSDIRR,0,1,1,0,1,1,0,0,...,0,0,0,1,0,0,0,1,0,False


In [256]:
# Check Winner1 distribution
team1_wins = final_df['Winner1'].sum()
total_matches = len(final_df)
print(f"Team1 Wins: {team1_wins}")
print(f"Team2 Wins: {total_matches - team1_wins}")

Team1 Wins: 7422
Team2 Wins: 7349


In [257]:
final_df['GameID'] = final_df['GameID'].astype(int)

X = final_df.drop(columns=['Team1', 'Team2', 'Winner1', 'GameID'])
y = final_df['Winner1']

In [315]:
from sklearn.model_selection import train_test_split

# 80-20 training/testing split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Training Data: {X_train.shape}, Test Data: {X_test.shape}")

Training Data: (11816, 34), Test Data: (2955, 34)


In [318]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

rf_model = RandomForestClassifier(n_estimators=200, max_depth=5, min_samples_split=20,
                                  min_samples_leaf=4, random_state=42)

rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)

# Evaluate the model
print("Random Forest Classification Report:")
print(classification_report(y_test, y_pred_rf))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))

Random Forest Classification Report:
              precision    recall  f1-score   support

       False       0.50      0.40      0.45      1475
        True       0.50      0.61      0.55      1480

    accuracy                           0.50      2955
   macro avg       0.50      0.50      0.50      2955
weighted avg       0.50      0.50      0.50      2955

Random Forest Accuracy: 0.5045685279187817


In [324]:
# Example: Predicting a single match outcome
team1_agents = {"killjoy", "raze", "jett", "omen", "viper"}
team2_agents = {"sage", "reyna", "omen", "phoenix", "raze"}

# One-hot encode agents for both teams
team1_encoded = {f'team1_{agent}': 1 if agent in team1_agents else 0 for agent in all_agents}
team2_encoded = {f'team2_{agent}': 1 if agent in team2_agents else 0 for agent in all_agents}

# Combine into a single row
input_row = pd.DataFrame([{**team1_encoded, **team2_encoded}])

# Predict using the model
rf_probability = rf_model.predict_proba(input_row)[0][1] * 100  # Probability Team1 wins
rf_probability_team2 = 100 - rf_probability  # Probability Team2 wins

print(f"Chance of Team1 winning: {rf_probability:.2f}%")
print(f"Chance of Team2 winning: {rf_probability_team2:.2f}%")


Chance of Team1 winning: 49.41%
Chance of Team2 winning: 50.59%


In [306]:
from sklearn.model_selection import GridSearchCV

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [100, 200, 300, 500],  # Vary the number of trees
    'max_depth': [5, 10, 15, 20],  # Vary tree depth
    'min_samples_split': [2, 5, 10, 20],  # Vary splitting criteria
    'min_samples_leaf': [1, 2, 4, 8]  # Vary minimum samples per leaf
}

# Create Random Forest model
rf_model = RandomForestClassifier(random_state=42)

# Perform Grid Search
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=3, scoring='accuracy', verbose=2)
grid_search.fit(X_train, y_train)

# Output the best parameters and accuracy
print("Best Parameters:", grid_search.best_params_)
print("Best Accuracy:", grid_search.best_score_)
# Note: best was {'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 20, 'n_estimators': 200}
# Best Accuracy: 0.5129488668677783

Fitting 3 folds for each of 256 candidates, totalling 768 fits
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.5s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.5s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.5s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=300; total time=   0.7s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=300; total time=   0.7s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=300; total time=   0.8s
[CV] END max_depth=5, min_samples_lea

In [325]:
# Input test
msg = input("Marco: ")
print(msg)

Polo


In [17]:
# DO NOT TAKE INTO ACCOUNT CODE PAST THIS AREA: IT WAS FROM BEFORE I GOT THINGS WORKING!!!

# Part 1 goal:
# Find the chance of a chosen combination of agents winning
# against another combination of agents. First step will be
# finding overall win rate for a combination of agents.

agents = scoreboard['Agent'].unique()
print(agents, len(agents))

# Example: Extract unique agents (already done earlier)
agents = scoreboard['Agent'].unique()

# Use team_compositions (teams with sets of agents) to create one-hot encoding
def one_hot_encode_team_compositions(team_compositions, all_agents):
    # 'all_agents' defines the feature space (columns)
    encoded_data = []
    team_index = []

    for idx, agents_set in team_compositions.items():
        # For each team composition, generate a binary array for all agents
        encoded_row = [1 if agent in agents_set else 0 for agent in all_agents]
        encoded_data.append(encoded_row)
        team_index.append(idx)  # Keep track of MultiIndex values (GameID, TeamAbbreviation)

    # Create a DataFrame with one-hot encoded rows, indexed by (GameID, TeamAbbreviation)
    one_hot_df = pd.DataFrame(
        encoded_data,
        columns=all_agents,
        index=pd.MultiIndex.from_tuples(team_index, names=team_compositions.index.names)
    )
    return one_hot_df


# Apply the one-hot encoding function
one_hot_team_comps = one_hot_encode_team_compositions(team_compositions['Agent'], agents)

print(one_hot_team_comps.head())
print("THIS IS IT ^^^")

# print(team_compositions.info())
# print(one_hot_team_comps.info())

GameID
60894    Booster Seat Gaming
60895               Pho Real
60896    Booster Seat Gaming
60924         Bjor's Kittens
60925         Bjor's Kittens
Name: Winner, dtype: object
                                                       Agent
GameID TeamAbbreviation                                     
10003  AAA               {sova, raze, phoenix, cypher, omen}
       BRN                 {sova, raze, cypher, reyna, omen}
10004  AAA                 {sova, cypher, reyna, jett, omen}
       BRN                {sova, reyna, jett, omen, killjoy}
10005  AAA                {breach, raze, cypher, jett, omen}
                                                 Agent
TeamAbbreviation                                      
RAD               {viper, astra, sova, reyna, chamber}
same                {viper, sova, cypher, reyna, jett}


In [20]:
team_comps_unique = []
t = 0
for i in team_compositions.index:
    if team_compositions.loc[i]['Agent'] not in team_comps_unique:
        team_comps_unique.append(team_compositions.loc[i]['Agent'])

In [72]:
print(len(team_comps_unique))
print(team_comps_unique[-1])

# Initialize 'who_won' DataFrame based on the team_compositions index
who_won = pd.DataFrame(index=['GameID', 'Team'], columns=["Winner"], dtype=bool)
who_won['Winner'] = True

# Group the DataFrame by GameID
grouped = who_won.groupby(level="GameID")

# Select the 0th row (first row in each group)
first_row_idx = grouped.nth(0).index

# Select the 1st row (second row in each group)
second_row_idx = grouped.nth(1).index

#who_won.loc[first_row_idx, "Winner"] = True if games[]

# for _, row in games.iterrows():
#     game_id = int(row["GameID"])
#     winner = row["Winner"]
#
#     if len(who_won.loc[game_id]) < 2:
#         who_won = who_won.drop(game_id)
#         continue
#     first_row = who_won.loc[game_id].iloc[0].name  # Abbreviation for Team1
#
#     second_row = who_won.loc[game_id].iloc[1].name  # Abbreviation for Team2
#
#     # Update the Winner column directly using index
#     if row["Team1"] == winner:  # Team1 won
#         who_won.at[(game_id, first_row), "Winner"] = True
#         who_won.at[(game_id, second_row), "Winner"] = False
#     elif row["Team2"] == winner:  # Team2 won
#         who_won.at[(game_id, first_row), "Winner"] = False
#         who_won.at[(game_id, second_row), "Winner"] = True


#print(who_won.loc[57193])
games[games['GameID'] == "57193"]

1456
{'sage', 'raze', 'cypher', 'reyna', 'jett'}


Unnamed: 0,GameID,MatchID,Map,Team1ID,Team2ID,Team1,Team2,Winner,Team1_TotalRounds,Team2_TotalRounds,...,Team1_FullBuyWon,Team2_PistolWon,Team2_Eco,Team2_EcoWon,Team2_SemiEco,Team2_SemiEcoWon,Team2_SemiBuy,Team2_SemiBuyWon,Team2_FullBuy,Team2_FullBuyWon
1375,57193,50407,TBD,6748,6737,ANYWAY,Claquettes cahussettes,ANYWAY,13,0,...,,,,,,,,,,


In [67]:
agents = scoreboard['Agent'].unique()
print(agents, len(agents))

import pandas as pd

# Example: Extract unique agents (already done earlier)
agents = scoreboard['Agent'].unique()


# Use team_compositions (teams with sets of agents) to create one-hot encoding
def one_hot_encode_team_compositions(team_compositions, all_agents):
    # 'all_agents' defines the feature space (columns)
    encoded_data = []
    team_index = []

    for idx, agents_set in team_compositions.items():
        # For each team composition, generate a binary array for all agents
        encoded_row = [1 if agent in agents_set else 0 for agent in all_agents]
        encoded_data.append(encoded_row)
        team_index.append(idx)  # Keep track of MultiIndex values (GameID, TeamAbbreviation)

    # Create a DataFrame with one-hot encoded rows, indexed by (GameID, TeamAbbreviation)
    one_hot_df = pd.DataFrame(
        encoded_data,
        columns=all_agents,
        index=pd.MultiIndex.from_tuples(team_index, names=team_compositions.index.names)
    )
    return one_hot_df


# Apply the one-hot encoding function
one_hot_team_comps = one_hot_encode_team_compositions(team_compositions['Agent'], agents)

print(one_hot_team_comps.head())
print("THIS IS IT ^^^")

# print(team_compositions.info())
# print(one_hot_team_comps.info())

['jett' 'chamber' 'sova' 'viper' 'skye' 'astra' 'raze' 'sage' 'kayo'
 'killjoy' 'reyna' 'cypher' 'breach' 'omen' 'brimstone' '' 'phoenix'
 'yoru'] 18
                         jett  chamber  sova  viper  skye  astra  raze  sage  \
GameID TeamAbbreviation                                                        
10003  AAA                  0        0     1      0     0      0     1     0   
       BRN                  0        0     1      0     0      0     1     0   
10004  AAA                  1        0     1      0     0      0     0     0   
       BRN                  1        0     1      0     0      0     0     0   
10005  AAA                  1        0     0      0     0      0     1     0   

                         kayo  killjoy  reyna  cypher  breach  omen  \
GameID TeamAbbreviation                                               
10003  AAA                  0        0      0       1       0     1   
       BRN                  0        0      1       1       0     1   
10004

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

reg_model = GradientBoostingRegressor()
reg_model.fit(X_train, y_train)
