In [1]:
import pandas as pd
from IPython.display import HTML

In [2]:
# The function below is useful when I disply dataframes.
# We can display multiple dataframes side by side.
def side_by_side(*dfs):
    html = '<div style="display:flex">'
    for df in dfs:
        html += '<div style="margin-right: 2em">'
        html += df.to_html()
        html += '</div>'
    html += '</div>'
    display(HTML(html))

In this notebook, I would like to see if team compositions have an influence on match results.

We can get team composition and match results data from "overview" and "maps_scores".

Also, the cell below has the list of top tier tournaments in 2022.

In [3]:
### List of tournaments I want to include in my data set.
# Stage 1
stage1 = ["Champions Tour Stage 1: EMEA Challengers",
"Champions Tour North America Stage 1: Challengers",
"Champions Tour Latin America Stage 1: Playoffs",
"Champions Tour LATAM/BR Stage 1: Last Chance Qualifier",
"Champions Tour Asia-Pacific Stage 1: Challengers Playoffs",
"Champions Tour Korea Stage 1: Challengers",
"Champions Tour Japan Stage 1: Challengers Playoffs",
"Valorant Champions Tour Stage 1: Masters Reykjavík"]

# Stage 2
stage2 = ["Champions Tour EMEA Stage 2: Challengers",
"Champions Tour North America Stage 2: Challengers",
"Champions Tour LATAM/BR Stage 2: Last Chance Qualifier",
"Champions Tour Brazil Stage 2: Challengers",
"Champions Tour Asia-Pacific Stage 2: Challengers Playoffs",
"Champions Tour Korea Stage 2: Challengers",
"Champions Tour Japan Stage 2: Challengers Playoffs",
"Valorant Champions Tour Stage 2: Masters Copenhagen"]

# Champions
champions = ["Champions Tour EMEA: Last Chance Qualifier",
"Champions Tour North America: Last Chance Qualifier",
"Champions Tour South America: Last Chance Qualifier",
"Champions Tour Asia-Pacific: Last Chance Qualifier",
"Champions Tour East Asia: Last Chance Qualifier",
"Valorant Champions 2022"]

vct_2022_stages = {"stage 1": stage1, "stage 2": stage2, "champions": champions}

In [4]:
maps_scores = pd.read_csv("../data/vct_2022/matches/maps_scores.csv")

maps_scores.head(10)

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team A,Team A Score,Team A Attacker Score,Team A Defender Score,Team A Overtime Score,Team B,Team B Score,Team B Attacker Score,Team B Defender Score,Team B Overtime Score,Duration
0,Valorant Champions 2022,Group Stage,Opening (A),Paper Rex vs EDward Gaming,Pearl,Paper Rex,13,6,7,,EDward Gaming,11,5,6,,1:16:34
1,Valorant Champions 2022,Group Stage,Opening (A),Paper Rex vs EDward Gaming,Icebox,Paper Rex,5,2,3,,EDward Gaming,13,3,10,,40:51
2,Valorant Champions 2022,Group Stage,Opening (A),Paper Rex vs EDward Gaming,Haven,Paper Rex,13,7,6,,EDward Gaming,8,3,5,,
3,Valorant Champions 2022,Group Stage,Opening (A),Leviatán vs Team Liquid,Haven,Leviatán,13,8,5,,Team Liquid,10,6,4,,1:04:37
4,Valorant Champions 2022,Group Stage,Opening (A),Leviatán vs Team Liquid,Ascent,Leviatán,13,6,7,,Team Liquid,10,4,6,,1:05:07
5,Valorant Champions 2022,Group Stage,Opening (B),ZETA DIVISION vs LOUD,Ascent,ZETA DIVISION,8,7,1,,LOUD,13,8,5,,1:05:35
6,Valorant Champions 2022,Group Stage,Opening (B),ZETA DIVISION vs LOUD,Fracture,ZETA DIVISION,9,7,2,,LOUD,13,8,5,,51:21
7,Valorant Champions 2022,Group Stage,Opening (B),OpTic Gaming vs BOOM Esports,Breeze,OpTic Gaming,16,4,8,4.0,BOOM Esports,18,4,8,6.0,1:46:49
8,Valorant Champions 2022,Group Stage,Opening (B),OpTic Gaming vs BOOM Esports,Bind,OpTic Gaming,13,10,3,,BOOM Esports,5,3,2,,38:28
9,Valorant Champions 2022,Group Stage,Opening (B),OpTic Gaming vs BOOM Esports,Fracture,OpTic Gaming,13,9,4,,BOOM Esports,3,0,3,,


In [11]:
overview = pd.read_csv("../data/vct_2022/matches/overview.csv")
overview["Agents"] = overview["Agents"].apply(lambda x: str(x).replace(" ",""))


In [13]:
# We don't need all rows from "overview" and the following are index.
ind1 = (overview.Map != "All Maps")    
ind2 = (overview.Side == "both")
ind = (ind1 & ind2)

In [14]:
keystolook = ["Tournament", "Stage", "Match Type", "Match Name", "Map", "Team", "Agents"]
#  I found that some matches do have "Agents" value null.  We need to get rid of this.
print(overview[ind][keystolook].isna().sum())

Tournament    0
Stage         0
Match Type    0
Match Name    0
Map           0
Team          0
Agents        0
dtype: int64


In [15]:
#  It's not right that the number of missing Agents is 143 which is not divisible by 5.
#  I will fill null value with "000" for later.
#  Let's first convert our dataframe and see that new "Agents" has wrong values.  (Correct ones are lists of five character names.)
def join_strings(lst):
    return ','.join(str(x) for x in lst)

vct_2022_team_comp = overview[ind][keystolook].fillna("000")
agent_list = vct_2022_team_comp["Agents"].unique()  # before aggregating "Agents" to get a team composition, I want to save individual agents played in 2022.
print(agent_list)

vct_2022_team_comp = overview[ind][keystolook].groupby(["Tournament", "Stage", "Match Type", "Match Name", "Map", "Team"], as_index=False).agg(lambda x: join_strings(sorted(list((x)))))


['fade' 'sage' 'astra' 'chamber' 'raze' 'viper' 'kayo' 'jett' 'sova'
 'reyna' 'cypher' 'skye' 'omen' 'breach' 'phoenix' 'killjoy' 'neon'
 'brimstone' 'yoru' 'omen,jett' 'kayo,jett' 'nan' 'raze,astra'
 'jett,viper' 'astra,chamber' 'omen,chamber' 'chamber,astra' 'reyna,omen'
 'jett,astra']


Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team,Agents
0,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,CERBERUS Esports,"breach,chamber,jett,omen,sova"
1,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,Made in Thailand,"astra,jett,kayo,killjoy,sova"
2,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,CERBERUS Esports,"astra,breach,chamber,jett,sova"
3,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,Made in Thailand,"brimstone,raze,sage,skye,viper"
4,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (B),Persija Esports vs Bonkers,Bind,Bonkers,"brimstone,chamber,neon,skye,sova"
...,...,...,...,...,...,...,...
17712,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Global Esports vs M1syl and friends,Breeze,M1syl and friends,"chamber,jett,skye,sova,viper"
17713,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,God Particles,"astra,jett,killjoy,raze,sova"
17714,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,Maruti Peek,"astra,jett,kayo,killjoy,sova"
17715,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Breeze,God Particles,"cypher,jett,skye,sova,viper"


In [18]:
overview[(overview["Agents"] == 'kayo,jett') & (overview["Map"] != "All Maps")]  # This line shows wrong input.  Agents column can't have mulitple agents when Map != "All Maps"

# So, getting agent_list in this stage doens't sound good.  I wanted to get agent_list in order to do linear regression later.
# I may preprocess the data later without making agent_list.

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Player,Team,Agents,Rating,Average Combat Score,...,Deaths,Assists,Kills - Deaths (KD),"Kill, Assist, Trade, Survive %",Average Damage per Round,Headshot %,First Kills,First Deaths,Kills - Deaths (FKD),Side
17517,Champions Tour North America Stage 2: Challengers,Open Qualifier #1,Round of 128,Lenny Time vs Go 4 Broke,Breeze,Camdog,Go 4 Broke,"kayo,jett",0.84,171.0,...,15.0,1.0,-4.0,58%,117.0,29%,1.0,4.0,-3.0,both
17518,Champions Tour North America Stage 2: Challengers,Open Qualifier #1,Round of 128,Lenny Time vs Go 4 Broke,Breeze,Camdog,Go 4 Broke,"kayo,jett",0.73,154.0,...,9.0,1.0,-3.0,50%,109.0,25%,0.0,3.0,-3.0,attack
17519,Champions Tour North America Stage 2: Challengers,Open Qualifier #1,Round of 128,Lenny Time vs Go 4 Broke,Breeze,Camdog,Go 4 Broke,"kayo,jett",1.02,203.0,...,6.0,0.0,-1.0,71%,131.0,38%,1.0,1.0,0.0,defend


In [19]:
len(vct_2022_team_comp.Agents.unique())

1109

"vct_2022_team_comp" doesn't have match result data.  I want to add it.

In [20]:
vct_2022_scores = maps_scores[["Tournament", "Stage", "Match Type", "Match Name", "Map",
                                "Team A", "Team A Score", "Team B", "Team B Score"]]
vct_2022_scores

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team A,Team A Score,Team B,Team B Score
0,Valorant Champions 2022,Group Stage,Opening (A),Paper Rex vs EDward Gaming,Pearl,Paper Rex,13,EDward Gaming,11
1,Valorant Champions 2022,Group Stage,Opening (A),Paper Rex vs EDward Gaming,Icebox,Paper Rex,5,EDward Gaming,13
2,Valorant Champions 2022,Group Stage,Opening (A),Paper Rex vs EDward Gaming,Haven,Paper Rex,13,EDward Gaming,8
3,Valorant Champions 2022,Group Stage,Opening (A),Leviatán vs Team Liquid,Haven,Leviatán,13,Team Liquid,10
4,Valorant Champions 2022,Group Stage,Opening (A),Leviatán vs Team Liquid,Ascent,Leviatán,13,Team Liquid,10
...,...,...,...,...,...,...,...,...,...
8879,Champions Tour CIS Stage 1: Challengers 1,Closed Qualifier,Grand Final,Natus Vincere vs FunPlus Phoenix,Bind,Natus Vincere,13,FunPlus Phoenix,10
8880,Champions Tour CIS Stage 1: Challengers 1,Closed Qualifier,Grand Final,Natus Vincere vs FunPlus Phoenix,Icebox,Natus Vincere,14,FunPlus Phoenix,12
8881,Champions Tour CIS Stage 1: Challengers 1,Closed Qualifier,Grand Final,Natus Vincere vs FunPlus Phoenix,Breeze,Natus Vincere,5,FunPlus Phoenix,13
8882,Champions Tour CIS Stage 1: Challengers 1,Closed Qualifier,Grand Final,Natus Vincere vs FunPlus Phoenix,Haven,Natus Vincere,7,FunPlus Phoenix,13


In [21]:
print("length of vct_2022_team_comp:", len(vct_2022_team_comp))
print("2(length of vct_2022_scores):", len(vct_2022_scores)*2)

length of vct_2022_team_comp: 17717
2(length of vct_2022_scores): 17768


vct_2022_scores has more match information than the other, so I will outer join them.  Before doing this, let's check if they have any null value. 

In [13]:
display(vct_2022_team_comp.isna().sum())
display(vct_2022_scores.isna().sum())

Tournament    0
Stage         0
Match Type    0
Match Name    0
Map           0
Team          0
Agents        0
dtype: int64

Tournament      0
Stage           0
Match Type      0
Match Name      0
Map             0
Team A          0
Team A Score    0
Team B          0
Team B Score    0
dtype: int64

Okay!  They look good.

In [22]:
left = vct_2022_team_comp
right = vct_2022_scores
keys = ["Tournament", "Stage", "Match Type", "Match Name", "Map"]
result = pd.merge(left, right, how="outer", on=keys)
print(len(result))
result.isna().sum()

17800


Tournament        0
Stage             0
Match Type        0
Match Name        0
Map               0
Team             75
Agents           75
Team A          110
Team A Score    110
Team B          110
Team B Score    110
dtype: int64

Quite a lot of missing values here.  Compare to the total number of games, the number of missing value does not seem terrible. (The ratio is 110/17800 = 0.006.)

I think it's okay to drop null values in this case.

In [23]:
vct_2022_comp_data = result.dropna()
vct_2022_comp_data.head()

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team,Agents,Team A,Team A Score,Team B,Team B Score
0,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,CERBERUS Esports,"breach,chamber,jett,omen,sova",Made in Thailand,13.0,CERBERUS Esports,8.0
1,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,Made in Thailand,"astra,jett,kayo,killjoy,sova",Made in Thailand,13.0,CERBERUS Esports,8.0
2,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,CERBERUS Esports,"astra,breach,chamber,jett,sova",Made in Thailand,13.0,CERBERUS Esports,3.0
3,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,Made in Thailand,"brimstone,raze,sage,skye,viper",Made in Thailand,13.0,CERBERUS Esports,3.0
4,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (B),Persija Esports vs Bonkers,Bind,Bonkers,"brimstone,chamber,neon,skye,sova",Persija Esports,15.0,Bonkers,17.0


I want to add match result into this.

Let's call our dataframe df and follow the steps below.

- make "A win" column by setting "Team A Score" > "Team B Score"
- set "win" column to be False
- if "Team" == "Team A", then set "win" to be "A win" and if "Team" == "Team B", then set "win" to be ~"A win"

In [29]:
vct_2022_comp_data.loc[:,"A win"] = (vct_2022_comp_data.loc[:,"Team A Score"] > vct_2022_comp_data.loc[:,"Team B Score"])

vct_2022_comp_data

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team,Agents,Team A,Team A Score,Team B,Team B Score,A win
0,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,CERBERUS Esports,"breach,chamber,jett,omen,sova",Made in Thailand,13.0,CERBERUS Esports,8.0,True
1,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,Made in Thailand,"astra,jett,kayo,killjoy,sova",Made in Thailand,13.0,CERBERUS Esports,8.0,True
2,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,CERBERUS Esports,"astra,breach,chamber,jett,sova",Made in Thailand,13.0,CERBERUS Esports,3.0,True
3,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,Made in Thailand,"brimstone,raze,sage,skye,viper",Made in Thailand,13.0,CERBERUS Esports,3.0,True
4,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (B),Persija Esports vs Bonkers,Bind,Bonkers,"brimstone,chamber,neon,skye,sova",Persija Esports,15.0,Bonkers,17.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...
17795,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Global Esports vs M1syl and friends,Breeze,M1syl and friends,"chamber,jett,skye,sova,viper",Global Esports,13.0,M1syl and friends,8.0,True
17796,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,God Particles,"astra,jett,killjoy,raze,sova",Maruti Peek,3.0,God Particles,13.0,False
17797,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,Maruti Peek,"astra,jett,kayo,killjoy,sova",Maruti Peek,3.0,God Particles,13.0,False
17798,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Breeze,God Particles,"cypher,jett,skye,sova,viper",Maruti Peek,3.0,God Particles,13.0,False


In [30]:
win = []
for index, row in vct_2022_comp_data.iterrows():
    if row["Team"] == row["Team A"]:
        win.append(row["A win"])
    elif row["Team"] == row["Team B"]:
        win.append(not row["A win"])
    else:
        print("Team is neither of Team A nor Team B.  This is wrong.")


In [32]:
vct_2022_comp_data.loc[:,"win"] = win
vct_2022_comp_data.loc[:,"loss"] = vct_2022_comp_data.loc[:,"win"].apply(lambda x: not x)
vct_2022_comp_data.head()

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team,Agents,Team A,Team A Score,Team B,Team B Score,A win,win,loss
0,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,CERBERUS Esports,"breach,chamber,jett,omen,sova",Made in Thailand,13.0,CERBERUS Esports,8.0,True,False,True
1,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,Made in Thailand,"astra,jett,kayo,killjoy,sova",Made in Thailand,13.0,CERBERUS Esports,8.0,True,True,False
2,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,CERBERUS Esports,"astra,breach,chamber,jett,sova",Made in Thailand,13.0,CERBERUS Esports,3.0,True,False,True
3,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,Made in Thailand,"brimstone,raze,sage,skye,viper",Made in Thailand,13.0,CERBERUS Esports,3.0,True,True,False
4,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (B),Persija Esports vs Bonkers,Bind,Bonkers,"brimstone,chamber,neon,skye,sova",Persija Esports,15.0,Bonkers,17.0,False,True,False


It would be good to keep track of score differences. Let's do this.

In [34]:
vct_2022_comp_data.loc[:,"score diff"] = abs(vct_2022_comp_data.loc[:,"Team A Score"] - vct_2022_comp_data.loc[:,"Team B Score"])
vct_2022_comp_data

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team,Agents,Team A,Team A Score,Team B,Team B Score,A win,win,loss,score diff
0,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,CERBERUS Esports,"breach,chamber,jett,omen,sova",Made in Thailand,13.0,CERBERUS Esports,8.0,True,False,True,5.0
1,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,Made in Thailand,"astra,jett,kayo,killjoy,sova",Made in Thailand,13.0,CERBERUS Esports,8.0,True,True,False,5.0
2,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,CERBERUS Esports,"astra,breach,chamber,jett,sova",Made in Thailand,13.0,CERBERUS Esports,3.0,True,False,True,10.0
3,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,Made in Thailand,"brimstone,raze,sage,skye,viper",Made in Thailand,13.0,CERBERUS Esports,3.0,True,True,False,10.0
4,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (B),Persija Esports vs Bonkers,Bind,Bonkers,"brimstone,chamber,neon,skye,sova",Persija Esports,15.0,Bonkers,17.0,False,True,False,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17795,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Global Esports vs M1syl and friends,Breeze,M1syl and friends,"chamber,jett,skye,sova,viper",Global Esports,13.0,M1syl and friends,8.0,True,False,True,5.0
17796,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,God Particles,"astra,jett,killjoy,raze,sova",Maruti Peek,3.0,God Particles,13.0,False,True,False,10.0
17797,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,Maruti Peek,"astra,jett,kayo,killjoy,sova",Maruti Peek,3.0,God Particles,13.0,False,False,True,10.0
17798,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Breeze,God Particles,"cypher,jett,skye,sova,viper",Maruti Peek,3.0,God Particles,13.0,False,True,False,10.0


There are too many different team compositions if we use individual agents.

I think it's better to have adopt the way of general team composition.  For example, if we have "jett,kayo,killjoy,omen,sova", then this comp is 1 duelist, 2 initiators, 1 controller, 1 sentinel.

In [35]:
def get_comp(agents: str) -> (str, int):
    """ 
    input: string of five agents that order of agents are sorted aphabetically and seperated by ","
    return: ("dics", int) "dics" where d: the # of duelist(s)
                                i: the # of initiator(s)
                                c: the # of controller(s)
                                s: the # of sentinel(s)
            int means the number of invalid agents in input list
    """
    roles = {"duelist": {"jett", "phoenix", "reyna", "raze", "yoru", "neon", "iso"},
             "initiator": {"sova", "breach", "skye", "kayo", "fade", "gekko"},
             "controller": {"brimstone", "omen", "viper", "astra", "harbor"},
             "sentinel": {"cypher", "sage", "killjoy", "chamber", "deadlock"}}
    
    agentlist = agents.split(",")
    dul = 0
    ini = 0
    con = 0
    sen = 0
    non = 0
    composition = ""
    for agent in agentlist:
        if agent in roles["duelist"]:
            dul += 1
        elif agent in roles["initiator"]:
            ini += 1
        elif agent in roles["controller"]:
            con += 1
        elif agent in roles["sentinel"]:
            sen += 1
        else:
            non +=1

    # assert (dul + ini + con + sen) == 5, f"{dul + ini + con + sen}"   I checked with this line, there are not valid composition in our data.

    composition = str(dul) + str(ini) + str(con) + str(sen)

    return composition, non

In [37]:
vct_2022_comp_data.loc[:,"comp code"] = vct_2022_comp_data["Agents"].apply(lambda x: get_comp(x)[0])
vct_2022_comp_data.loc[:,"invalid comp"] = vct_2022_comp_data["Agents"].apply(lambda x: get_comp(x)[1])

vct_2022_comp_data

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team,Agents,Team A,Team A Score,Team B,Team B Score,A win,win,loss,score diff,comp code,invalid comp
0,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,CERBERUS Esports,"breach,chamber,jett,omen,sova",Made in Thailand,13.0,CERBERUS Esports,8.0,True,False,True,5.0,1211,0
1,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,Made in Thailand,"astra,jett,kayo,killjoy,sova",Made in Thailand,13.0,CERBERUS Esports,8.0,True,True,False,5.0,1211,0
2,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,CERBERUS Esports,"astra,breach,chamber,jett,sova",Made in Thailand,13.0,CERBERUS Esports,3.0,True,False,True,10.0,1211,0
3,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,Made in Thailand,"brimstone,raze,sage,skye,viper",Made in Thailand,13.0,CERBERUS Esports,3.0,True,True,False,10.0,1121,0
4,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (B),Persija Esports vs Bonkers,Bind,Bonkers,"brimstone,chamber,neon,skye,sova",Persija Esports,15.0,Bonkers,17.0,False,True,False,2.0,1211,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17795,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Global Esports vs M1syl and friends,Breeze,M1syl and friends,"chamber,jett,skye,sova,viper",Global Esports,13.0,M1syl and friends,8.0,True,False,True,5.0,1211,0
17796,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,God Particles,"astra,jett,killjoy,raze,sova",Maruti Peek,3.0,God Particles,13.0,False,True,False,10.0,2111,0
17797,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,Maruti Peek,"astra,jett,kayo,killjoy,sova",Maruti Peek,3.0,God Particles,13.0,False,False,True,10.0,1211,0
17798,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Breeze,God Particles,"cypher,jett,skye,sova,viper",Maruti Peek,3.0,God Particles,13.0,False,True,False,10.0,1211,0


In [39]:
vct_2022_comp_data[vct_2022_comp_data["invalid comp"] != 0]

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team,Agents,Team A,Team A Score,Team B,Team B Score,A win,win,loss,score diff,comp code,invalid comp
4778,Champions Tour Europe Stage 1: Challengers 2,Open Qualifier,Round of 256,Giants Gaming vs ENTS,Ascent,Giants Gaming,"nan,nan,nan,nan,nan",Giants Gaming,13.0,ENTS,0.0,True,True,False,13.0,0,5
6260,Champions Tour Indonesia Stage 2: Challengers,Group Stage,Round 1,Bigetron Arctic vs Persija Esports,Fracture,Bigetron Arctic,"nan,nan,nan,nan,nan",Bigetron Arctic,0.0,Persija Esports,0.0,False,False,True,0.0,0,5
6261,Champions Tour Indonesia Stage 2: Challengers,Group Stage,Round 1,Bigetron Arctic vs Persija Esports,Fracture,Persija Esports,"nan,nan,nan,nan,nan",Bigetron Arctic,0.0,Persija Esports,0.0,False,True,False,0.0,0,5
9419,Champions Tour Malaysia & Singapore Stage 1: C...,Playoffs,Upper Quarterfinals,BLEED vs Fastelle,Ascent,BLEED,"nan,nan,nan,nan,nan",BLEED,0.0,Fastelle,0.0,False,False,True,0.0,0,5
9420,Champions Tour Malaysia & Singapore Stage 1: C...,Playoffs,Upper Quarterfinals,BLEED vs Fastelle,Ascent,Fastelle,"nan,nan,nan,nan,nan",BLEED,0.0,Fastelle,0.0,False,True,False,0.0,0,5
10245,Champions Tour North America Stage 1: Challengers,Open Qualifier #1,Lower Round 1,IlluZion vs Renegades,Haven,IlluZion,"nan,nan,nan,nan,nan",IlluZion,2.0,Renegades,13.0,False,False,True,11.0,0,5
10246,Champions Tour North America Stage 1: Challengers,Open Qualifier #1,Lower Round 1,IlluZion vs Renegades,Haven,Renegades,"nan,nan,nan,nan,nan",IlluZion,2.0,Renegades,13.0,False,True,False,11.0,0,5
11586,Champions Tour North America Stage 1: Challengers,Open Qualifier #2,Upper Round of 32,Rise vs Reformed,Breeze,Reformed,"nan,nan,nan,nan,nan",Rise,13.0,Reformed,7.0,True,False,True,6.0,0,5
11587,Champions Tour North America Stage 1: Challengers,Open Qualifier #2,Upper Round of 32,Rise vs Reformed,Breeze,Rise,"nan,nan,nan,nan,nan",Rise,13.0,Reformed,7.0,True,True,False,6.0,0,5
13075,Champions Tour Philippines Stage 1: Challengers,Group Stage,Group A,Bren Esports vs Action PH,Haven,Action PH,"nan,nan,nan,nan,nan",Bren Esports,13.0,Action PH,0.0,True,False,True,13.0,0,5


It looks like rows with invalid compositions are corrupted.  We want to drop this as well.

In [42]:
vct_2022_comp_data = vct_2022_comp_data[vct_2022_comp_data["invalid comp"] == 0]
display(vct_2022_comp_data[vct_2022_comp_data["invalid comp"] != 0])  #### <- I just want to check it works as expected.

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team,Agents,Team A,Team A Score,Team B,Team B Score,A win,win,loss,score diff,comp code,invalid comp


In [43]:
keyfeatures = ["Tournament", "Stage", "Match Type", "Match Name", "Map", "Team", "Agents", "comp code", "win", "loss", "score diff"]
vct_2022_comp_data_cleaned = vct_2022_comp_data[keyfeatures]
vct_2022_comp_data_cleaned

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team,Agents,comp code,win,loss,score diff
0,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,CERBERUS Esports,"breach,chamber,jett,omen,sova",1211,False,True,5.0
1,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,Made in Thailand,"astra,jett,kayo,killjoy,sova",1211,True,False,5.0
2,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,CERBERUS Esports,"astra,breach,chamber,jett,sova",1211,False,True,10.0
3,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,Made in Thailand,"brimstone,raze,sage,skye,viper",1121,True,False,10.0
4,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (B),Persija Esports vs Bonkers,Bind,Bonkers,"brimstone,chamber,neon,skye,sova",1211,True,False,2.0
...,...,...,...,...,...,...,...,...,...,...,...
17795,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Global Esports vs M1syl and friends,Breeze,M1syl and friends,"chamber,jett,skye,sova,viper",1211,False,True,5.0
17796,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,God Particles,"astra,jett,killjoy,raze,sova",2111,True,False,10.0
17797,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,Maruti Peek,"astra,jett,kayo,killjoy,sova",1211,False,True,10.0
17798,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Breeze,God Particles,"cypher,jett,skye,sova,viper",1211,True,False,10.0


In [44]:
print("The number of unique comp codes:", len(vct_2022_comp_data_cleaned["comp code"].unique()))
print("The number of unique compositions:", len(vct_2022_comp_data_cleaned["Agents"].unique()))

The number of unique comp codes: 40
The number of unique compositions: 1107


In [45]:
wincount_by_comp_code = vct_2022_comp_data_cleaned[["Map", "comp code", "win"]].groupby(["Map", "comp code"], as_index=False).agg("sum")
wincount_by_comp_code = wincount_by_comp_code\
                                .set_index(["Map", "comp code"])\
                                .rename(columns={"win": "win count"})

losscount_by_comp_code = vct_2022_comp_data_cleaned[["Map", "comp code", "loss"]].groupby(["Map", "comp code"], as_index=False).agg("sum")
losscount_by_comp_code = losscount_by_comp_code\
                                .set_index(["Map", "comp code"])\
                                .rename(columns={"loss": "loss count"})
# side_by_side(wincount_by_comp_code.sort_values(["Map", "win count"], ascending=False),
#                 losscount_by_comp_code.sort_values(["Map", "loss count"], ascending=False))

wincount_by_comp_code_sorted = wincount_by_comp_code.sort_values(["Map", "win count"], ascending=False)
losscount_by_comp_code_sorted = losscount_by_comp_code.sort_values(["Map", "loss count"], ascending=False)

In [46]:
maplist = vct_2022_comp_data_cleaned["Map"].unique()
mapdict = {}

for map in maplist:
    windf = wincount_by_comp_code_sorted.loc[map].head()
    lossdf = losscount_by_comp_code_sorted.loc[map].head()
    mapdict[map] = (windf, lossdf)

for map, df_tuple in mapdict.items():
    print("==============================")
    print(map)
    side_by_side(*df_tuple)



Ascent


Unnamed: 0_level_0,win count
comp code,Unnamed: 1_level_1
1211,1337
2111,191
1112,158
212,23
311,22

Unnamed: 0_level_0,loss count
comp code,Unnamed: 1_level_1
1211,1369
2111,206
1112,108
212,27
311,17


Bind


Unnamed: 0_level_0,win count
comp code,Unnamed: 1_level_1
1121,349
1220,298
1211,274
2111,110
1112,64

Unnamed: 0_level_0,loss count
comp code,Unnamed: 1_level_1
1211,325
1220,303
1121,278
2111,116
1112,74


Fracture


Unnamed: 0_level_0,win count
comp code,Unnamed: 1_level_1
1112,186
1121,164
1211,148
2111,86
2120,5

Unnamed: 0_level_0,loss count
comp code,Unnamed: 1_level_1
1112,179
1211,167
1121,151
2111,79
1220,5


Haven


Unnamed: 0_level_0,win count
comp code,Unnamed: 1_level_1
1211,1410
2111,157
1112,52
2210,15
221,8

Unnamed: 0_level_0,loss count
comp code,Unnamed: 1_level_1
1211,1365
2111,176
1112,48
2210,12
1121,10


Icebox


Unnamed: 0_level_0,win count
comp code,Unnamed: 1_level_1
1112,619
2111,490
1211,152
212,102
1121,52

Unnamed: 0_level_0,loss count
comp code,Unnamed: 1_level_1
1112,566
2111,522
1211,175
212,102
1121,43


Split


Unnamed: 0_level_0,win count
comp code,Unnamed: 1_level_1
1112,330
1121,275
2012,131
1211,127
2111,72

Unnamed: 0_level_0,loss count
comp code,Unnamed: 1_level_1
1112,335
1121,240
1211,144
2012,127
2111,88


Breeze


Unnamed: 0_level_0,win count
comp code,Unnamed: 1_level_1
1211,843
2111,104
221,29
1112,12
1220,11

Unnamed: 0_level_0,loss count
comp code,Unnamed: 1_level_1
1211,809
2111,108
221,28
1112,19
311,14


Pearl


Unnamed: 0_level_0,win count
comp code,Unnamed: 1_level_1
1211,3
221,2
1112,2
1121,2
212,0

Unnamed: 0_level_0,loss count
comp code,Unnamed: 1_level_1
1121,3
1211,3
221,2
212,1
1112,0


In [49]:
mapdf = pd.get_dummies(vct_2022_comp_data_cleaned["Map"])  #dataframe with map played info that will be appended to the cleaned dataframe.

for map in maplist:
    vct_2022_comp_data_cleaned.loc[:,map] = mapdf[map]


In [50]:
vct_2022_comp_data_cleaned.keys()

Index(['Tournament', 'Stage', 'Match Type', 'Match Name', 'Map', 'Team',
       'Agents', 'comp code', 'win', 'loss', 'score diff', 'Ascent', 'Bind',
       'Fracture', 'Haven', 'Icebox', 'Split', 'Breeze', 'Pearl'],
      dtype='object')

In [73]:
# I want to define a funtion to get columns of individual characters to do linear regression on maps and characters.
# The following dictionary has all available characters in 2022 tournaments.  Characters played in other years are contained in this dictionary.
roles = {"duelist": {"jett", "phoenix", "reyna", "raze", "yoru", "neon"},
             "initiator": {"sova", "breach", "skye", "kayo", "fade"},
             "controller": {"brimstone", "omen", "viper", "astra"},
             "sentinel": {"cypher", "sage", "killjoy", "chamber"}}  
allcharacters = []
for role, chars in roles.items():
    allcharacters.extend(list(roles[role]))

allcharacters

['yoru',
 'neon',
 'phoenix',
 'jett',
 'reyna',
 'raze',
 'sova',
 'skye',
 'fade',
 'breach',
 'kayo',
 'omen',
 'astra',
 'brimstone',
 'viper',
 'killjoy',
 'sage',
 'cypher',
 'chamber']

In [81]:
def is_in_comp(a: str, b: str) -> bool:
    """ 
    input: a charcter, and a team composition
    output: True if given character is in the composition, False otherwise
    """
    agentcomp = b.split(",")
    return a in agentcomp


In [83]:
for character in allcharacters:
    for index, row in vct_2022_comp_data_cleaned.iterrows():
        vct_2022_comp_data_cleaned.loc[:,character] = vct_2022_comp_data_cleaned["Agents"].apply(lambda x: is_in_comp(character, x))

vct_2022_comp_data_cleaned

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team,Agents,comp code,win,loss,...,fade,breach,kayo,omen,astra,brimstone,viper,sage,cypher,chamber
0,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,CERBERUS Esports,"breach,chamber,jett,omen,sova",1211,False,True,...,False,True,False,True,False,False,False,False,False,True
1,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Ascent,Made in Thailand,"astra,jett,kayo,killjoy,sova",1211,True,False,...,False,False,True,False,True,False,False,False,False,False
2,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,CERBERUS Esports,"astra,breach,chamber,jett,sova",1211,False,True,...,False,True,False,False,True,False,False,False,False,True
3,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (A),Made in Thailand vs CERBERUS Esports,Bind,Made in Thailand,"brimstone,raze,sage,skye,viper",1121,True,False,...,False,False,False,False,False,True,True,True,False,False
4,Champions Tour Asia-Pacific Stage 1: Challenge...,Group Stage,Decider (B),Persija Esports vs Bonkers,Bind,Bonkers,"brimstone,chamber,neon,skye,sova",1211,True,False,...,False,False,False,False,False,True,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17795,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Global Esports vs M1syl and friends,Breeze,M1syl and friends,"chamber,jett,skye,sova,viper",1211,False,True,...,False,False,False,False,False,False,True,False,False,True
17796,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,God Particles,"astra,jett,killjoy,raze,sova",2111,True,False,...,False,False,False,False,True,False,False,False,False,False
17797,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Ascent,Maruti Peek,"astra,jett,kayo,killjoy,sova",1211,False,True,...,False,False,True,False,True,False,False,False,False,False
17798,Valorant Conquerors Championship,Wildcard Qualifier,Upper Bracket Semifinals,Maruti Peek vs God Particles,Breeze,God Particles,"cypher,jett,skye,sova,viper",1211,True,False,...,False,False,False,False,False,False,True,False,True,False


I want to do linear regression on individual agents and maps and that's why I wrote the cell right above.\
However, the running time is 1 min 30 seconds for each character, so the above cell takes about 30 minutes to run.\
I will make a new dataframe whose columns are:
- Tournament
- Stage
- Match Type
- Match Name
- Map
- List of maps
- List of agents
- Score
- Score diff
- Win.

The first five features (Tournament, Stage, Match Type, Match Name, Map) can be used as the match id.