## Importing the libraries, functions and cardhash

In [1]:
## Import the libraries
import requests
import json
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
import xlsxwriter

## Import hashfiles of card
cardhash_df = pd.read_excel("SVCardInfo.xlsx", sheet_name="FileData")
cardhash_dict = cardhash_df.set_index("hash")["base_card_name"].to_dict()

## Import function
from DeckClassify import DeckSearch
from SVExcelFormatter import DeckBreakdownToExcel, SVOExcelFormatter

## Importing SVO decklist file, and extracting player information from Battlefy
- The objective is to merge data from different sources into one DataFrame

In [2]:
## Convert excel of decklinks into DataFrame
## Input columns: Name	Player ID	Deck 1	Deck 2	Deck 3
rawdf_1 = pd.read_excel('input/SVO_SEAO_Apr2023.xlsx')

## Data Cleaning - Invalid decks pulled from client
rawdf_1 = rawdf_1.loc[(rawdf_1['Deck 1'].str.len() >= 287) | (rawdf_1['Deck 2'].str.len() >= 287) | (rawdf_1['Deck 3'].str.len() >= 287)].copy().reset_index(drop = True)

In [3]:
## Extract participant and match info from Battlefy
bfyTourCode = '6430cbc4fc426841771e5894'

## Pull from battlefy and check against participating players
targetStandings = f'https://dtmwra1jsgyb0.cloudfront.net/stages/{bfyTourCode}/latest-round-standings'
jsonResponse = requests.get(targetStandings).json()
df = pd.json_normalize(jsonResponse)

# Extract customFields
## Sort customFields list of dictionary by unique _id for each customFields.
df['team.customFields'] = df['team.customFields'].apply(lambda x: sorted(x, key = lambda y: y['_id']))
    ## Check for the number of dictionary in list before running, adjust range(0, n) accordingly.
for n in range(0,3): 
    df[f'customField{n}'] = df['team.customFields'].apply(lambda x: x[n]['value'])
df = df.copy()[['teamID', 'wins', 'customField0', 'customField1', 'customField2', 'team.customFields']]
df = df.rename(columns = {"wins": "Swiss Wins", "customField0":"Player ID", "customField1":"Discord ID", "customField2":"Twitter ID"})

## Data Cleaning - Player ID
df["Player ID"] = df["Player ID"].apply(lambda x: x.replace("-", "").replace(" ", "") )
    ## Manual adjustments
# df.replace('534186335', '534186334', inplace = True)
# df.replace('24241352', '242417352', inplace = True)
df["Player ID"] = df["Player ID"].apply(lambda x: int(x.replace(" ", "")))

In [4]:
## Join tables from excel decklist to participants in battlefy
df_comb_final = pd.concat([rawdf_1.set_index('Player ID'), df.set_index('Player ID')], axis=1, join="inner").reset_index(drop = False)

In [5]:
## Check
print(f"Players extracted from SVO rawdf_1 : {rawdf_1.shape[0]}")
print(f"Players extracted from Battlefy : {df.shape[0]}")
print(f"Players in df_comb_final : {df_comb_final.shape[0]}")

Players extracted from SVO rawdf_1 : 201
Players extracted from Battlefy : 175
Players in df_comb_final : 174


## Deck Classification & Summarization

In [6]:
# ## Pre-game only, otherwise keep commented
# ## First sort the classes (use sorted which return a value, do not use sort). Then create columns for decks.
# rawdf_1['ListOfDecks'] = rawdf_1.apply(lambda x: sorted([x['Deck 1'], x['Deck 2'], x['Deck 3']]), axis = 1)
# for n in range(3):
#     rawdf_1[f"Deck{n+1}_URL"] = rawdf_1["ListOfDecks"].apply(lambda x: x[n])

# ## Keep only relevant columns for subsequent use.
# svo_dfwide = rawdf_1.copy()

## First sort the classes (use sorted which return a value, do not use sort). Then create columns for decks.
df_comb_final['ListOfDecks'] = df_comb_final.apply(lambda x: sorted([x['Deck 1'], x['Deck 2'], x['Deck 3']]), axis = 1)
for n in range(3):
    df_comb_final[f"Deck{n+1}_URL"] = df_comb_final["ListOfDecks"].apply(lambda x: x[n])

## Keep only relevant columns for subsequent use.
svo_dfwide = df_comb_final.copy()

In [7]:
## Prepare wide format
## 1. Replace card hash with card names
for n in range(1, 4):
    svo_dfwide[f"Deck{n}"] = svo_dfwide[f"Deck{n}_URL"].replace(cardhash_dict, regex=True)
## 2. Pass function to classify decks    
for n in range(1, 4):
    svo_dfwide[f"Deck{n}"] = svo_dfwide[f"Deck{n}"].apply(lambda d: DeckSearch(d))
## 3. Create a new column containing the lineups. To convert to tuple for immutability for sorting later.
svo_dfwide["Lineup"] = svo_dfwide.apply(lambda x: tuple([x["Deck1"], x["Deck2"], x["Deck3"]]), axis=1)
svo_dfwide["C_Lineup"] = svo_dfwide.apply(lambda x: tuple([x["Deck1"].split(" ")[-1].title() + "craft", x["Deck2"].split(" ")[-1].title() + "craft", x["Deck3"].split(" ")[-1].title() + "craft"]), axis=1)
## 4. Sort & Prepare wide format (Wide)
svo_dfwide = svo_dfwide[['Name', 'Player ID', 'Deck1_URL', 'Deck2_URL', 'Deck3_URL', 'Deck1', 'Deck2', 'Deck3', 'Lineup', 'C_Lineup', 'Swiss Wins']]
svo_dfwide = svo_dfwide.sort_values(['Swiss Wins', 'C_Lineup', 'Lineup', 'Name'], ascending=[False, True, True, True]).reset_index(drop = True)

## Prepare columnar format (Tall)
## 1. Stack vertically using concat, prepare two copies with renamed columns. Ignore index on appending.
svo_dftall = pd.concat([svo_dfwide[["Player ID", "Name", "Deck1_URL", "Deck1"]].copy().rename(columns = {"Deck1_URL":"Deck_URL", "Deck1":"Deck"}), 
                        svo_dfwide[["Player ID", "Name", "Deck2_URL", "Deck2"]].copy().rename(columns = {"Deck2_URL":"Deck_URL", "Deck2":"Deck"}),
                        svo_dfwide[["Player ID", "Name", "Deck3_URL", "Deck3"]].copy().rename(columns = {"Deck3_URL":"Deck_URL", "Deck3":"Deck"})], ignore_index=True)
## 2. Create a column for classes using string slicing.
svo_dftall["Class"] = svo_dftall["Deck"].apply(lambda x: x.split(" ")[-1].title() + "craft")
## 3. Sort
svo_dftall = svo_dftall.sort_values(["Player ID", "Deck_URL", "Deck"], ascending=[True, True, True]).reset_index(drop = True)
svo_dftall['Name'] = svo_dftall['Name'].astype(str) 

## Create Summary DataFrame
## Deck Summary
deck_summary_df = svo_dftall[["Deck", "Class"]].value_counts().reset_index(name='Count')
deck_summary_df["%ofPlayers"] = deck_summary_df["Count"]/(deck_summary_df["Count"].sum()/3)
## Class Summary
class_summary_df = svo_dftall[["Class"]].value_counts().reset_index(name='Count')
class_summary_df["%ofPlayers"] = class_summary_df["Count"]/(class_summary_df["Count"].sum()/3)
## Lineup Summary
lineup_summary_df = svo_dfwide[["Lineup"]].value_counts().reset_index(name='Count')
lineup_summary_df["%ofPlayers"] = lineup_summary_df["Count"]/(lineup_summary_df["Count"].sum())
## C_Lineup Summary
clineup_summary_df = svo_dfwide[["C_Lineup"]].value_counts().reset_index(name='Count')
clineup_summary_df["%ofPlayers"] = clineup_summary_df["Count"]/(clineup_summary_df["Count"].sum())

## Deck Breakdown

In [8]:
## Deck Breakdown. Loop through columnar format for each decks in each classes, then store all DataFrames in a dictionary.
deck_breakdown_dict = {}
for classes in svo_dftall["Class"].unique():
    deck_breakdown_dict[f"{classes}"] = {}
    for deck in sorted(svo_dftall.loc[svo_dftall["Class"] == classes]["Deck"].unique()):
        # 1. Create a view of all players with the same deck archetype.
        player_df = svo_dftall.loc[svo_dftall["Deck"] == deck].copy().sort_values(by=["Name"])
        # 2. For each player, convert Deck_URL to a list of cards in the deck. 
        player_df["Deck_URL"] = player_df["Deck_URL"].apply(lambda x: x.split("deck/")[-1].split("?lang")[0][4:])
        player_df["Cards"] = player_df["Deck_URL"].apply(lambda x: x.split("."))
        player_df["Cards"] = player_df["Cards"].apply(lambda x: [each.replace(each, cardhash_dict[each]) for each in x])
        # 3. Convert list of cards into a DataFrame. Aggregate the Count of each cards with value_counts. Set index header as deck archetype name (which is card name too). Player forms the columns.
        player_df["Cards"] = player_df.apply(lambda x: pd.DataFrame(x["Cards"], columns = [f"{deck}"]).value_counts().to_frame(name = x["Name"]), axis=1)
        # 4. Create a merged DataFrame, displaying the granular breakdown of all players and their ratio of cards.
        player_list = player_df["Cards"].to_list()
        mergeddf = pd.concat(player_list, axis=1)
        # 5. Create a separate DataFrame storing all the descriptive stats.
        mergeddf2 = pd.DataFrame()
        mergeddf2["Count"] = mergeddf.apply(lambda x: x.count(), axis = 1)
        mergeddf2["Mean"] = mergeddf.apply(lambda x: x.mean(), axis = 1)
        mergeddf2["Median"] = mergeddf.apply(lambda x: x.median(), axis = 1)
        mergeddf2["SD"] = mergeddf.apply(lambda x: x.std(), axis = 1)
        # 6. Merge descriptive stats DataFrame to the front, with granular DataFrame. Then append to dictionary container
        mergeddf3 = pd.concat([mergeddf2, mergeddf], axis=1).sort_values(by=['Count', f"{deck}"], ascending=[False, True])
        deck_breakdown_dict[f"{classes}"][f"{deck}"] = (mergeddf3)

## Swiss Decay
- The objective of this section is to track what players fought against, and at which point did they get X-2

In [9]:
## Pull from battlefy and check against participating players
targetRound = f'https://dtmwra1jsgyb0.cloudfront.net/stages/{bfyTourCode}/matches'
jsonResponse_1 = requests.get(targetRound).json()
df_1 = pd.json_normalize(jsonResponse_1)

## Keep relevant rows for df_1, remove BYES and AFK rounds
df_1 = df_1[(df_1['isBye'] == False) & ~(df_1['isDoubleLoss'] == True)].reset_index(drop = True)

In [10]:
## Extract customFields
## Sort customFields list of dictionary by unique _id for each customFields.
for p in ['top', 'bottom']:
    df_1[f'{p}.team.customFields'] = df_1[f'{p}.team.customFields'].apply(lambda x: sorted(x, key = lambda y: y['_id']))
    ## Check for the number of dictionary in list before running, adjust range(0, n) accordingly.
    for n in range(0,3): 
        df_1[f'{p}.team.customField{n}'] = df_1[f'{p}.team.customFields'].apply(lambda x: x[n]['value'])

        
## Assume that customFields0 contains player's Shadowverse ID, else please adjust accordingly
df_1 = df_1.rename(columns = {"top.team.customField0":"top.team.sv_ID", "bottom.team.customField0":"bottom.team.sv_ID"})
## Data Cleaning - Player ID
df_1["top.team.sv_ID"] = df_1["top.team.sv_ID"].apply(lambda x: x.replace("-", "").replace(" ", "") )
df_1["bottom.team.sv_ID"] = df_1["bottom.team.sv_ID"].apply(lambda x: x.replace("-", "").replace(" ", "") )
    ## Manual adjustments
df_1.replace('534186335', '534186334', inplace = True)
df_1.replace('24241352', '242417352', inplace = True)

## Retain relevant information for decay
df_2 = df_1.copy()[['roundNumber', 'isBye', 'top.team.sv_ID', 'bottom.team.sv_ID', 'top.winner', 'bottom.winner', 'top.score', 'bottom.score']]

In [11]:
decay_list = {}
for n in range(df_2.shape[0]):
    if df_2['top.team.sv_ID'][n] not in decay_list.keys() and df_2['top.team.sv_ID'][n] != '':
        decay_list[df_2['top.team.sv_ID'][n]] = {'R1': '',
                                                 'R2': '',
                                                 'R3': '',
                                                 'R4': '',
                                                 'R5': '',
                                                 'R6': '',
                                                 'R7': '',
                                                 'outRound': 0,
                                                 'Xrounds': [],
                                                 'Xcount': 0}
        
    if df_2['bottom.team.sv_ID'][n] not in decay_list.keys() and df_2['bottom.team.sv_ID'][n] != '':
        decay_list[df_2['bottom.team.sv_ID'][n]] = {'R1': '',
                                                    'R2': '',
                                                    'R3': '',
                                                    'R4': '',
                                                    'R5': '',
                                                    'R6': '',
                                                    'R7': '',
                                                    'outRound': 0,
                                                    'Xrounds': [],
                                                    'Xcount': 0}
        
    if df_2['isBye'][n] == False:
        decay_list[df_2['top.team.sv_ID'][n]]['R{}'.format(df_2['roundNumber'][n])] = df_2['bottom.team.sv_ID'][n]
        decay_list[df_2['bottom.team.sv_ID'][n]]['R{}'.format(df_2['roundNumber'][n])] = df_2['top.team.sv_ID'][n]
        if df_2['top.winner'][n] == False:
            decay_list[df_2['top.team.sv_ID'][n]]['Xcount'] += 1
            decay_list[df_2['top.team.sv_ID'][n]]['Xrounds'].append(df_2['roundNumber'][n])
            if decay_list[df_2['top.team.sv_ID'][n]]['Xcount'] == 2:
                decay_list[df_2['top.team.sv_ID'][n]]['outRound'] = df_2['roundNumber'][n]
        if df_2['bottom.winner'][n] == False:
            decay_list[df_2['bottom.team.sv_ID'][n]]['Xcount'] += 1
            decay_list[df_2['bottom.team.sv_ID'][n]]['Xrounds'].append(df_2['roundNumber'][n])
            if decay_list[df_2['bottom.team.sv_ID'][n]]['Xcount'] == 2:
                decay_list[df_2['bottom.team.sv_ID'][n]]['outRound'] = df_2['roundNumber'][n]
        
    if df_2['isBye'][n] == True:
        decay_list[df_2['top.team.sv_ID'][n]]['R{}'.format(df_2['roundNumber'][n])] = 'BYE'

In [12]:
svo_dfwide_temp = svo_dfwide.copy()
svo_dfwide_temp["Player ID"] = svo_dfwide_temp["Player ID"].apply(lambda x: str(x))
player_name_dict = svo_dfwide_temp.set_index("Player ID")["Name"].to_dict()
player_lu_dict = svo_dfwide_temp.set_index("Player ID")["Lineup"].to_dict()
player_clu_dict = svo_dfwide_temp.set_index("Player ID")["C_Lineup"].to_dict()

## Decay by player
df_3 = pd.DataFrame.from_dict(decay_list).T
df_3['Name'] = df_3.index
df_3 = df_3.replace(player_name_dict, regex = True)
df_3 = df_3.copy().reset_index(drop = False).rename(columns = {'index': 'Player ID'})[['Player ID', 'Name', 'outRound', 'Xcount', 'Xrounds', 'R1', 'R2', 'R3', 'R4', 'R5', 'R6', 'R7']]
df_3["Player ID"] = df_3["Player ID"].apply(lambda x: int(x))
df_3 = pd.concat([df_3.set_index('Player ID'), df_comb_final.set_index('Player ID')['Swiss Wins']], axis=1, join="inner").reset_index(drop = False)[['Player ID', 'Name', 'Swiss Wins', 'outRound', 'Xcount', 'Xrounds', 'R1', 'R2', 'R3',
       'R4', 'R5', 'R6', 'R7']]
df_3 = df_3.sort_values(['Swiss Wins', 'outRound', 'Xcount', 'Name'], ascending=[False, False, False, True]).reset_index(drop = True)

## Decay by lineup
df_4 = pd.DataFrame.from_dict(decay_list).T
df_4['Name'] = df_4.index
df_4['Name'] = df_4['Name'].replace(player_name_dict, regex = True)
df_4['Lineup'] = df_4.index
df_4 = df_4.replace(player_lu_dict, regex = True)
df_4 = df_4.copy().reset_index(drop = False).rename(columns = {'index': 'Player ID'})[['Player ID', 'Name', 'outRound', 'Xcount', 'Xrounds','Lineup', 'R1', 'R2', 'R3', 'R4', 'R5', 'R6', 'R7']]
df_4["Player ID"] = df_4["Player ID"].apply(lambda x: int(x))
df_4 = pd.concat([df_4.set_index('Player ID'), df_comb_final.set_index('Player ID')['Swiss Wins']], axis=1, join="inner").reset_index(drop = False)[['Player ID', 'Name', 'Swiss Wins', 'outRound', 'Xcount', 'Xrounds', 'Lineup', 'R1', 'R2', 'R3',
       'R4', 'R5', 'R6', 'R7']]
df_4 = df_4.sort_values(['Swiss Wins', 'outRound', 'Xcount', 'Lineup', 'Name'], ascending=[False, False, False, True, True]).reset_index(drop = True)

## Matchup Analysis
- The objective of this section is to summarize and look at the matchups between lineups

In [13]:
## Keep relevant columns for df
df_5 = df_2.copy()[['top.team.sv_ID', 'top.winner', 'top.score', 'bottom.team.sv_ID', 'bottom.winner', 'bottom.score']]
df_5.rename(columns = {'top.team.sv_ID': 'P1', 'top.winner': 'P1_Win', 'top.score': 'P1_Games', 
                      'bottom.team.sv_ID': 'P2', 'bottom.winner': 'P2_Win', 'bottom.score': 'P2_Games'}, inplace=True)

## Replace SVID with Lineup
bkt_merged_df_a = df_5.copy()
bkt_merged_df_a['P1'] = bkt_merged_df_a['P1'].replace(player_lu_dict, regex=True).astype(str)
bkt_merged_df_a['P2'] = bkt_merged_df_a['P2'].replace(player_lu_dict, regex=True).astype(str)

# # ## Pick only relevant columns
bkt_merged_df_b = bkt_merged_df_a[['P1', 'P1_Win', 'P1_Games', 'P2', 'P2_Win', 'P2_Games']].sort_values(['P1', 'P2'], ascending = [True, True]).reset_index(drop = True).copy()
bkt_merged_df_b['P1'] = bkt_merged_df_b['P1'].astype(str)
bkt_merged_df_b['P2'] = bkt_merged_df_b['P2'].astype(str)
# ## Sort across P1 and P2 lineups
bkt_merged_df_b['sortcheck'] = bkt_merged_df_b.apply(lambda x: 0 if sorted([x['P1'], x['P2']]) == ([x['P1'], x['P2']]) else 1, axis = 1)
bkt_merged_df_b[['P1', 'P2', 'P1_Win', 'P2_Win', 'P1_Games', 'P2_Games']] = bkt_merged_df_b[['P2', 'P1', 'P2_Win', 'P1_Win', 'P2_Games', 'P1_Games']].where(bkt_merged_df_b['sortcheck'] == 1, bkt_merged_df_b[['P1', 'P2', 'P1_Win', 'P2_Win', 'P1_Games', 'P2_Games']].values)
bkt_merged_df_b.drop(['sortcheck'], axis = 1, inplace = True)
# ## Aggregate
bkt_merged_df_c = bkt_merged_df_b.groupby(['P1','P2']).sum().copy()
bkt_merged_df_c['Count'] = bkt_merged_df_c['P1_Win'] + bkt_merged_df_c['P2_Win']
bkt_merged_df_c['Spread'] = bkt_merged_df_c.apply(lambda x: abs((x['P1_Win']-x['P2_Win'])/x['Count']) , axis = 1)
bkt_merged_df_c = bkt_merged_df_c.sort_values(['Count', 'Spread'], ascending = [False, False]).reset_index(drop = False)

## Replace nicename with c_Lineup
bkt_merged_df_i = df_5.copy()
bkt_merged_df_i['P1'] = bkt_merged_df_i['P1'].replace(player_clu_dict, regex=True).astype(str)
bkt_merged_df_i['P2'] = bkt_merged_df_i['P2'].replace(player_clu_dict, regex=True).astype(str)

## Calculate and aggregate
bkt_merged_df_j = bkt_merged_df_i[['P1', 'P1_Win', 'P1_Games', 'P2', 'P2_Win', 'P2_Games']].sort_values(['P1', 'P2'], ascending = [True, True]).reset_index(drop = True).copy()
bkt_merged_df_j['P1'] = bkt_merged_df_j['P1'].astype(str)
bkt_merged_df_j['P2'] = bkt_merged_df_j['P2'].astype(str)
## Sort across P1 and P2 lineups
bkt_merged_df_j['sortcheck'] = bkt_merged_df_j.apply(lambda x: 0 if sorted([x['P1'], x['P2']]) == ([x['P1'], x['P2']]) else 1, axis = 1)
bkt_merged_df_j[['P1', 'P2', 'P1_Win', 'P2_Win', 'P1_Games', 'P2_Games']] = bkt_merged_df_j[['P2', 'P1', 'P2_Win', 'P1_Win', 'P2_Games', 'P1_Games']].where(bkt_merged_df_j['sortcheck'] == 1, bkt_merged_df_j[['P1', 'P2', 'P1_Win', 'P2_Win', 'P1_Games', 'P2_Games']].values)
bkt_merged_df_j.drop(['sortcheck'], axis = 1, inplace = True)
## Aggregate
bkt_merged_df_k = bkt_merged_df_j.groupby(['P1','P2']).sum().copy()
bkt_merged_df_k['Count'] = bkt_merged_df_k['P1_Win'] + bkt_merged_df_k['P2_Win']
bkt_merged_df_k['Spread'] = bkt_merged_df_k.apply(lambda x: abs((x['P1_Win']-x['P2_Win'])/x['Count']) , axis = 1)
bkt_merged_df_k = bkt_merged_df_k.sort_values(['Count', 'Spread'], ascending = [False, False]).reset_index(drop = False)

## Export the results

In [14]:
## Specify the file name and directory folder Output
input_filename = 'SEAO SVO Apr2023 Final'
export_filename = f"Output/{input_filename.replace(' ', '_')}.xlsx"
svowriter = pd.ExcelWriter(export_filename)
svoworkbook = svowriter.book

## Add DataFrames to Sheets
deck_summary_df.to_excel(svowriter, sheet_name="Summary", index=False, startrow = 1, startcol = 0) ## Deck Archetype
class_summary_df.to_excel(svowriter, sheet_name="Summary", index=False, startrow = deck_summary_df.shape[0] + 3, startcol = 1) ## Class Summary, To be placed below Deck Archetype Table
lineup_summary_df.to_excel(svowriter, sheet_name="Summary", index=False, startrow = 1, startcol = 5) ## Lineup Statistics
clineup_summary_df.to_excel(svowriter, sheet_name="Summary", index=False, startrow = 1, startcol = 9) ## C_Lineup Statistics
svo_dfwide.to_excel(svowriter, sheet_name='Wide', index=False) ## Wide
df_3.to_excel(svowriter, sheet_name='DecayPlayer', index = False)
df_4.to_excel(svowriter, sheet_name='DecayLU', index = False)
bkt_merged_df_c.to_excel(svowriter, sheet_name='Matchup', index=False, startrow = 1, startcol = 0) ## Matchup
bkt_merged_df_k.to_excel(svowriter, sheet_name='Matchup', index=False, startrow = 1, startcol = 9) ## Matchup
DeckBreakdownToExcel(deck_breakdown_dict, svowriter, svoworkbook)
svo_dftall.to_excel(svowriter, sheet_name='Tall', index=False) ## Tall
df_comb_final[['Player ID', 'Name', 'Discord ID', 'Twitter ID', 'team.customFields']].to_excel(svowriter, sheet_name='BattlefyDB', index=False) ## battlefy

## Run the Export functions for fomatting.
SVOExcelFormatter(svowriter, svoworkbook, input_filename, df_5)
## Save to Excel file
svowriter.save()
svowriter.close()

  warn("Calling close() on already closed file.")
