In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

First lets work out how many pages are on the tournament decklists page.

In [2]:
def get_number_of_pages():
    r = requests.get(f"https://fabtcg.com/decklists/?page=1")
    soup = BeautifulSoup(r.text, 'html.parser')
    last = int(soup.findAll("a", {"class": "page-link starling"})[-1]['href'].partition("=")[-1])
    return last

Now on each page we have to read the table, find all the links, and stitch them together.

In [3]:
def get_page(page):
    r = requests.get(f"https://fabtcg.com/decklists/?page={page}")
    soup = BeautifulSoup(r.text, 'html.parser')
    
    df = pd.read_html(f"https://fabtcg.com/decklists/?page={page}", flavor='bs4')[0]
    
    table = soup.find('table')
    links = {}
    for tr in table.findAll("tr"):
        trs = tr.findAll("td")
        for each in trs:
            try:
                a_text = each.find('a').contents[0]
                a_link = each.find('a')['href']
                links[a_text] = a_link
            except:
                pass

    def fetch_deck_url(row):
        try:
            deck = links[row['Decklist']]
        except:
            deck = None

        return deck

    def fetch_event_url(row):
        try:
            event = links[row['Event']]
        except:
            event = None

        return event
    
    df['deck_link'] = df.apply(fetch_deck_url, axis=1)
    df['event_link'] = df.apply(fetch_event_url, axis=1)
    df['Date'] = pd.to_datetime(df['Date'])
    
    return df

Loop through the available pages and concat out the df.

In [4]:
def get_all_pages():
    dfs = []
    for i in range(1, get_number_of_pages()):
        dfs.append(get_page(i))
        
    return pd.concat(dfs).reset_index(drop=True)

In [5]:
df = get_all_pages()

In [6]:
df[df['deck_link'] == None]

Unnamed: 0,Country,Date,Decklist,Event,Format,Hero,Result,deck_link,event_link


In [7]:
df.to_csv("tournament_lists.csv")

Next we're going to use the cards we got from FABDB to get more data on card data.

In [8]:
card_df = pd.read_csv("cards.csv").drop(['image', 'resource', 'name'], axis=1)

In [9]:
def enrich_deck(deck_df, card_df):
    return pd.merge(deck_df, card_df, left_on=['name_resource'], right_on=['name_resource'])

In [10]:
def get_deck(url):
    r = requests.get(url)
    soup = r.text
    frames = pd.read_html(url, flavor='bs4')
    
    metadata = {frames[0].iloc[:, 0].to_list()[i]: frames[0].iloc[:, 1].to_list()[i] for i in range(frames[0].shape[0])}
    equipment = frames[1].iloc[:, 0].str.extract("(?P<copies>.*?) x (?P<name>.*)")
    
    dfs = []
    for i in frames[2:]:
        dfs.append(i.iloc[:, 0].str.extract("(?P<copies>.*?) x (?P<name>.*) \((?P<resource>.*)\)"))    
    df = pd.concat(dfs)
    df.append(equipment)
    
    def transform_name(row):
        resource_map = {
            '3': '(Blue)',
            '2': '(Yellow)',
            '1': '(Red)',
            np.nan: '',
        }

        return f"{row['name']} {resource_map[row['resource']]}"
    
    df['name_resource'] = df.apply(transform_name, axis=1)
    
    df = enrich_deck(df, card_df)
    numeric_cols = ['copies', 'resource', 'attack', 'cost', 'defense', 'intellect', 'life']
    df[numeric_cols].apply(pd.to_numeric, errors='coerce')
    
    return metadata, df

In [11]:
CRU_release = '2020-08-28'

def get_meta(df, date, tournament_format):
    return df[(df['Format'] == tournament_format) & (df['Date'] > date)]

In [12]:
cru_df = get_meta(df, date=CRU_release, tournament_format="CC")

In [13]:
def add_decks_to_meta(meta_df):
    meta_list = meta_df.to_dict(orient='records')
    for i in meta_list:
        if i['deck_link'] is None:
            print("decklink is None")
            print(i)
        else:
            i['metadata'], i['deck'] = get_deck(i['deck_link'])
    
    return meta_list

In [14]:
def get_meta_share(df):
    hero_counts = df['Hero'].value_counts()
    hero_counts_df = pd.DataFrame({'hero': cru_hero_counts.index, 'count': cru_hero_counts, 'percentage': cru_hero_counts / cru_hero_counts.sum()})
    
    return hero_counts_df

In [18]:
cru_df[cru_df['deck_link'].isnull()]

Unnamed: 0,Country,Date,Decklist,Event,Format,Hero,Result,deck_link,event_link
60,NZ,2020-08-30,Jasin Long Control Ninja Deck 30.8.20,New Zealand - Road …,CC,Katsu,1st,,https://fabtcg.com/organised-play/2020/road-na...


In [16]:
cru_with_decks = add_decks_to_meta(cru_df)

decklink is None
{'Country': 'NZ', 'Date': Timestamp('2020-08-30 00:00:00'), 'Decklist': 'Jasin Long Control Ninja Deck 30.8.20', 'Event': 'New Zealand - Road …', 'Format': 'CC', 'Hero': 'Katsu', 'Result': '1st', 'deck_link': None, 'event_link': 'https://fabtcg.com/organised-play/2020/road-nationals-2020/nz-road-nationals/'}


In [62]:
cru_breakdown = get_meta_share(cru_df)
cru_breakdown

Unnamed: 0,hero,count,percentage
Katsu,Katsu,12,0.272727
Dorinthea,Dorinthea,11,0.25
Rhinar,Rhinar,8,0.181818
Dash,Dash,8,0.181818
Bravo,Bravo,4,0.090909
Azalea,Azalea,1,0.022727
