In [None]:
import re
from io import StringIO

from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

First, let's work out how many pages there are on the tournament decklists page.

In [None]:
def get_number_of_pages():
    """Return the highest page number linked from the decklists listing.

    Downloads page 1 of https://fabtcg.com/decklists/ and inspects every
    pagination anchor (``<a class="page-link starling">``), reading the page
    number from its ``page=N`` query parameter.

    Returns:
        int: The largest page number found, or 1 when the page carries no
        pagination links (i.e. the listing fits on a single page).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    r = requests.get("https://fabtcg.com/decklists/?page=1", timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    page_numbers = []
    for link in soup.find_all("a", {"class": "page-link starling"}):
        match = re.search(r"page=(\d+)", link.get('href', ''))
        if match:
            page_numbers.append(int(match.group(1)))

    # Taking the maximum does not depend on the "last page" link being the
    # final anchor in the pagination bar.
    return max(page_numbers, default=1)

Now on each page we have to read the table, find all the links, and stitch them together.

In [None]:
def get_page(page):
    """Scrape one page of the decklists listing into a DataFrame.

    The page is downloaded once; the same HTML is used both for the table
    contents (via ``pd.read_html``) and for the hyperlinks inside the table
    (via BeautifulSoup), so the two views cannot disagree.

    Args:
        page: Page number substituted into the ``?page=`` query parameter.

    Returns:
        pandas.DataFrame: The first table on the page, with two extra columns:
        ``deck_link`` and ``event_link`` hold the ``href`` of the anchor whose
        text equals the row's ``Decklist`` / ``Event`` cell (``None`` when no
        such anchor exists). ``Date`` is converted with ``pd.to_datetime``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If ``pd.read_html`` finds no table in the page.
    """
    r = requests.get(f"https://fabtcg.com/decklists/?page={page}", timeout=30)
    r.raise_for_status()
    html = r.text
    soup = BeautifulSoup(html, 'html.parser')

    # Parse the HTML we already have rather than fetching the URL a second time.
    df = pd.read_html(StringIO(html), flavor='bs4')[0]

    # Map "link text" -> href for the first anchor of every table cell.
    links = {}
    table = soup.find('table')
    if table is not None:
        for cell in table.find_all('td'):
            anchor = cell.find('a')
            if anchor is None:
                continue
            href = anchor.get('href')
            # Collapse whitespace and strip, mirroring how pandas normalises
            # cell text, so the keys line up with the DataFrame values.
            text = re.sub(r'\s+', ' ', anchor.get_text()).strip()
            if href and text:
                links[text] = href

    def lookup_links(column):
        """Return the href for each value in `column`, or None when unknown."""
        if column not in df.columns:
            return [None] * len(df)
        return [links.get(label) for label in df[column]]

    df['deck_link'] = lookup_links('Decklist')
    df['event_link'] = lookup_links('Event')
    df['Date'] = pd.to_datetime(df['Date'])

    return df

Loop through the available pages and concatenate the per-page DataFrames into a single DataFrame.

In [None]:
def get_all_pages():
    """Scrape every page of the decklists listing into one DataFrame.

    Returns:
        pandas.DataFrame: The rows of all pages, from page 1 up to and
        including the page number reported by ``get_number_of_pages()``,
        concatenated in page order with a fresh 0..n-1 index.
    """
    last_page = get_number_of_pages()
    # range() excludes its stop value, so add 1 to include the last page.
    pages = [get_page(number) for number in range(1, last_page + 1)]

    return pd.concat(pages).reset_index(drop=True)

In [None]:
# Scrape the whole decklists listing into one DataFrame (network-bound).
df = get_all_pages()

In [None]:
# Save the scraped listing to CSV; the DataFrame index is written as well (to_csv default).
df.to_csv("tournament_lists.csv")

Next, we're going to use the card list we got from FABDB to enrich each decklist with more data about its cards.

In [None]:
# Card reference table read from cards.csv, without its 'image', 'resource' and 'name' columns.
# NOTE(review): presumably 'resource' and 'name' are dropped so they don't clash with the
# deck's own columns of the same name when merged in enrich_deck — confirm.
card_df = pd.read_csv("cards.csv").drop(columns=['image', 'resource', 'name'])

In [None]:
def enrich_deck(deck_df, card_df):
    """Attach card attributes to a deck by joining on ``name_resource``.

    This is an inner join: deck rows whose ``name_resource`` has no match in
    `card_df` are not present in the result.
    """
    enriched = pd.merge(left=deck_df, right=card_df, on='name_resource')
    return enriched

In [None]:
def get_deck(url):
    """Download one decklist page and return its metadata and card table.

    The page is fetched once and every HTML table in it is parsed:

    * table 0 is read as key/value pairs (first column -> second column);
    * table 1 is read as equipment lines of the form ``"<copies> x <name>"``;
    * every further table is read as card lines of the form
      ``"<copies> x <name> (<resource>)"``.

    A ``name_resource`` key (``"<name> (Red|Yellow|Blue)"``, or ``"<name> "``
    with a trailing space when there is no recognised resource value) is built
    for every line and used to join against the module-level ``card_df`` via
    ``enrich_deck``.

    Args:
        url: Address of the decklist page.

    Returns:
        tuple: ``(metadata, df)`` where ``metadata`` is a dict built from
        table 0 and ``df`` is the enriched card DataFrame with the columns
        ``copies``, ``resource``, ``attack``, ``cost``, ``defense``,
        ``intellect`` and ``life`` converted to numbers (unparseable values
        become NaN).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has fewer than two tables.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Parse the HTML we already downloaded instead of fetching the URL again.
    frames = pd.read_html(StringIO(response.text), flavor='bs4')
    if len(frames) < 2:
        raise ValueError(
            f"Expected a metadata table and an equipment table at {url}, "
            f"found {len(frames)} table(s)"
        )

    info = frames[0]
    metadata = dict(zip(info.iloc[:, 0].tolist(), info.iloc[:, 1].tolist()))

    equipment = frames[1].iloc[:, 0].str.extract(r"(?P<copies>.*?) x (?P<name>.*)")

    # NOTE(review): lines in these tables without a trailing "(...)" do not
    # match the pattern and are dropped below.
    parts = [
        frame.iloc[:, 0].str.extract(r"(?P<copies>.*?) x (?P<name>.*) \((?P<resource>.*)\)")
        for frame in frames[2:]
    ]
    parts.append(equipment)
    df = pd.concat(parts, ignore_index=True)
    if 'resource' not in df.columns:
        # Only the equipment table was present; it carries no resource value.
        df['resource'] = np.nan

    # Lines that did not match their pattern have no name and can never join
    # against the card table.
    df = df.dropna(subset=['name']).reset_index(drop=True)

    resource_map = {
        '3': '(Blue)',
        '2': '(Yellow)',
        '1': '(Red)',
    }
    # Missing or unrecognised resource values get an empty suffix; this avoids
    # relying on NaN being usable as a dictionary key.
    suffix = df['resource'].map(resource_map).fillna('')
    df['name_resource'] = df['name'] + ' ' + suffix

    df = enrich_deck(df, card_df)

    numeric_cols = ['copies', 'resource', 'attack', 'cost', 'defense', 'intellect', 'life']
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

    return metadata, df

In [None]:
# Try get_deck on a single decklist; the metadata dict is discarded, only the card table is kept.
_, td = get_deck('https://fabtcg.com/decklists/huang-chih-wei-warrior-deck-171020/')

In [None]:
# Preview the first rows of the sample deck's card table.
td.head()

In [None]:
# Cut-off date string passed to get_meta below.
CRU_release = '2020-08-28'


def get_meta(df, date, tournament_format):
    """Select the rows of `df` for one format that are dated after `date`.

    A row is kept when its ``Format`` equals `tournament_format` and its
    ``Date`` is strictly greater than `date`.
    """
    format_matches = df['Format'] == tournament_format
    is_later = df['Date'] > date
    return df[format_matches & is_later]

In [None]:
# Keep only "CC"-format rows of the scraped listing dated after CRU_release.
cru_df = get_meta(df, date=CRU_release, tournament_format="CC")

In [None]:
def add_decks_to_meta(meta_df):
    """Convert `meta_df` to a list of records and attach each record's deck.

    For every row with a deck link, the decklist is downloaded with
    ``get_deck`` and stored under the ``'metadata'`` and ``'deck'`` keys.
    Rows whose ``deck_link`` is missing (``None`` or NaN) are reported on
    stdout and left without those two keys.

    Args:
        meta_df: DataFrame with a ``deck_link`` column.

    Returns:
        list[dict]: One dict per row of `meta_df`, in row order.
    """
    meta_list = meta_df.to_dict(orient='records')
    for record in meta_list:
        deck_link = record['deck_link']
        # pd.isna also catches NaN, which is what a missing link becomes once
        # the listing has been through a CSV round trip or a float column.
        if pd.isna(deck_link):
            print("decklink is None")
            print(record)
        else:
            record['metadata'], record['deck'] = get_deck(deck_link)

    return meta_list

In [None]:
def get_meta_share(df):
    """Summarise how often each value of the ``Hero`` column occurs.

    Returns a DataFrame indexed by hero with three columns: ``hero`` (the
    hero again, as a column), ``count`` (number of rows) and ``percentage``
    (``count`` divided by the total row count, i.e. a fraction between 0 and 1).
    """
    appearances = df['Hero'].value_counts()
    total = appearances.sum()
    columns = {
        'hero': appearances.index,
        'count': appearances,
        'percentage': appearances / total,
    }

    return pd.DataFrame(columns)

In [None]:
# Attach the decklist to every row of the filtered meta (get_deck is called once per row with a deck link).
cru_with_decks = add_decks_to_meta(cru_df)
# Number of records returned (one per row of cru_df).
len(cru_with_decks)

In [None]:
# Per-hero counts and share of the filtered meta; the bare name on the last line displays the table.
cru_breakdown = get_meta_share(cru_df)
cru_breakdown

In [None]:
def count_card_group(group):
    """Summarise the copies of one card as a single-row DataFrame.

    `group` holds the deck rows for one card name. The result has the columns
    ``total_copies`` (sum of ``copies`` over all rows) and ``red_copies``,
    ``yellow_copies``, ``blue_copies`` (the ``copies`` value of the first row
    whose ``resource`` is 1, 2 or 3 respectively, or 0 when there is none).
    """

    def copies_for(resource_value):
        # Only the first matching row is read, not the sum of all matches.
        matching = group[group['resource'] == resource_value]
        if matching.shape[0] > 0:
            return matching.iloc[0]['copies']
        return 0

    summary = {
        'total_copies': [group['copies'].sum()],
        'red_copies': [copies_for(1)],
        'yellow_copies': [copies_for(2)],
        'blue_copies': [copies_for(3)],
    }
    return pd.DataFrame(summary)

def get_card_counts(deck, split_equipment=False):
    """Count the copies of every card name in a deck.

    Args:
        deck: Deck DataFrame with ``name``, ``copies`` and ``resource`` columns.
        split_equipment: Reserved for a future variant; only the default
            (``False``) is implemented.

    Returns:
        pandas.DataFrame: One row per card name with the columns ``name``,
        ``total_copies``, ``red_copies``, ``yellow_copies`` and
        ``blue_copies`` (see ``count_card_group``).

    Raises:
        NotImplementedError: If `split_equipment` is truthy.
    """
    if split_equipment:
        # This branch had no body, which made the whole definition a syntax
        # error; fail explicitly until the behaviour is specified.
        raise NotImplementedError("split_equipment=True is not implemented yet")

    counts = deck.groupby('name').apply(count_card_group)
    # apply() yields a (name, row-number) MultiIndex; keep 'name' as a column
    # and discard the row-number level.
    counts = counts.reset_index().drop('level_1', axis=1)

    return counts

In [None]:
def get_staples(meta_list):
    """Rank cards by how many of the given decks contain them.

    Args:
        meta_list: Records as produced by ``add_decks_to_meta``. Records
            without a ``'deck'`` entry (no deck link was available) are
            skipped.

    Returns:
        pandas.DataFrame: One row per card name, sorted by ``decks``
        descending, with the columns ``name``, ``total_copies``,
        ``red_copies``, ``yellow_copies``, ``blue_copies`` (each averaged over
        the decks that contain the card), ``decks`` (number of decks
        containing the card) and ``percentage_of_decks`` (``decks`` divided by
        the number of decks analysed, as a fraction). Empty, with the same
        columns, when no record carries a deck.
    """
    count_columns = ['total_copies', 'red_copies', 'yellow_copies', 'blue_copies']

    per_deck_counts = [
        get_card_counts(entry['deck'])
        for entry in meta_list
        if entry.get('deck') is not None
    ]
    if not per_deck_counts:
        # pd.concat raises on an empty list; return an empty, well-formed table.
        return pd.DataFrame(columns=['name', *count_columns, 'decks', 'percentage_of_decks'])

    all_counts = pd.concat(per_deck_counts, ignore_index=True)
    by_name = all_counts.groupby('name')

    staples = by_name.mean()
    # Each deck contributes exactly one row per card name, so the group size
    # is the number of decks that play the card.
    staples['decks'] = by_name.size()
    staples = staples.reset_index().sort_values('decks', ascending=False)
    staples['percentage_of_decks'] = staples['decks'] / len(per_deck_counts)

    return staples


In [None]:
# Compute the staples table for the filtered meta and write it to staples.csv.
get_staples(cru_with_decks).to_csv("staples.csv")