Import dependencies.

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import ipywidgets as widgets
from IPython.display import display
from sklearn.decomposition import TruncatedSVD

Matplotlib settings.

In [2]:
# Default figure geometry for every plot drawn below: a 5 x 5 canvas at 100 dpi.
plt.rcParams.update({
    'figure.figsize': [5, 5],
    'figure.dpi': 100,
})

Expansion specific global variables.

In [3]:
# Set code of the expansion to analyse; it is used as the key into both tables below.
EXPANSION = 'SIR'

# CSV file name of the game data for each supported set code.
CSV_NAMES = dict(
    MID="game_data_public.MID.PremierDraft.csv",
    NEO="game_data_public.NEO.PremierDraft.csv",
    SIR="game_data_public.SIR.PremierDraft.csv",
)

# Name of the user win-rate bucket column for each set code (it is not the same for every set).
USER_WIN_RATE_BUCKET = dict(
    MID="user_win_rate_bucket",
    NEO="user_game_win_rate_bucket",
    SIR="user_game_win_rate_bucket",
)
# To inspect the columns of a newly added set use: df.info(max_cols=1106)

## Import 17lands data

Define data types for importing 17lands data.

In [4]:
# Upper bounds used to generate the numbered column names
# (candidate_hand_1..MAX_MULLIGANS and *_turn_1..MAX_TURNS_*).
MAX_MULLIGANS = 7
MAX_TURNS = 30

# Columns that appear once per row, mapped to the dtype they are parsed as.
BASE_COLS = {
    'draft_id': str,
    'history_id': int,
    'time': str,
    'expansion': str,
    'format': str,
    'user_rank': str,
    'oppo_rank': str,
    'game_index': 'int8',
    'user_deck_colors': str,
    'oppo_deck_colors': str,
    'user_mulligans': 'int8',
    'oppo_mulligans': 'int8',
    'on_play': bool,
    'turns': 'int8',
    'won': bool,
    'missing_diffs': 'int8',
}

# Column suffixes repeated for every (player, turn) pair as '<player>_turn_<n>_<suffix>'.
PER_TURN_COLS = {
    'cards_drawn': str,
    'cards_discarded': str,
    'lands_played': str,
    'cards_foretold': str,
    'creatures_cast': str,
    'non_creatures_cast': str,
    'user_instants_sorceries_cast': str,
    'oppo_instants_sorceries_cast': str,
    'user_abilities': str,
    'oppo_abilities': str,
    'user_cards_learned': str,
    'oppo_cards_learned': str,
    'creatures_attacked': str,
    'creatures_blocked': str,
    'creatures_unblocked': str,
    'creatures_blocking': str,
    'player_combat_damage_dealt': 'int8',
    'user_creatures_killed_combat': str,
    'oppo_creatures_killed_combat': str,
    'user_creatures_killed_non_combat': str,
    'oppo_creatures_killed_non_combat': str,
    'user_mana_spent': 'int8',
    'oppo_mana_spent': 'int8',
    'eot_user_cards_in_hand': str,
    'eot_oppo_cards_in_hand': str,
    'eot_user_lands_in_play': str,
    'eot_oppo_lands_in_play': str,
    'eot_user_creatures_in_play': str,
    'eot_oppo_creatures_in_play': str,
    'eot_user_non_creatures_in_play': str,
    'eot_oppo_non_creatures_in_play': str,
    'eot_user_life': 'int8',
    'eot_oppo_life': 'int8',
}

# Column suffixes repeated per player as '<player>_total_<suffix>'.
# Note that mana_spent is deliberately wider (int16) than the other totals.
SUMMARY_COLS = {
    'cards_drawn': 'int8',
    'cards_discarded': 'int8',
    'lands_played': 'int8',
    'cards_foretold': 'int8',
    'creatures_cast': 'int8',
    'non_creatures_cast': 'int8',
    'instants_sorceries_cast': 'int8',
    'cards_learned': 'int8',
    'mana_spent': 'int16',
}

def get_dtypes():
    """Build the column-name -> dtype mapping passed to ``pd.read_csv``.

    The mapping is assembled from a copy of ``BASE_COLS`` plus generated names:

    * ``candidate_hand_1`` .. ``candidate_hand_<MAX_MULLIGANS>`` and ``opening_hand`` as ``str``;
    * ``<player>_turn_<n>_<suffix>`` for both players, every turn up to ``MAX_TURNS``
      and every entry of ``PER_TURN_COLS``;
    * ``<player>_total_<suffix>`` for both players and every entry of ``SUMMARY_COLS``.

    Returns
    -------
    dict
        Maps each column name to a Python type or a numpy dtype string.
    """
    dtypes = BASE_COLS.copy()

    for x in range(1, MAX_MULLIGANS+1):
        dtypes[f'candidate_hand_{x}'] = str
    dtypes['opening_hand'] = str

    for turn in range(1, MAX_TURNS+1):
        for player in ['user', 'oppo']:
            for k, v in PER_TURN_COLS.items():
                dtypes[f'{player}_turn_{turn}_{k}'] = v
            # Overrides the str entry that PER_TURN_COLS just produced for this column
            # (presumably it holds a count rather than a card list - confirm against the data).
            dtypes[f'{player}_turn_{turn}_eot_oppo_cards_in_hand'] = 'int8'
        # Same kind of override, applied to the opponent's turn only.
        dtypes[f'oppo_turn_{turn}_cards_drawn'] = 'int8'

    for player in ['user', 'oppo']:
        # Use the dtype declared in SUMMARY_COLS. Hard-coding 'int8' here ignored the
        # int16 declared for mana_spent, whose total does not fit the int8 range (max 127).
        for col, dtype in SUMMARY_COLS.items():
            dtypes[f'{player}_total_{col}'] = dtype

    return dtypes

Load 17lands data into dataframe.

In [5]:
# Build the column -> dtype map first, then parse the CSV of the selected expansion with it.
dtypes = get_dtypes()
df = pd.read_csv(filepath_or_buffer=CSV_NAMES[EXPANSION], dtype=dtypes)

Create global variables for working data and transform.

In [6]:
# Working state shared by the widget callbacks, which replace both objects with DataFrames.
# They start as empty DataFrames rather than lists so that code reading them before the
# first filter event (e.g. transform['a'] or data.index) does not fail on a list.
data = pd.DataFrame()
transform = pd.DataFrame(columns=['a', 'b'], dtype=float)

# Helper functions

Applies filters to find decks based on minimum winrate, deck colors and/or included cards.

In [7]:
def apply_filters(wr, colors, cards):
    """Select the rows of the global ``df`` that pass the win-rate, colour and card filters.

    Parameters
    ----------
    wr : float
        Minimum value (inclusive) of the win-rate bucket column named by
        ``USER_WIN_RATE_BUCKET[EXPANSION]``.
    colors : str
        Exact ``main_colors`` value to keep, or ``'ALL'`` to skip the colour filter.
    cards : sequence of str
        Card names without the ``deck_`` prefix; a row is kept only if every listed
        card column is greater than zero. An empty selection, or one that contains
        ``'ALL'``, skips the card filter.

    Returns
    -------
    pandas.DataFrame
        The surviving rows (original index preserved) restricted to the ``deck_*``
        columns, with the basic lands removed and the ``deck_`` prefix stripped.
    """
    prefix = 'deck_'
    basic_lands = [prefix + land for land in ('Forest', 'Island', 'Mountain', 'Plains', 'Swamp')]

    #filter for winrate
    data = df[df[USER_WIN_RATE_BUCKET[EXPANSION]] >= wr]
    #filter for colors
    if colors != 'ALL':
        data = data[data.main_colors == colors]
    #select just deck information
    # The pattern is anchored: an unanchored 'deck_*' matches "deck" anywhere in a name
    # and would also pull in columns such as 'user_deck_colors'.
    # errors='ignore' keeps this working for a set whose data lacks a basic-land column.
    data = data.filter(regex=r'^deck_', axis=1).drop(columns=basic_lands, errors='ignore')
    #clean column names
    # Slice the prefix off; str.lstrip('deck_') strips any leading run of the characters
    # d/e/c/k/_ and can therefore eat the start of a card name.
    data = data.rename(columns=lambda name: name[len(prefix):])
    #filter for specific cards
    if cards and 'ALL' not in cards:
        for card in cards:
            data = data[data[card] > 0]
    return data

Apply a truncated SVD to the working list of decks, reducing the sparse deck-by-card matrix to two dense dimensions. Returns a DataFrame of shape (n, 2).

In [8]:
def transform_svd():
    # define transform
    svd = TruncatedSVD()
    # apply transform to dataset
    t = svd.fit_transform(data)
    #change to a DataFrame, fixing labels for t
    return pd.DataFrame(data=t, columns=['a','b'], index=data.index)

Create scatter plot of decks. 

In [9]:
def graph_scatter(data=None, transform=None, df=None):
    """Draw the deck scatter plot on the current matplotlib axes.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Working deck table; only its index is used, to look up win rates in ``df``.
    transform : pandas.DataFrame, optional
        Frame with columns ``'a'`` and ``'b'`` giving the x and y position of each deck.
    df : pandas.DataFrame, optional
        Frame containing the column named by ``USER_WIN_RATE_BUCKET[EXPANSION]``.

    Any argument left as ``None`` is read from the module-level variable of the same
    name at call time. Points are coloured by win rate and sized by the absolute
    distance of their win rate from the mean. ``plt.show()`` is left to the caller.
    """
    # Resolve defaults at call time. Writing them in the signature (data=data, ...) binds
    # whatever the globals held when this cell ran - the initial placeholders - rather
    # than the frames produced later by the widget callbacks.
    module_state = globals()
    if data is None:
        data = module_state['data']
    if transform is None:
        transform = module_state['transform']
    if df is None:
        df = module_state['df']

    #C parameter is the winrate of deck
    win_rates = df.loc[data.index,USER_WIN_RATE_BUCKET[EXPANSION]]
    plt.scatter(transform['a'], transform['b'], c=win_rates, cmap='RdBu_r', s=(win_rates-win_rates.mean()).abs()*100)

    plt.grid(which='major', linestyle='-')
    plt.grid(which='minor', linestyle='--')
    plt.minorticks_on()

Dimensionality reduction using kurtosis-based projection pursuit: https://github.com/S-Driscoll/Projection-pursuit

In [10]:
from projpursuit import projpursuit
def graph_scatter_ppa(decks, df=df):
    """Run projection pursuit on ``decks`` and print the first rows of the result.

    ``decks`` is handed straight to ``projpursuit`` with ``Meth='Mul'``,
    ``CenMeth='Rec'`` and ``MaxMin='Min'``. ``df`` is accepted but not used.
    Nothing is returned and, despite the name, nothing is plotted.
    """
    projection = projpursuit(decks, Meth='Mul', CenMeth='Rec', MaxMin='Min')
    preview = projection.head()
    print(preview)

Generic function to find all unique values in an array, sorted and preceded by 'ALL'.

In [11]:
# Sentinel option meaning "do not filter on this field".
ALL = 'ALL'
def unique_sorted_values_plus_ALL(array):
    """Return the sorted distinct values of ``array`` with ``ALL`` inserted in front.

    Parameters
    ----------
    array : pandas.Series or pandas.Index
        Values to list; missing entries (NaN/None) are left out.

    Returns
    -------
    list
        ``[ALL, v1, v2, ...]`` with the distinct values in ascending order.
    """
    # Missing values are dropped first: a NaN mixed in with strings makes the sort
    # raise TypeError, and a NaN option could never match an equality filter anyway.
    values = sorted(array.dropna().unique().tolist())
    return [ALL] + values

# Widgets

In [12]:
%matplotlib inline

In [13]:
#define input widgets
# Minimum win-rate filter, 0..1 in steps of 0.01.
wr_slider = widgets.FloatSlider(
    min=0,
    max=1,
    step=.01,
    description='Min WR:',
    value=0
)

# Deck colour filter: 'ALL' followed by every distinct main_colors value.
color_dropdown = widgets.Dropdown(options=unique_sorted_values_plus_ALL(df.main_colors))

# Card filter: every deck_* column except the basic lands, shown without the prefix.
# The pattern is anchored so that only names *starting* with 'deck_' match (an
# unanchored 'deck_*' matches "deck" anywhere in a name), and the prefix is sliced off
# because str.lstrip('deck_') strips a character set and can eat into a card name.
_DECK_PREFIX = 'deck_'
_card_columns = (
    df.filter(regex=r'^deck_', axis=1)
    .drop(columns=['deck_Forest','deck_Island','deck_Mountain','deck_Plains','deck_Swamp'],
          errors='ignore')
    .columns
)
card_selectmultiple = widgets.SelectMultiple(
    options=unique_sorted_values_plus_ALL(
        _card_columns.map(lambda name: name[len(_DECK_PREFIX):])
        )
    )

def _bound_slider(description):
    """Create one edge slider of the exploration rectangle (range -3..10, step 0.1, start 0)."""
    return widgets.FloatSlider(
        min=-3,
        max=10,
        step=.1,
        description=description,
        value=0
    )

xmin_slider = _bound_slider('Min X:')
xmax_slider = _bound_slider('Max X:')
ymin_slider = _bound_slider('Min Y:')
ymax_slider = _bound_slider('Max Y:')

#define outputs
output = widgets.Output()
plot_output = widgets.Output()
explore_output = widgets.Output()


# Event handlers:

In [14]:
def common_eventhandler(wr, colors, cards):
    """Re-filter the decks, recompute the 2-D embedding and refresh table and plot.

    Parameters
    ----------
    wr : float
        Minimum win rate, passed to ``apply_filters``.
    colors : str
        Deck colour selection, passed to ``apply_filters``.
    cards : sequence of str
        Required card names, passed to ``apply_filters``.

    Rebinds the module-level ``data`` and ``transform`` that the exploration
    callbacks read.
    """
    global data, transform
    #refresh outputs
    output.clear_output(wait=True)
    plot_output.clear_output(wait=True)
    data = apply_filters(wr, colors, cards)
    if data.empty:
        # No deck matches: skip the decomposition (it cannot be fitted without rows)
        # and leave an empty embedding behind so later callbacks still find a frame.
        transform = pd.DataFrame(columns=['a', 'b'], index=data.index, dtype=float)
        with output:
            display(data)
        with plot_output:
            print('No decks match the current filters.')
        return
    transform = transform_svd()
    with output:
        display(data)
    with plot_output:
        # Arguments are passed explicitly so the plot always uses the frames computed above.
        graph_scatter(data, transform, df)
        plt.show()
#Show a list of the average number of cards within a given cluster of transform 't' defined by the boundaries amin, amax, bmin and bmax.
def card_avgs(amin, amax, bmin, bmax, num_cards=50):
    """Summarise the decks whose embedding lies inside a rectangle and redraw the plot.

    Parameters
    ----------
    amin, amax : float
        Inclusive bounds on the ``'a'`` (x) coordinate of ``transform``.
    bmin, bmax : float
        Inclusive bounds on the ``'b'`` (y) coordinate of ``transform``.
    num_cards : int, optional
        Number of rows of the summary table to show (default 50).

    The table has one row per card that occurs in at least one selected deck:
    ``Average`` is the column mean over the selected decks and ``Synergy`` is that mean
    divided by the mean over all decks in the working ``data``. The scatter plot is
    redrawn with the rectangle outlined in red.
    """
    #refresh output
    explore_output.clear_output(wait=True)
    # Nothing has been filtered/embedded yet, so there is nothing to select from.
    if len(transform) == 0:
        with explore_output:
            print('No decks to explore - apply the filters above first.')
        return
    plot_output.clear_output(wait=True)
    #find decks within the boundaries (between() is inclusive on both ends)
    t = transform
    inside = t['a'].between(amin, amax) & t['b'].between(bmin, bmax)
    #pull out the corresponding rows from decks
    quad = data.loc[t.index[inside]]
    #remove any columns with all zeros
    quad = quad.loc[:, (quad != 0).any(axis=0)]
    #find average number of cards within all decks
    all_avgs = data.loc[:, quad.columns].mean()
    p = (f'{quad.shape[0]} decks in area. With average win rate of {df.loc[quad.index][USER_WIN_RATE_BUCKET[EXPANSION]].mean() * 100}%')
    #take the mean of each column and then find the ratio of card average in quad vs entire sample
    area_avgs = quad.mean()
    summary = pd.concat([area_avgs, area_avgs/all_avgs], axis=1, keys=['Average', 'Synergy'])
    #sort by highest average number of cards
    summary = summary.sort_values(by='Average', ascending=False)
    with explore_output:
        # Shown separately so the sentence does not end up on the table's header line.
        print(p)
        display(summary.head(num_cards))
    with plot_output:
        # Outline exactly the region that was used for the selection above, i.e. the
        # function's own arguments rather than whatever the slider widgets hold.
        plt.gca().add_patch(Rectangle((amin, bmin),
                                      amax-amin,
                                      bmax-bmin,
                edgecolor='red',
                facecolor='none',
                lw=2))
        graph_scatter(data, transform, df)
        plt.show()

Defining and binding event handlers to widgets.

In [15]:
# Filter controls: a change to any of them re-runs the filter with the new value of the
# changed widget and the current value of the other two.
def wr_slider_eventhandler(change):
    """React to a new minimum win rate."""
    common_eventhandler(wr=change.new,
                        colors=color_dropdown.value,
                        cards=card_selectmultiple.value)

def color_dropdown_eventhandler(change):
    """React to a new colour selection."""
    common_eventhandler(wr=wr_slider.value,
                        colors=change.new,
                        cards=card_selectmultiple.value)

def card_selectmultiple_eventhandler(change):
    """React to a new card selection."""
    common_eventhandler(wr=wr_slider.value,
                        colors=color_dropdown.value,
                        cards=change.new)

# Exploration controls: a change to any edge recomputes the card averages for the
# rectangle made of the new edge and the current values of the other three.
def xmin_slider_eventhandler(change):
    """React to a new left edge."""
    card_avgs(amin=change.new, amax=xmax_slider.value,
              bmin=ymin_slider.value, bmax=ymax_slider.value)

def xmax_slider_eventhandler(change):
    """React to a new right edge."""
    card_avgs(amin=xmin_slider.value, amax=change.new,
              bmin=ymin_slider.value, bmax=ymax_slider.value)

def ymin_slider_eventhandler(change):
    """React to a new bottom edge."""
    card_avgs(amin=xmin_slider.value, amax=xmax_slider.value,
              bmin=change.new, bmax=ymax_slider.value)

def ymax_slider_eventhandler(change):
    """React to a new top edge."""
    card_avgs(amin=xmin_slider.value, amax=xmax_slider.value,
              bmin=ymin_slider.value, bmax=change.new)

# Subscribe each handler to its widget's 'value' trait, in the same order as before.
for _widget, _handler in (
    (wr_slider, wr_slider_eventhandler),
    (color_dropdown, color_dropdown_eventhandler),
    (card_selectmultiple, card_selectmultiple_eventhandler),
    (xmin_slider, xmin_slider_eventhandler),
    (xmax_slider, xmax_slider_eventhandler),
    (ymin_slider, ymin_slider_eventhandler),
    (ymax_slider, ymax_slider_eventhandler),
):
    _widget.observe(_handler, names='value')
del _widget, _handler

# GUI

In [16]:
#matplotlib settings
plt.rcParams.update({
    'figure.figsize': [5, 5],
    'figure.dpi': 100,
})

#input widgets: one row with the three filter controls
input_widgets = widgets.HBox(children=[wr_slider, color_dropdown, card_selectmultiple])
#explore widgets: one row with the four rectangle-edge sliders
explore_widgets = widgets.HBox(children=[xmin_slider, xmax_slider, ymin_slider, ymax_slider])
#output widgets: scatter plot on the first tab, deck table on the second
tab = widgets.Tab(children=[plot_output, output])
tab.set_title(0, 'Deck Similarity')
tab.set_title(1, 'Deck Data')
#dashboard then display: filters, tabs, exploration sliders, exploration results
dashboard = widgets.VBox(children=[
    input_widgets,
    tab,
    explore_widgets,
    explore_output,
])
display(dashboard)

VBox(children=(HBox(children=(FloatSlider(value=0.0, description='Min WR:', max=1.0, step=0.01), Dropdown(opti…