In [1]:

# Configure Drive or Jupyter notebook -- only runs when first loaded
# The guard below skips the whole setup when CONFIG_DONE already exists in the
# kernel's globals, so re-running this cell does not clone or configure again.
# NOTE(review): CONFIG_DONE is not assigned in this cell -- presumably
# project_path.ipynb sets it; confirm.
if "CONFIG_DONE" not in globals():
    # Need to mount drive and clone repo to access data and functions
    try:
        # Importing google.colab doubles as the "are we in Colab?" probe: outside
        # Colab the import raises ModuleNotFoundError and the except branch runs.
        # NOTE(review): `drive` is not used in this cell -- the saved output
        # suggests mounting happens inside project_path.ipynb; confirm.
        from google.colab import drive  # type: ignore

        IN_COLAB = True

        # clone repo
        # and make the cloned repository the working directory
        !git clone https://github.com/doctorsmylie/mtg-draft-agent
        %cd mtg-draft-agent

    except ModuleNotFoundError:
        IN_COLAB = False

    # Finish configuration -- also configures notebook outside of Colab
    # (project_path.ipynb is resolved relative to the current working directory)
    %run "project_path.ipynb"
else:
    print("Config done already")

Cloning into 'mtg-draft-agent'...
remote: Enumerating objects: 79, done.[K
remote: Counting objects: 100% (79/79), done.[K
remote: Compressing objects: 100% (53/53), done.[K
remote: Total 79 (delta 33), reused 52 (delta 22), pack-reused 0 (from 0)[K
Receiving objects: 100% (79/79), 181.35 KiB | 1.83 MiB/s, done.
Resolving deltas: 100% (33/33), done.
/content/mtg-draft-agent
Starting config...
Running in Colab? Yes

Configuring Google Colab...
Mounting Drive...
Mounted at /content/mtg-draft-agent/drive
BASE_PATH =  /content/mtg-draft-agent
DATA_FOLDER = /content/mtg-draft-agent/drive/MyDrive/Erdos25/MTGdraft
BASE_PATH == os.getcwd(): True

Configuration done


# Testing deck evaluation metrics

Here, we search for suitable metrics with which to evaluate generated decks.

In [2]:
import zipfile
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# For managing paths
import pathlib

First, we load the first 20,000 rows of the game data.

In [3]:
# Build the path to the gzipped public game-data CSV for one expansion.
# NOTE(review): the config cell's saved output reports DATA_FOLDER ending in
# 'drive/MyDrive/Erdos25/MTGdraft', which differs from this relative folder --
# confirm which location is intended.
folder = 'drive/MyDrive/MTGdraft'
expansion = 'DSK'
# Derive the file name from `expansion` so the two cannot drift apart.
gamefilename = f'game_data_public.{expansion}.PremierDraft.csv.gz'
game_file = pathlib.Path(folder, expansion, gamefilename)

# Only a sample of the file is read to keep exploration fast.
N_ROWS = 20000

# low_memory=False parses each column in one pass instead of in chunks, which
# avoids per-chunk dtype inference. The saved output of this cell shows a
# warning pointing at this read_csv call -- presumably pandas' mixed-dtype
# DtypeWarning; confirm it disappears on re-run.
gamedata = pd.read_csv(
    game_file,
    compression='gzip',
    nrows=N_ROWS,
    low_memory=False,
)

  gamedata=pd.read_csv(game_file,compression='gzip',nrows=20000)


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the loaded sample.
gamedata.describe()

Unnamed: 0,build_index,match_number,game_number,opp_rank,num_mulligans,opp_num_mulligans,num_turns,opening_hand_Abandoned Campground,drawn_Abandoned Campground,tutored_Abandoned Campground,...,tutored_Withering Torment,deck_Withering Torment,sideboard_Withering Torment,"opening_hand_Zimone, All-Questioning","drawn_Zimone, All-Questioning","tutored_Zimone, All-Questioning","deck_Zimone, All-Questioning","sideboard_Zimone, All-Questioning",user_n_games_bucket,user_game_win_rate_bucket
count,20000.0,20000.0,20000.0,0.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,...,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0
mean,0.1874,3.77295,1.0,,0.1126,0.1223,8.9742,0.02275,0.0271,0.00015,...,0.0004,0.09535,0.0127,0.01005,0.01645,0.0,0.0589,0.0068,149.59415,0.537583
std,0.465609,2.166364,0.0,,0.331703,0.343873,2.505703,0.151108,0.166934,0.012247,...,0.019996,0.330097,0.113311,0.101733,0.130691,0.0,0.252456,0.082183,146.991312,0.064872
min,0.0,1.0,1.0,,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
25%,0.0,2.0,1.0,,0.0,0.0,7.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.48
50%,0.0,3.0,1.0,,0.0,0.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.54
75%,0.0,5.0,1.0,,0.0,0.0,10.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.6
max,4.0,9.0,1.0,,3.0,3.0,26.0,2.0,2.0,1.0,...,1.0,3.0,2.0,2.0,2.0,0.0,2.0,1.0,500.0,0.74


Now, we remove entries from users with fewer than 10 total games played (i.e. rows whose `user_n_games_bucket` is below 10).

In [5]:
# Keep only the rows whose games-played bucket is at least 10, then summarise
# the surviving rows for comparison with the unfiltered statistics.
df_filtered = gamedata[gamedata['user_n_games_bucket'].ge(10)]
df_filtered.describe()

Unnamed: 0,build_index,match_number,game_number,opp_rank,num_mulligans,opp_num_mulligans,num_turns,opening_hand_Abandoned Campground,drawn_Abandoned Campground,tutored_Abandoned Campground,...,tutored_Withering Torment,deck_Withering Torment,sideboard_Withering Torment,"opening_hand_Zimone, All-Questioning","drawn_Zimone, All-Questioning","tutored_Zimone, All-Questioning","deck_Zimone, All-Questioning","sideboard_Zimone, All-Questioning",user_n_games_bucket,user_game_win_rate_bucket
count,19985.0,19985.0,19985.0,0.0,19985.0,19985.0,19985.0,19985.0,19985.0,19985.0,...,19985.0,19985.0,19985.0,19985.0,19985.0,19985.0,19985.0,19985.0,19985.0,19985.0
mean,0.187541,3.77383,1.0,,0.112534,0.122342,8.97353,0.022567,0.02707,0.00015,...,0.0004,0.095422,0.01271,0.010058,0.016462,0.0,0.058944,0.006805,149.703277,0.537788
std,0.465756,2.16662,0.0,,0.331638,0.343931,2.505848,0.15053,0.166853,0.012251,...,0.020004,0.33021,0.113353,0.10177,0.13074,0.0,0.252545,0.082214,146.992457,0.064344
min,0.0,1.0,1.0,,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.16
25%,0.0,2.0,1.0,,0.0,0.0,7.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.48
50%,0.0,3.0,1.0,,0.0,0.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.54
75%,0.0,5.0,1.0,,0.0,0.0,10.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.6
max,4.0,9.0,1.0,,3.0,3.0,26.0,2.0,2.0,1.0,...,1.0,3.0,2.0,2.0,2.0,0.0,2.0,1.0,500.0,0.74


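Now, we split the data into training and test sets (80/20). The features are the `deck_*` columns and the target is the player's bucketed game win rate (`user_game_win_rate_bucket`).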

In [6]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import root_mean_squared_error
from sklearn.ensemble import RandomForestRegressor



In [7]:
# Seeded 80/20 row split of the filtered games.
df_train, df_test = train_test_split(
    df_filtered,
    random_state=330,
    test_size=0.20,
)

# Features: every column whose name starts with "deck".
# Target: the user's bucketed game win rate.
features_train, targets_train = (
    df_train.filter(regex='^deck'),
    df_train['user_game_win_rate_bucket'],
)
features_test, targets_test = (
    df_test.filter(regex='^deck'),
    df_test['user_game_win_rate_bucket'],
)

In [11]:
# Baseline random forest: 100 trees, depth capped at 8.
# random_state pins the bootstrap/feature sampling so the reported RMSE is
# reproducible on re-run; 330 is the same seed the train/test split uses.
rf = RandomForestRegressor(n_estimators=100, max_depth=8, random_state=330)
rf.fit(features_train, targets_train)

In [12]:
# Predict win-rate buckets for the held-out rows and report the RMSE against
# the true values (the cell's displayed result).
y_pred = rf.predict(X=features_test)
root_mean_squared_error(y_true=targets_test, y_pred=y_pred)

0.05696595515783297

In [None]:
# Cross-validated search over forest size and depth.
ests = [100, 200]
depths = list(range(6, 9))  # 6, 7, 8

grid_search = GridSearchCV(
    # Seed the forests so the search result is reproducible; 330 matches the
    # seed used for the train/test split.
    RandomForestRegressor(random_state=330),
    param_grid={'n_estimators': ests, 'max_depth': depths},
    # RMSE-based scoring keeps best_score_ on the same scale as the
    # root_mean_squared_error reported for the baseline forest
    # (scores are negated, so higher is better).
    scoring='neg_root_mean_squared_error',
)

grid_search.fit(features_train, targets_train)

In [None]:
# Parameter combination that achieved the best cross-validated score in the search.
grid_search.best_params_