In [1]:
import pandas as pd
import xlrd
import numpy as np
from scipy.spatial.distance import pdist, squareform
import pickle
from tqdm import tqdm
from annoy import AnnoyIndex
from sklearn.preprocessing import normalize
import itertools
from statsmodels.stats.proportion import proportion_confint
from operator import itemgetter
from collections import defaultdict
import os
from google.cloud import storage
import json
from tqdm import tqdm

In [2]:
df = pd.read_excel('card_stats.xlsx')
print(df.shape)
df.head()

(99, 35)


Unnamed: 0,name,rarity_common,rarity_rare,rarity_epic,rarity_legendary,troop,troop_ground,troop_air,building,spell,...,move_speed_very_fast,radius,width,range,range_melee_short,range_melee_medium,range_melee_long,spawner,duration,hitpoints
0,Knight,1,0,0,0,1,1,0,0,0,...,0,0.0,0.0,0.0,0,1,0,0,0.0,1452.0
1,Archers,1,0,0,0,1,1,0,0,0,...,0,0.0,0.0,5.0,0,0,0,0,0.0,252.0
2,Goblins,1,0,0,0,1,1,0,0,0,...,1,0.0,0.0,0.0,1,0,0,0,0.0,167.0
3,Giant,0,1,0,0,1,1,0,0,0,...,0,0.0,0.0,0.0,0,1,0,0,0.0,3275.0
4,P.E.K.K.A,0,0,1,0,1,1,0,0,0,...,0,0.0,0.0,0.0,0,1,0,0,0.0,3125.0


In [3]:
stats_matrix = df.values[:,1:]
stats_matrix.shape

(99, 34)

# Get single card recommendations

In [None]:
sim_matrix = 1 - squareform(pdist(stats_matrix, 'cosine'))
sim_matrix.shape

In [4]:
card_name = 'Ram Rider'
card_ind = np.where(df['name'] == card_name)[0][0]
card_sim_vec = sim_matrix[card_ind]
rec_inds = np.argsort(card_sim_vec)[::-1]
df['name'].values[rec_inds][:10]

array(['Ram Rider', 'Cannon Cart', 'Knight', 'Hog Rider', 'Barbarian Hut',
       'Royal Giant', 'Battle Healer', 'Royal Recruits', 'Goblin Hut',
       'Royal Hogs'], dtype=object)

# Get full deck recommendations (using deck averages)

In [5]:
battles = pickle.loads(open('user_data_9Q9GY29CQ.p', 'rb').read())
stats_dict = dict(zip(df['name'], stats_matrix))

In [6]:
all_battles = battles['team_cards'] + battles['opponent_cards']
all_battles = [str(sorted(b)) for b in all_battles]
all_battles = set(all_battles)
all_battles = [eval(b) for b in all_battles]
len(all_battles)

744

In [7]:
all_battles_dict = {}
for b in all_battles:
    all_battles_dict[str(b)] = np.array([stats_dict[c] for c in b]).mean(axis=0)
len(all_battles_dict)

744

In [28]:
model = AnnoyIndex(34, 'dot')
c = 0
index2id = {}
for deck, vec in tqdm(all_battles_dict.items()):
    vec_norm = normalize(vec.reshape(-1, 1), norm='l2', axis=0).reshape(1,-1)[0]
    model.add_item(c, vec_norm)
    index2id[c] = deck
    c += 1

model.build(10)

100%|██████████████████████████████████████████████████████████████████████████████| 744/744 [00:00<00:00, 5471.59it/s]


True

In [29]:
# model.save('2020_06_10_deck_model.ann')
# model = AnnoyIndex(34, 'dot')
# model.load('2020_06_10_deck_model.ann')

In [30]:
deck_cards = ['Bomb Tower', 'Hog Rider', 'The Log', 'Tornado', 
              'Earthquake', 'Skeletons', 'Musketeer', 'Ice Golem']
deck_cards = ['Cannon', 'Hog Rider', 'The Log', 'Fireball', 
              'Ice Spirit', 'Skeletons', 'Musketeer', 'Ice Golem']
deck_cards = ['Mega Knight', 'Ram Rider', 'Inferno Dragon', 'Bandit', 
              'Valkyrie', 'Tornado', 'Barbarian Barrel', 'Poison']
search_vec = np.array([stats_dict[c] for c in deck_cards]).mean(axis=0)
norm_search_vec = normalize(search_vec.reshape(-1, 1), norm='l2', axis=0)
rec_inds, scores = model.get_nns_by_vector(norm_search_vec, 5, include_distances=True)
print(scores)
[index2id[i] for i in rec_inds]

[0.9997736811637878, 0.99971604347229, 0.9997141361236572, 0.999648928642273, 0.999648928642273]


["['Archers', 'Goblin Barrel', 'Inferno Dragon', 'Mega Knight', 'Miner', 'Skeleton Army', 'Valkyrie', 'Zap']",
 "['Battle Healer', 'Cannon Cart', 'Dart Goblin', 'Elixir Golem', 'Goblins', 'Magic Archer', 'Poison', 'The Log']",
 "['Cannon Cart', 'Dark Prince', 'Electro Wizard', 'Freeze', 'Magic Archer', 'Mega Knight', 'Skeleton Army', 'Witch']",
 "['Bandit', 'Barbarian Barrel', 'Bats', 'Electro Wizard', 'Magic Archer', 'Mega Knight', 'Poison', 'Royal Hogs']",
 "['Bats', 'Elixir Collector', 'Golem', 'Mini P.E.K.K.A', 'Tornado', 'Valkyrie', 'Wizard', 'Zap']"]

In [34]:
list([str(b) for b in np.random.choice(all_battles, 5)])

["['Fireball', 'Furnace', 'Mini P.E.K.K.A', 'Prince', 'Valkyrie', 'Witch', 'Wizard', 'Zap']",
 "['Ice Golem', 'Ice Wizard', 'Mega Minion', 'Rocket', 'Skeletons', 'Tesla', 'Tornado', 'X-Bow']",
 "['Balloon', 'Bats', 'Giant Snowball', 'Ice Golem', 'Magic Archer', 'Miner', 'Sparky', 'Tornado']",
 "['Baby Dragon', 'Bats', 'Goblin Gang', 'Hog Rider', 'Mini P.E.K.K.A', 'Royal Giant', 'The Log', 'Valkyrie']",
 "['Baby Dragon', 'Bats', 'Firecracker', 'Goblin Barrel', 'Mega Knight', 'The Log', 'Valkyrie', 'Wall Breakers']"]

In [None]:
# Scores are a bit too similar... maybe a different way to combine card vectors?
# Re-rank by win percentage and play count?

# Get full deck recommendations (using all card vectors)

In [6]:
battles = pickle.loads(open('user_data/user_data_9Q9GY29CQ.p', 'rb').read())
stats_dict = dict(zip(df['name'], stats_matrix))

In [8]:
all_battles = battles['team_cards'] + battles['opponent_cards']
all_battles = [str(sorted(b)) for b in all_battles]
all_battles = set(all_battles)
all_battles = [sorted(eval(b)) for b in all_battles]
len(all_battles)

744

In [9]:
all_battles_dict = {}
for b in all_battles:
    all_battles_dict[str(b)] = np.array(list(itertools.chain(*[stats_dict[c] for c in b])))
len(all_battles_dict)

744

In [10]:
model = AnnoyIndex(272, 'dot')
c = 0
e = 0
index2id = {}
for deck, vec in tqdm(all_battles_dict.items()):
    if len(vec)==272:
        vec_norm = normalize(vec.reshape(-1, 1), norm='l2', axis=0).reshape(1,-1)[0]
        model.add_item(c, vec_norm)
        index2id[c] = deck
        c += 1
    else:
        e += 1

model.build(10)

100%|██████████████████████████████████████████████████████████████████████████████| 744/744 [00:00<00:00, 3180.24it/s]


True

In [11]:
e

10

In [12]:
deck_cards = ['Bomb Tower', 'Hog Rider', 'The Log', 'Tornado', 
              'Earthquake', 'Skeletons', 'Musketeer', 'Ice Golem']
# deck_cards = ['Cannon', 'Hog Rider', 'The Log', 'Fireball', 
#               'Ice Spirit', 'Skeletons', 'Musketeer', 'Ice Golem']
# deck_cards = ['Mega Knight', 'Ram Rider', 'Inferno Dragon', 'Bandit', 
#               'Valkyrie', 'Tornado', 'Barbarian Barrel', 'Poison']
search_vec = np.array(list(itertools.chain(*[stats_dict[c] for c in sorted(deck_cards)])))
norm_search_vec = normalize(search_vec.reshape(-1, 1), norm='l2', axis=0)
rec_inds, scores = model.get_nns_by_vector(norm_search_vec, 5, include_distances=True)
print(scores)
[index2id[i] for i in rec_inds]

[0.9999998807907104, 0.8810864686965942, 0.8694665431976318, 0.8665416240692139, 0.8472418189048767]


["['Bomb Tower', 'Earthquake', 'Hog Rider', 'Ice Golem', 'Musketeer', 'Skeletons', 'The Log', 'Tornado']",
 "['Balloon', 'Bats', 'Cannon Cart', 'Dark Prince', 'Goblin Hut', 'Musketeer', 'Poison', 'The Log']",
 "['Bowler', 'Earthquake', 'Hog Rider', 'Mini P.E.K.K.A', 'Princess', 'Skeleton Army', 'Tornado', 'Wizard']",
 "['Giant Skeleton', 'Goblin Barrel', 'Goblin Cage', 'Inferno Dragon', 'Skeleton Army', 'Skeleton Barrel', 'Tornado', 'Zap']",
 "['Elite Barbarians', 'Ice Spirit', 'Inferno Dragon', 'Miner', 'Mini P.E.K.K.A', 'Rage', 'Wizard', 'Zap']"]

In [None]:
# I liked the other ones better I think
# It doesn't seem to capture whether a card is in the right place
# How to make a vector where the order doesn't exactly matter?

# Get full deck recommendations (percentiles of card vectors)

In [4]:
stats_dict = dict(zip(df['name'], stats_matrix))

In [5]:
client = storage.Client.from_service_account_json('../royaleapp-296a6cea39ad.json')
bucket = client.bucket('royale-data')
blobs = bucket.list_blobs(prefix='user_data')
player_tags = [blob.name.split('/')[1][:-5] for blob in blobs if blob.name.endswith('.json')]
player_tags

['220PV9GU',
 '2UGUVYU29',
 '80G9U0L9V',
 '80URVV8JY',
 '9L9009G9P',
 '9Q9GY29CQ',
 'P2RUP82LJ',
 'P9VUGYL8L',
 'Y8LGYQYC',
 'YLV0PUJQ']

## Original Training

In [30]:
%%time
all_battles = []
deck_win_dict = defaultdict(lambda: {'play_count':0, 'win_count':0, 'win_rate':0})
for player_tag in tqdm(player_tags):
    blob = bucket.get_blob('user_data/{}.json'.format(player_tag))
    battles = json.loads(blob.download_as_string())  
    
    # Make list of all battles
    all_battles += battles['team_cards'] + battles['opponent_cards']
    
    # Make deck win count dictionary
    for team_cards, opponent_cards, win_loss in zip(battles['team_cards'], 
                                                    battles['opponent_cards'], 
                                                    battles['win_loss']):
        if len(team_cards) != 8:
            pass
        else:
            team_cards = str(sorted(team_cards))
            opponent_cards = str(sorted(opponent_cards))
            deck_win_dict[team_cards]['play_count'] += 1
            deck_win_dict[opponent_cards]['play_count'] += 1
            if win_loss == 'win':
                deck_win_dict[team_cards]['win_count'] += 1
                deck_win_dict[team_cards]['win_rate'] = deck_win_dict[team_cards]['win_count'] \
                    / deck_win_dict[team_cards]['play_count']
            else:
                deck_win_dict[opponent_cards]['win_count'] += 1
                deck_win_dict[opponent_cards]['win_rate'] = deck_win_dict[opponent_cards]['win_count'] \
                    / deck_win_dict[opponent_cards]['play_count']

all_battles = [str(sorted(b)) for b in all_battles]
all_battles = set(all_battles)
all_battles = [sorted(eval(b)) for b in all_battles]

all_battles_dict = {}
for b in all_battles:
    if len(b) == 8:
        all_battles_dict[str(b)] = np.percentile(np.array([stats_dict[c] for c in b]), list(np.arange(0,105,5)), axis=0).flatten()

len(all_battles), len(deck_win_dict), len(all_battles_dict)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  4.88it/s]


Wall time: 7.25 s


(7001, 6934, 6934)

## Modified Original Training x 100 Load
(original training with 100x load is ~30x increased time at 3 min 47 sec)

In [6]:
%%time
from collections import Counter
all_battles = []
deck_win_dict = defaultdict(lambda: {'play_count':0, 'win_count':0, 'win_rate':0})
for player_tag in tqdm(player_tags): # tested * 100x original load (~30x increased time)
    blob = bucket.get_blob('user_data/{}.json'.format(player_tag))
    battles = json.loads(blob.download_as_string())  
    
    # Make list of all battles
    all_battles += battles['team_cards'] + battles['opponent_cards']
    
    # Make deck win count dictionary
    for team_cards, opponent_cards, win_loss in zip(battles['team_cards'], 
                                                    battles['opponent_cards'], 
                                                    battles['win_loss']):
        if len(team_cards) != 8:
            pass
        else:
            team_cards = str(sorted(team_cards))
            opponent_cards = str(sorted(opponent_cards))
            deck_win_dict[team_cards]['play_count'] += 1
            deck_win_dict[opponent_cards]['play_count'] += 1
            if win_loss == 'win':
                deck_win_dict[team_cards]['win_count'] += 1
                deck_win_dict[team_cards]['win_rate'] = deck_win_dict[team_cards]['win_count'] \
                    / deck_win_dict[team_cards]['play_count'] # Could do this calculation at the end
            else:
                deck_win_dict[opponent_cards]['win_count'] += 1
                deck_win_dict[opponent_cards]['win_rate'] = deck_win_dict[opponent_cards]['win_count'] \
                    / deck_win_dict[opponent_cards]['play_count']

all_battles = [str(sorted(b)) for b in all_battles]
all_battles = [sorted(eval(b[0])) for b in Counter(all_battles).most_common()[:10000]]

all_battles_dict = {}
for b in all_battles:
    if len(b) == 8:
        all_battles_dict[str(b)] = np.percentile(np.array([stats_dict[c] for c in b]), list(np.arange(0,105,5)), axis=0).flatten()

len(all_battles), len(deck_win_dict), len(all_battles_dict)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.72it/s]


Wall time: 9.4 s


(7928, 7861, 7861)

## Build the Model

In [28]:
%%time
vec_len = list(all_battles_dict.values())[0].shape[0]
model = AnnoyIndex(vec_len, 'dot')
c = 0
index2id = {}
for deck, vec in tqdm(all_battles_dict.items()):
    vec_norm = normalize(vec.reshape(-1, 1), norm='l2', axis=0).reshape(1,-1)[0]
    model.add_item(c, vec_norm)
    index2id[c] = deck
    c += 1

model.build(10)

100%|████████████████████████████████████████████████████████████████████████████| 7861/7861 [00:03<00:00, 2102.47it/s]


Wall time: 4.98 s


True

In [29]:
%%timeit
# test
len(model.get_nns_by_item(0, n=-1))

26.5 ms ± 558 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Save Models

In [30]:
# # Save for use in app
# model.save('2020_07_24_deck_recommender.ann')
# json.dump(deck_win_dict, open('2020_07_24_deck_win_dict.json', 'w'))
# json.dump(index2id, open('2020_07_24_index2id.json', 'w'))

In [31]:
# index2id == json.loads(open('2020_07_24_index2id.json', 'r').read()), deck_win_dict == json.loads(open('2020_07_24_deck_win_dict.json', 'r').read())

## Predictions

In [33]:
# deck_cards = ['Cannon', 'Hog Rider', 'The Log', 'Ice Spirit', 
#               'Fireball', 'Skeletons', 'Musketeer', 'Ice Golem']
# deck_cards = ['Royal Giant', 'Goblin Cage', 'Royal Delivery', 'Heal Spirit', 
#               'Firecracker', 'Skeletons', 'Bats', 'Earthquake']
# deck_cards = ['Valkyrie', 'Bandit', 'Miner', 'Arrows', 
#               'Bats', 'Wall Breakers', 'Musketeer', 'Royal Delivery']
deck_cards = ['Sparky', 'Giant', 'Spear Goblins', 'Poison', 
              'The Log', 'Skeleton Army', 'Skeleton Dragons', 'Bats']
search_vec = np.percentile(np.array([stats_dict[c] for c in deck_cards]), list(np.arange(0,105,5)), axis=0).flatten()
norm_search_vec = normalize(search_vec.reshape(-1, 1), norm='l2', axis=0)
rec_inds, scores = model.get_nns_by_vector(norm_search_vec, 50, include_distances=True)

# Rank recommendations
out = []
for rec_ind, score in zip(rec_inds, scores):
    deck_data = deck_win_dict[index2id[rec_ind]]
    int_play_counts = deck_data['play_count']
    float_win_percs = deck_data['win_rate']
    win_confidence = proportion_confint(np.round(float_win_percs*int_play_counts).astype(int), int_play_counts, alpha=0.05, method='wilson')[0]
    out.append([index2id[rec_ind], int_play_counts, float_win_percs, score, win_confidence*score])
    
# Display recommendations
# sorted(out, key=itemgetter(4), reverse=True)[:10]
out[:10]

[["['Bats', 'Giant', 'Poison', 'Skeleton Army', 'Skeleton Dragons', 'Sparky', 'Spear Goblins', 'The Log']",
  5,
  0.6,
  1.000000238418579,
  0.23072433628496822],
 ["['Bats', 'Giant', 'Poison', 'Skeleton Army', 'Sparky', 'Spear Goblins', 'Wizard', 'Zap']",
  5,
  0.6666666666666666,
  0.9985234141349792,
  0.23038359706356368],
 ["['Bats', 'Giant', 'Poison', 'Skeleton Army', 'Sparky', 'Spear Goblins', 'The Log', 'Wizard']",
  1,
  0,
  0.9983831644058228,
  0.0],
 ["['Bats', 'Firecracker', 'Giant', 'Poison', 'Skeleton Army', 'Sparky', 'Spear Goblins', 'The Log']",
  1,
  0,
  0.9975006580352783,
  0.0],
 ["['Arrows', 'Bats', 'Giant', 'Graveyard', 'Musketeer', 'Skeleton Army', 'Sparky', 'Zap']",
  2,
  0.5,
  0.996845006942749,
  0.09423296043644562],
 ["['Arrows', 'Bats', 'Giant', 'Giant Snowball', 'Graveyard', 'Musketeer', 'Skeleton Army', 'Sparky']",
  1,
  1.0,
  0.996845006942749,
  0.20589765272439725],
 ["['Arrows', 'Bats', 'Giant', 'Goblin Gang', 'Minion Horde', 'Skeleton Army