In [256]:
import os.path
from typing import List
from datetime import date, datetime, timedelta

import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from kmodes.kmodes import KModes
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, MiniBatchKMeans, DBSCAN
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer
from sklearn import metrics

from datetime import date
# from thefuzz import fuzz

%matplotlib inline
pd.options.mode.chained_assignment = None  # default='warn'
pd.options.display.max_colwidth = 160

from utils.configuration import settings
from utils.utils import *
from tft.api import *

In [257]:
RANDOM_STATE = 42

API_KEY: str = settings.api_key
ASSETS_DIR: str = settings.assets_dir
SERVER = 'na1'  # euw1 na1 kr oc1
LEAGUE='challengers' # challengers grandmasters
MAX_COUNT: int = settings.max_count
LATEST_RELEASE = '12.13.453.3037' # '12.12.450.4196''12.13.453.3037' Version 12.12.448.6653 12.11.446.9344 Version 12.13.453.3037 (Jul 11 2022/18:39:20) [PUBLIC] <Releases/12.13>
PATCH: date = date(2022, 7, 16)  # date(2022, 7, 1) date(2022, 7, 16)
THREEDAY: datetime = (datetime.now() - timedelta(days=3)).strftime("%Y-%m-%d")

TARGETNAME = 'placement'

# Data Loading

In [258]:
# raw_df: DataFrame = pd.read_pickle(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_matches.pickle'))
raw_df: DataFrame = pd.read_pickle(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{THREEDAY}_matches.pickle'))

# Preprocessing

In [259]:
def impute(df) -> DataFrame:
    for name in df.select_dtypes("number"):
        df[name] = df[name].fillna(0)
    for name in df.select_dtypes("object"):
        df[name] = df[name].fillna("None")
    return df

raw_df = impute(raw_df)

In [260]:
match_id_df: Series = raw_df['match_id']
X: DataFrame = raw_df.drop(['match_id'], axis=1)
y: Series = X.pop(TARGETNAME)
X.fillna('', inplace=True)

numeric_cols: List = X.select_dtypes(include=np.number).columns.tolist()
categorical_cols = X.select_dtypes(include=['object','category']).columns.tolist()

# traits level columns
traits_col: list = [s for s in numeric_cols if "Set7" in s]
# units level columns
units_col: list = [s for s in numeric_cols if "TFT7" in s]
# augments columns
augments_col: list[str] = ['augment0', 'augment1', 'augment2']
# units items columns
items_col = [s for s in categorical_cols if s not in augments_col]

df_unique = X.nunique().to_frame().reset_index()
df_unique.columns = ['Variable','DistinctCount']
unique_items_set = {y for col in items_col for y in X[col].unique().tolist()}
unique_augments_set = {y for col in augments_col for y in X[col].unique().tolist()}

X[f'items_count'] = X[items_col].apply(lambda row: sum(x != 'None' for x in row), axis=1)
X[f'traits_sum'] = X[traits_col].sum(axis=1)
X[f'units_sum'] = X[units_col].sum(axis=1)
X.iloc[X[f'units_sum'].idxmax()]

augment0              TFT7_Augment_MageConference
augment1          TFT6_Augment_CelestialBlessing1
augment2                TFT6_Augment_JeweledLotus
Set7_Assassin                                 0.0
Set7_Astral                                   1.0
                               ...               
TFT7_Zoe_item1                               None
TFT7_Zoe_item2                               None
items_count                                    12
traits_sum                                    9.0
units_sum                                    29.0
Name: 4308, Length: 270, dtype: object

In [261]:
numeric_cols = X.select_dtypes(include=np.number).columns.tolist()
categorical_cols = X.select_dtypes(include=['object','category']).columns.tolist()
X[numeric_cols] = X[numeric_cols].applymap(np.int64)

matches_df = X.copy()
matches_df[TARGETNAME] = y

# Augments Ranking

## Stage 2-1 augment ranking

In [262]:
# filter and melt the dataframe
m = matches_df.filter(regex=r'placement|augment0').melt('placement', value_name='augment0')
# group and aggregate mean/median
dct = {'Value_Count': ('augment0', 'count'), 'average_placement': ('placement', 'mean')}
augment0_rank_df = m.groupby('augment0', as_index=False).agg(**dct).sort_values(by='average_placement')
augment0_rank_df[:30]

  m = matches_df.filter(regex=r'placement|augment0').melt('placement', value_name='augment0')


Unnamed: 0,augment0,Value_Count,average_placement
136,TFT7_Augment_MageConferenceHR,6,3.333333
179,TFT7_Augment_TempestTrait,24,3.375
146,TFT7_Augment_MysticTrait2,20,3.5
126,TFT7_Augment_GuildLootHR,3,3.666667
157,TFT7_Augment_RevelEmblem2,151,3.748344
68,TFT6_Augment_TriForce2,232,3.87931
75,TFT6_Augment_Windfall,11,3.909091
140,TFT7_Augment_MikaelsGift,159,3.918239
165,TFT7_Augment_ShapeshifterBeastsDen,159,3.937107
117,TFT7_Augment_GadgetExpert,467,3.976445


In [263]:
augment0_rank_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_{THREEDAY}_augment0_ranking.csv'), index=False)

## Stage 3-2 augment ranking

In [264]:
# filter and melt the dataframe
m = matches_df.filter(regex=r'placement|augment1').melt('placement', value_name='augment1')
# group and aggregate mean/median
dct = {'Value_Count': ('augment1', 'count'), 'average_placement': ('placement', 'mean')}
augment1_rank_df = m.groupby('augment1', as_index=False).agg(**dct).sort_values(by='average_placement')
augment1_rank_df[:30]

  m = matches_df.filter(regex=r'placement|augment1').melt('placement', value_name='augment1')


Unnamed: 0,augment1,Value_Count,average_placement
151,TFT7_Augment_Preparation3HR,1,1.0
149,TFT7_Augment_Preparation2HR,2,2.5
65,TFT6_Augment_Traitless1,26,2.615385
154,TFT7_Augment_RagewingScorch,17,3.235294
70,TFT6_Augment_TriForce3,24,3.291667
67,TFT6_Augment_Traitless3,9,3.333333
66,TFT6_Augment_Traitless2,8,3.375
69,TFT6_Augment_TriForce2,33,3.393939
162,TFT7_Augment_ScalescornEmblem2,39,3.487179
176,TFT7_Augment_TempestEmblem2,4,3.5


In [265]:
augment1_rank_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_{THREEDAY}_augment1_ranking.csv'), index=False)

## Stage 4-2 augment ranking

In [266]:
# filter and melt the dataframe
m = matches_df.filter(regex=r'placement|augment2').melt('placement', value_name='augment2')
# group and aggregate mean/median
dct = {'Value_Count': ('augment2', 'count'), 'average_placement': ('placement', 'mean')}
augment2_rank_df = m.groupby('augment2', as_index=False).agg(**dct).sort_values(by='average_placement')
augment2_rank_df[:30]

  m = matches_df.filter(regex=r'placement|augment2').melt('placement', value_name='augment2')


Unnamed: 0,augment2,Value_Count,average_placement
58,TFT6_Augment_Traitless3,4,1.25
102,TFT7_Augment_EvokerTrait,3,1.666667
57,TFT6_Augment_Traitless2,15,2.466667
26,TFT6_Augment_Featherweights3,5,2.6
106,TFT7_Augment_GuardianEmblem2,5,3.2
61,TFT6_Augment_TriForce3,4,3.5
126,TFT7_Augment_MysticTrait,30,3.5
144,TFT7_Augment_ShapeshifterBeastsDen,149,3.563758
140,TFT7_Augment_RevelTrait,7,3.571429
101,TFT7_Augment_EvokerEssenceTheft,15,3.666667


In [267]:
augment2_rank_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_{THREEDAY}_augment2_ranking.csv'), index=False)

# Items Ranking

In [268]:
def get_unit_items_ranking(df: matches_df, unit: str):
    # filter and melt the dataframe
    df = df.filter(regex=f'placement|{unit}_item0|{unit}_item1|{unit}_item2')
    df[f'unit'] = f'{unit}' # fill in current unit
    # join 3 items to 1 column
    df[f'{unit}_items'] = df[[f'{unit}_item0', f'{unit}_item1', f'{unit}_item2']].apply(lambda row: ', '.join(row.values.astype(str)), axis=1)
    # sort items for unique combination
    df[f'{unit}_items'] = df[f'{unit}_items'].apply(lambda x: ', '.join(sorted(x.split(', '))))
    df = df.filter(regex=f'placement|{unit}_items|unit')
    m = df.melt(
        ['placement',f'unit'], value_name=f'{unit}_items_grp') #, value_vars=[f'{unit}_items', f'{unit}']
    # group and aggregate mean/median average_placement
    dct = {'value_count': (f'{unit}_items_grp', 'count'),
           'average_placement': ('placement', 'mean')}
    return m.groupby([f'unit', f'{unit}_items_grp'], as_index=False).agg(**dct).sort_values(by='average_placement')

In [269]:
get_unit_items_ranking(df = matches_df[:2], unit='TFT7_Zoe')

Unnamed: 0,unit,TFT7_Zoe_items_grp,value_count,average_placement
1,TFT7_Zoe,"None, None, StatikkShiv",1,7.0
0,TFT7_Zoe,"None, None, None",1,8.0


In [270]:
# Get top5 value_count >= 12
top5_items_list = []
for unit in units_col:
    df = get_unit_items_ranking(df = matches_df, unit=unit)
    df = df[df['value_count']>=12][:5] #Top 5 with counts >= 12
    top5_items_list.extend(df.values)

In [271]:
top5_items_list = pd.DataFrame(top5_items_list, columns=['unit', 'items',	'value_count',	'average_placement'])

In [272]:
top5_items_list.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_{THREEDAY}_top5_items.csv'), index=False)

## Top 1 items

In [273]:
top5_items_list.groupby('unit').head(1)

Unnamed: 0,unit,items,value_count,average_placement
0,TFT7_Aatrox,"None, None, None",23414,4.492526
2,TFT7_Anivia,"Morellonomicon, None, None",17,4.0
7,TFT7_AoShin,"JeweledGauntlet, SpearOfShojin, UnstableConcoction",15,2.066667
12,TFT7_Ashe,"None, None, Zephyr",27,3.666667
17,TFT7_AurelionSol,"HextechGunblade, MageEmblemItem, SpearOfShojin",16,3.9375
22,TFT7_Bard,"None, None, ThiefsGloves",13,1.923077
27,TFT7_Braum,"IonicSpark, None, None",20,3.5
32,TFT7_Corki,"InfinityEdge, RunaansHurricane, UnstableConcoction",18,3.055556
37,TFT7_Diana,"ArchangelsStaff, FrozenHeart, IonicSpark",23,2.434783
42,TFT7_DragonBlue,"GuinsoosRageblade, MadredsBloodrazor, UnstableConcoction",17,3.176471


# Load TFT asset

In [274]:
tft_assets = read_json(os.path.join(ASSETS_DIR, f'en_us.json'))

In [275]:
tft7_set = tft_assets['setData'][6]['champions'] #['apiName'] ['traits']['name'] #['champions'] #['champions'].name

In [276]:
champions_dict = {}

for champion in tft7_set:
    if champion["apiName"] not in champions_dict:
        champions_dict[champion["apiName"]] = []
    for trait in champion["traits"]:
        champions_dict[champion["apiName"]].append(trait)

In [277]:
champions_dict['TFT7_Olaf']

['Scalescorn', 'Bruiser', 'Warrior']

# Team Composition Ranking

In [278]:
def add_traits(units_str):
    # for units in units_str.split(', '):
    comp_array = []
    if len(units_str) == 0:
        return ''
    for unit in units_str.split(', '):
        traits_array = []
        for trait in champions_dict[unit]:
            traits_array.append(trait[:2]+trait[-1:]) # Add first 2 char for trait
        traits_str = '-'.join(traits_array) + f'-{unit}'
        comp_array.append(traits_str)

    # print(f'{"".join(comp_array)}')
    return ','.join(comp_array)

def get_unit_comp_ranking(df: DataFrame, add_trait=True):
    # filter and melt the dataframe
    df = df.filter(['placement']+units_col)
    # join units lvl > 0 to 1 column
    df['comp'] = df[units_col].apply(lambda row: ', '.join(row[row > 0].index.values.astype(str)), axis=1)
    if add_trait:
        df['comp'] = df['comp'].apply(add_traits)

    df['comp'] = df['comp'].str.replace('TFT7_','') # remove prefix .split('_',1).str[-1]
    df = df.filter(['placement', 'comp'])
    m = df.melt(
        ['placement'], value_name=f'comp_grp')
    # group and aggregate mean/median average_placement
    dct = {'value_count': (f'comp_grp', 'count'),
           'average_placement': ('placement', 'mean')}
    return m.groupby([f'comp_grp'], as_index=False).agg(**dct).sort_values(by='average_placement')

In [279]:
# Get top5 
comp_df = get_unit_comp_ranking(df = matches_df)

In [280]:
top5_comp_list = []
m = comp_df[comp_df['value_count']>=1] #[:5] #Top 5 with counts >= 12
top5_comp_list.extend(m.values)
comp_ranking_df = pd.DataFrame(top5_comp_list, columns=['comp', 'value_count', 'average_placement'])

In [281]:
comp_ranking_df #.groupby('comp').head(1)

Unnamed: 0,comp,value_count,average_placement
0,"Tet-Drn-AoShin,Gud-Myc-Bad-Bard,Asl-Brr-Illaoi,Whs-Asn-Pyke,Gud-Mae-Ryze,Whs-Mae-Brr-Sylas,Gud-Asn-Talon,She-Spf-Mae-Zoe",1,1.0
1,"Gud-Myc-Bad-Bard,Rag-Car-Hecarim,Tet-Brr-Led-Ornn,Whs-Asn-Pyke,Gud-Mae-Ryze,Gud-Car-Sejuani,Gud-Asn-Talon,Gud-Swt-Twitch,Mie-Drr-War-Yasuo",1,1.0
2,"Gud-Myc-Bad-Bard,Rag-Car-Hecarim,Tet-Brr-Led-Ornn,Tet-Asn-Qiyana,Rag-Brr-War-Shen,Rag-Shr-Drn-Shyvana,Rag-Swt-Xayah,Mie-Drr-War-Yasuo",1,1.0
3,"She-War-Aatrox,Scn-Gun-Braum,Mie-Drn-DragonBlue,Mie-Gun-Leona,Mie-Car-Nunu,Scn-Brr-War-Olaf,Rag-Brr-War-Shen,Mie-War-Yone",1,1.0
4,"Gud-Myc-Bad-Bard,Rag-Car-Hecarim,Tet-Brr-Led-Ornn,Tet-Asn-Qiyana,Gud-Mae-Ryze,Gud-Car-Sejuani,Rag-Brr-War-Shen,Gud-Asn-Talon,Gud-Swt-Twitch,Rag-Swt-Xayah,Mi...",1,1.0
...,...,...,...
11394,"Jae-Shr-Gnar,Asl-Brr-Illaoi,Jae-Drr-Karma,Jae-Shr-Neeko,Asl-Brr-Skarner,Whs-Mae-Brr-Sylas",1,8.0
11395,"Jae-Shr-Gnar,Asl-Brr-Illaoi,Asl-Mae-Myc-Nami,Jae-Shr-Neeko,Asl-Shr-Nidalee,Asl-Mae-Vladimir,Mie-Drr-War-Yasuo",1,8.0
11396,"Jae-Myc-Drn-DragonGreen,Whs-Shr-Elise,Jae-Shr-Neeko,Whs-Mae-Brr-Sylas,Jae-Gun-Taric,Whs-Gun-Thresh",1,8.0
11397,"Jae-Shr-Gnar,Jae-Drr-Karma,Tet-Asn-Qiyana,Rag-Drr-Sett,Rag-Drr-Shr-Swain,Jae-Gun-Taric,She-Drr-Led-Volibear",1,8.0


In [282]:
composition_ranking_df = comp_ranking_df.copy()

# Team composition Clustering

In [283]:
# from sentence_transformers import SentenceTransformer

# embedder = SentenceTransformer('all-MiniLM-L6-v2')

In [284]:
# corpus_sentences = list(comp_ranking_df['comp'])

# corpus_embeddings = embedder.encode(corpus_sentences, batch_size=512, show_progress_bar=True, convert_to_tensor=True)
# Normalize the embeddings to unit length
# corpus_embeddings = corpus_embeddings /  np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)

In [285]:
# clustering_model = DBSCAN(eps=0.023, min_samples=1, metric='cosine', n_jobs=-1)
# predict=clustering_model.fit_predict(corpus_embeddings)
# comp_ranking_df['group'] = pd.Series(predict, index=comp_ranking_df.index)

In [286]:
# comp_ranking_df.sort_values(by='group')

In [287]:
# comp_ranking_df['grp_count'] = comp_ranking_df.groupby(['group'], as_index=False)['value_count'].transform('sum')
# comp_ranking_df['grp_placement'] = comp_ranking_df.groupby(['group'], as_index=False)['average_placement'].transform('mean')

In [288]:
# comp_ranking_df.sort_values(by='group')[:60]

In [289]:
# top5_comp_ranking_list = []
# m = comp_ranking_df[comp_ranking_df['grp_count']>=10] #[:5] #Top 5 with counts >= 12
# top5_comp_ranking_list.extend(m.values)
# top_comp_ranking_df = pd.DataFrame(top5_comp_ranking_list, columns=['comp', 'value_count', 'average_placement', 'group', 'grp_count', 'grp_placement'])

In [290]:
def remove_traits(units_str):
    """Remove units traits from text seperated by comma

    Args:
        units_str (str): traits-unit,traits-unit

    Returns:
        str: Units stripped of traits
    """    
    if len(units_str) == 0:
        return ''

    units_array = []
    for unit in units_str.split(','):
        units_array.append(unit.split('-')[-1])
    units = ', '.join(units_array)
    return units

# top_comp_ranking_df['comp'] = top_comp_ranking_df['comp'].apply(remove_traits)


In [291]:
# top_comp_ranking_df['mode'] = top_comp_ranking_df.groupby('group')['comp'].transform(lambda x: pd.Series.mode(x)[0])
# top_comp_ranking_df.groupby(['group']).head(1).sort_values(by='grp_placement')

In [292]:
# comp_ranking_df['comp'] = comp_ranking_df['comp'].apply(remove_traits)
# comp_ranking_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_{THREEDAY}_comp_ranking.csv'), index=False)

In [293]:
# from statistics import mode
# import jellyfish

# import pandas as pd

# df = pd.DataFrame({'Code': ['abc', 'abc', 'abc', 'abcc', 'abcc', 'zxc'],
#                    'Description': ['ABC String', 'abc string', 'ABC String and sth', 'abc sth else', 'zxc sth else', 'zxc zxc'],
#                 #    'Value': [10, 20, 30, 40, 100]
#                    })

# df_list = []
# for grp,df in df.groupby('Code'):
#     df['distance'] = df['Description'].apply(lambda x : fuzz.token_set_ratio(x, mode(df['Description'])))
#     # df['Description'] =  mode(df['Description'])
#     df_list.append(df[df['distance'] > 10])

# df = pd.concat(df_list)

In [294]:
# comp_ranking_df.groupby('comp')['comp'].apply(lambda x : fuzz.token_set_ratio(x, ','.join(units_col)))

In [295]:
# df_list = []
# for grp,df in comp_ranking_df.groupby('comp'):
#     df['distance'] = df['comp'].apply(lambda x : fuzz.token_set_ratio(x, 'Aatrox, DragonGold, Kayn, Shen, Twitch, Xayah, Zoe'))
#     # df['Description'] =  mode(df['Description'])
#     df_list.append(df[df['distance'] > 10])

# df = pd.concat(df_list)

In [296]:
composition_ranking_df

Unnamed: 0,comp,value_count,average_placement
0,"Tet-Drn-AoShin,Gud-Myc-Bad-Bard,Asl-Brr-Illaoi,Whs-Asn-Pyke,Gud-Mae-Ryze,Whs-Mae-Brr-Sylas,Gud-Asn-Talon,She-Spf-Mae-Zoe",1,1.0
1,"Gud-Myc-Bad-Bard,Rag-Car-Hecarim,Tet-Brr-Led-Ornn,Whs-Asn-Pyke,Gud-Mae-Ryze,Gud-Car-Sejuani,Gud-Asn-Talon,Gud-Swt-Twitch,Mie-Drr-War-Yasuo",1,1.0
2,"Gud-Myc-Bad-Bard,Rag-Car-Hecarim,Tet-Brr-Led-Ornn,Tet-Asn-Qiyana,Rag-Brr-War-Shen,Rag-Shr-Drn-Shyvana,Rag-Swt-Xayah,Mie-Drr-War-Yasuo",1,1.0
3,"She-War-Aatrox,Scn-Gun-Braum,Mie-Drn-DragonBlue,Mie-Gun-Leona,Mie-Car-Nunu,Scn-Brr-War-Olaf,Rag-Brr-War-Shen,Mie-War-Yone",1,1.0
4,"Gud-Myc-Bad-Bard,Rag-Car-Hecarim,Tet-Brr-Led-Ornn,Tet-Asn-Qiyana,Gud-Mae-Ryze,Gud-Car-Sejuani,Rag-Brr-War-Shen,Gud-Asn-Talon,Gud-Swt-Twitch,Rag-Swt-Xayah,Mi...",1,1.0
...,...,...,...
11394,"Jae-Shr-Gnar,Asl-Brr-Illaoi,Jae-Drr-Karma,Jae-Shr-Neeko,Asl-Brr-Skarner,Whs-Mae-Brr-Sylas",1,8.0
11395,"Jae-Shr-Gnar,Asl-Brr-Illaoi,Asl-Mae-Myc-Nami,Jae-Shr-Neeko,Asl-Shr-Nidalee,Asl-Mae-Vladimir,Mie-Drr-War-Yasuo",1,8.0
11396,"Jae-Myc-Drn-DragonGreen,Whs-Shr-Elise,Jae-Shr-Neeko,Whs-Mae-Brr-Sylas,Jae-Gun-Taric,Whs-Gun-Thresh",1,8.0
11397,"Jae-Shr-Gnar,Jae-Drr-Karma,Tet-Asn-Qiyana,Rag-Drr-Sett,Rag-Drr-Shr-Swain,Jae-Gun-Taric,She-Drr-Led-Volibear",1,8.0


# KMode
using - comp string

In [297]:
# # Elbow curve to find optimal K
# cost = []
# K = range(5,30,5)
# for num_clusters in list(K):
#     kmode = KModes(n_clusters=num_clusters, init = "random", n_init = 5, verbose=1)
#     kmode.fit_predict(composition_ranking_df)
#     cost.append(kmode.cost_)
    
# plt.plot(K, cost, 'bx-')
# plt.xlabel('No. of clusters')
# plt.ylabel('Cost')
# plt.title('Elbow Method For Optimal k')
# plt.show()

In [298]:
# # Building the model with 3 clusters
# kmode = KModes(n_clusters=15, init = "random", n_init = 5, verbose=1)
# clusters = kmode.fit_predict(composition_ranking_df)
# clusters

In [299]:
# kmode_ranking_df = composition_ranking_df.copy()
# kmode_ranking_df.insert(0, "group", clusters, True)

In [300]:
# kmode_ranking_df

In [301]:
# kmode_ranking_df['grp_count'] = kmode_ranking_df.groupby(['group'], as_index=False)['value_count'].transform('sum')
# kmode_ranking_df['grp_placement'] = kmode_ranking_df.groupby(['group'], as_index=False)['average_placement'].transform('mean')

In [302]:
# top5_comp_ranking_list = []
# m = comp_ranking_df[comp_ranking_df['grp_count']>=10] #[:5] #Top 5 with counts >= 12
# top5_comp_ranking_list.extend(m.values)
# top_comp_ranking_df = pd.DataFrame(top5_comp_ranking_list, columns=['comp', 'value_count', 'average_placement', 'group', 'grp_count', 'grp_placement'])

In [303]:
# kmode_ranking_df['comp'] = kmode_ranking_df['comp'].apply(remove_traits)
# kmode_ranking_df['mode'] = kmode_ranking_df.groupby('group')['comp'].transform(lambda x: pd.Series.mode(x)[0])
# kmode_ranking_df.groupby(['group']).head(1).sort_values(by='grp_placement')

# _

In [304]:
def get_unit_composition(df: matches_df):
    # filter and melt the dataframe
    df = df.filter(['placement']+units_col+traits_col)
    return df.sort_values(by='placement')

In [305]:
units_comp_df:DataFrame = get_unit_composition(matches_df)

In [306]:
units_comp_df

Unnamed: 0,placement,TFT7_Aatrox,TFT7_Anivia,TFT7_AoShin,TFT7_Ashe,TFT7_AurelionSol,TFT7_Bard,TFT7_Braum,TFT7_Corki,TFT7_Diana,...,Set7_Scalescorn,Set7_Shapeshifter,Set7_Shimmerscale,Set7_SpellThief,Set7_Starcaller,Set7_Swiftshot,Set7_Tempest,Set7_Trainer,Set7_Warrior,Set7_Whispers
18367,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,1,0,1,0
21360,1,0,0,0,0,0,2,0,2,0,...,0,0,0,0,0,0,0,1,0,1
2817,1,0,0,0,0,0,0,0,0,3,...,2,0,0,0,0,0,1,0,2,0
8264,1,0,0,2,0,0,0,0,0,0,...,0,0,0,1,0,0,2,1,0,0
8262,1,0,0,0,0,0,2,0,3,0,...,0,0,1,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2482,8,0,0,0,0,0,0,0,2,0,...,0,0,0,0,0,0,0,2,0,0
21628,8,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9948,8,0,0,0,0,0,0,0,1,0,...,0,0,1,1,0,0,0,1,0,0
18085,8,0,0,0,0,0,0,2,0,2,...,2,0,0,0,0,0,0,0,1,0


## KMode
using integers

In [307]:
units_composition_df = units_comp_df.copy()

In [308]:
# # Elbow curve to find optimal K
# cost = []
# K = range(5,40,10)
# for num_clusters in list(K):
#     kmode = KModes(n_clusters=num_clusters, init = "random", n_init = 5, verbose=1)
#     kmode.fit_predict(units_composition_df)
#     cost.append(kmode.cost_)
    
# plt.plot(K, cost, 'bx-')
# plt.xlabel('No. of clusters')
# plt.ylabel('Cost')
# plt.title('Elbow Method For Optimal k')
# plt.show()

In [309]:
# Building the model with 3 clusters
kmode = KModes(n_clusters=25, init = "random", n_init = 5, verbose=1)
clusters = kmode.fit_predict(units_composition_df)
clusters

Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 1, iteration: 1/100, moves: 8268, cost: 203268.0
Run 1, iteration: 2/100, moves: 1979, cost: 202595.0
Run 1, iteration: 3/100, moves: 221, cost: 202595.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 2, iteration: 1/100, moves: 10010, cost: 206387.0
Run 2, iteration: 2/100, moves: 2181, cost: 205683.0
Run 2, iteration: 3/100, moves: 356, cost: 205574.0
Run 2, iteration: 4/100, moves: 42, cost: 205574.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 3, iteration: 1/100, moves: 7969, cost: 205444.0
Run 3, iteration: 2/100, moves: 1811, cost: 204654.0
Run 3, iteration: 3/100, moves: 486, cost: 204639.0
Run 3, iteration: 4/100, moves: 23, cost: 204639.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 4, iteration: 1/100, moves: 8634, cost: 207439.0
Run 4, iteration: 2/100, moves: 3006, cost: 205544

array([22, 11, 16, ...,  2, 10, 15], dtype=uint16)

In [310]:
kmode_ranking_df = units_composition_df.copy()
kmode_ranking_df.insert(0, "group", clusters, True)

In [311]:
kmode_ranking_df

Unnamed: 0,group,placement,TFT7_Aatrox,TFT7_Anivia,TFT7_AoShin,TFT7_Ashe,TFT7_AurelionSol,TFT7_Bard,TFT7_Braum,TFT7_Corki,...,Set7_Scalescorn,Set7_Shapeshifter,Set7_Shimmerscale,Set7_SpellThief,Set7_Starcaller,Set7_Swiftshot,Set7_Tempest,Set7_Trainer,Set7_Warrior,Set7_Whispers
18367,22,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,1,0,1,0
21360,11,1,0,0,0,0,0,2,0,2,...,0,0,0,0,0,0,0,1,0,1
2817,16,1,0,0,0,0,0,0,0,0,...,2,0,0,0,0,0,1,0,2,0
8264,13,1,0,0,2,0,0,0,0,0,...,0,0,0,1,0,0,2,1,0,0
8262,11,1,0,0,0,0,0,2,0,3,...,0,0,1,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2482,4,8,0,0,0,0,0,0,0,2,...,0,0,0,0,0,0,0,2,0,0
21628,14,8,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
9948,2,8,0,0,0,0,0,0,0,1,...,0,0,1,1,0,0,0,1,0,0
18085,10,8,0,0,0,0,0,0,2,0,...,2,0,0,0,0,0,0,0,1,0


In [312]:
def get_unit_composition_ranking(df: DataFrame, add_trait=True):
    # filter and melt the dataframe
    df = df.filter(['placement', 'group']+units_col)
    # join units lvl > 0 to 1 column
    df['comp'] = df[units_col].apply(lambda row: ', '.join(
        row[row > 0].index.values.astype(str)), axis=1)

    if add_trait:
        df['comp'] = df['comp'].apply(add_traits)

    df['comp'] = df['comp'].str.replace('TFT7_', '')
    df = df.filter(['placement', 'group', 'comp'])
    return df.sort_values(by='group')

In [313]:
kmode_ranking_df = get_unit_composition_ranking(kmode_ranking_df, add_trait=False)

In [314]:
kmode_ranking_df

Unnamed: 0,placement,group,comp
10614,2,0,"Elise, Gnar, Neeko, Nidalee, Pyke, Shyvana, Swain, Taric"
17212,3,0,"Ashe, DragonGreen, Elise, Gnar, Neeko, Nidalee, Swain, Talon"
4670,6,0,"DragonPurple, Elise, Karma, LeeSin, Neeko, Swain, Volibear, Yasuo"
4078,3,0,"Elise, Gnar, Neeko, Nidalee, Shyvana, Soraka, Swain, Yasuo"
12870,2,0,"DragonGreen, Elise, Gnar, Neeko, Nidalee, Pyke, Soraka"
...,...,...,...
17860,7,24,"Heimerdinger, Illaoi, Lillia, Lulu, Nami, Ryze, Sylas, TrainerDragon, Vladimir"
7183,7,24,"Bard, Heimerdinger, Illaoi, Lillia, Nami, Ryze, Sylas, Vladimir"
16032,8,24,"Heimerdinger, Illaoi, Nami, Ryze, Sylas, Vladimir"
6012,6,24,"Heimerdinger, Illaoi, Nami, Pyke, Ryze, Soraka, Sylas, Vladimir"


In [315]:
kmode_ranking_df['grp_count'] = kmode_ranking_df.groupby(['group'], as_index=False)['group'].transform('count')
kmode_ranking_df['grp_placement'] = kmode_ranking_df.groupby(['group'], as_index=False)['placement'].transform('mean')

In [316]:
kmode_ranking_df

Unnamed: 0,placement,group,comp,grp_count,grp_placement
10614,2,0,"Elise, Gnar, Neeko, Nidalee, Pyke, Shyvana, Swain, Taric",873,4.302405
17212,3,0,"Ashe, DragonGreen, Elise, Gnar, Neeko, Nidalee, Swain, Talon",873,4.302405
4670,6,0,"DragonPurple, Elise, Karma, LeeSin, Neeko, Swain, Volibear, Yasuo",873,4.302405
4078,3,0,"Elise, Gnar, Neeko, Nidalee, Shyvana, Soraka, Swain, Yasuo",873,4.302405
12870,2,0,"DragonGreen, Elise, Gnar, Neeko, Nidalee, Pyke, Soraka",873,4.302405
...,...,...,...,...,...
17860,7,24,"Heimerdinger, Illaoi, Lillia, Lulu, Nami, Ryze, Sylas, TrainerDragon, Vladimir",1053,5.764482
7183,7,24,"Bard, Heimerdinger, Illaoi, Lillia, Nami, Ryze, Sylas, Vladimir",1053,5.764482
16032,8,24,"Heimerdinger, Illaoi, Nami, Ryze, Sylas, Vladimir",1053,5.764482
6012,6,24,"Heimerdinger, Illaoi, Nami, Pyke, Ryze, Soraka, Sylas, Vladimir",1053,5.764482


In [317]:
top5_comp_ranking_list = []
m = kmode_ranking_df[kmode_ranking_df['grp_count']>=10] #[:5] #Top 5 with counts >= 12
top5_comp_ranking_list.extend(m.values)
top_kmode_ranking_df = pd.DataFrame(top5_comp_ranking_list, columns=['placement', 'group', 'comp', 'grp_count', 'grp_placement'])

In [318]:
kmode_ranking_df['mode'] = kmode_ranking_df.groupby('group')['comp'].transform(lambda x: pd.Series.mode(x)[0])
kmode_ranking_df.groupby(['group']).head(1).sort_values(by='grp_placement')

Unnamed: 0,placement,group,comp,grp_count,grp_placement,mode
14372,3,1,"AurelionSol, Bard, Heimerdinger, Illaoi, Lulu, Nami, Ornn, Sona, TrainerDragon",1373,2.659869,"AoShin, Bard, Heimerdinger, Lulu, Ornn, Sylas, TrainerDragon, Zoe"
12985,1,21,"Braum, Diana, Lillia, Nunu, Olaf, Sejuani, Shen, Talon",420,2.771429,"Braum, Diana, Lillia, Olaf, Sejuani, Shen, Talon"
19958,6,6,"Bard, Hecarim, Ornn, Sejuani, Shen, Twitch, Xayah, Yasuo",676,3.414201,"Bard, Hecarim, Ornn, Qiyana, Sejuani, Shen, Talon, Twitch, Xayah"
7296,8,16,"Diana, Olaf, Ornn, Qiyana, Shen, Sylas, Talon",806,3.619107,"Diana, Olaf, Ornn, Pyke, Qiyana, Shen, Sylas, Talon"
11818,4,11,"Bard, Corki, DragonGold, Lulu, Sona, Thresh, TrainerDragon, Tristana",746,3.80429,"Bard, Braum, Corki, DragonGold, Lulu, Sona, TrainerDragon, Tristana"
18898,5,17,"AoShin, Bard, Lulu, Ornn, Sona, Sylas, TrainerDragon, Tristana, Zoe",686,3.842566,"AoShin, Bard, Lulu, Nami, Ornn, Sylas, Zoe"
16820,8,19,"DragonPurple, Elise, Neeko, Ornn, Qiyana, Talon",678,3.941003,"Diana, DragonPurple, Kayn, Ornn, Pyke, Qiyana, Talon"
4107,5,8,"Bard, Corki, DragonGreen, Gnar, Lulu, Neeko, Soraka, TrainerDragon, Tristana",826,4.193705,"Anivia, Ashe, DragonGreen, Gnar, Lulu, Neeko, Soraka"
12947,3,3,"Bard, Illaoi, Lulu, Nami, Ornn, Qiyana, Talon, Twitch, Varus",1950,4.272308,"Heimerdinger, Illaoi, Lulu, Nami, Sylas, TrainerDragon, Twitch, Varus"
10614,2,0,"Elise, Gnar, Neeko, Nidalee, Pyke, Shyvana, Swain, Taric",873,4.302405,"Elise, Gnar, Neeko, Nidalee, Shyvana, Soraka, Swain"


In [319]:
kmode_ranking_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_{THREEDAY}_kmode_comp_ranking.csv'), index=False)

## KMeans

In [320]:
units_composition_df = units_comp_df.copy()
X = units_composition_df.copy()
X.pop(TARGETNAME)

18367    1
21360    1
2817     1
8264     1
8262     1
        ..
2482     8
21628    8
9948     8
18085    8
16059    8
Name: placement, Length: 23616, dtype: int64

In [321]:
# normalization to improve the k-means result.
normalizer = Normalizer(copy=False)
# kms = KMeans(n_clusters=num_clusters, init = "k-means++", n_init = 10, verbose=1)
# kmeans = make_pipeline(normalizer, kms)

In [322]:
# # Elbow curve to find optimal K
# cost = []
# K = range(5,40,10)
# for num_clusters in list(K):
#     kmeans.fit_predict(X)
#     cost.append(kmeans[-1].inertia_)
    
# plt.plot(K, cost, 'bx-')
# plt.xlabel('No. of clusters')
# plt.ylabel('Cost')
# plt.title('Elbow Method For Optimal k')
# plt.show()

In [323]:
# Building the model with 30 clusters
kms = KMeans(n_clusters=30, init = "k-means++", n_init = 10, verbose=1)
kmeans = make_pipeline(normalizer, kms)
clusters = kmeans.fit_predict(X)
clusters

Initialization complete
Iteration 0, inertia 10351.530001147366.
Iteration 1, inertia 6807.854379875499.
Iteration 2, inertia 6663.144344320976.
Iteration 3, inertia 6630.644921974086.
Iteration 4, inertia 6614.647704639863.
Iteration 5, inertia 6605.660020229707.
Iteration 6, inertia 6602.459406346255.
Iteration 7, inertia 6600.969262177219.
Iteration 8, inertia 6600.233786979943.
Iteration 9, inertia 6599.766704450784.
Iteration 10, inertia 6599.273263034257.
Iteration 11, inertia 6599.079516007365.
Iteration 12, inertia 6598.7775967701355.
Iteration 13, inertia 6598.385055333133.
Iteration 14, inertia 6598.02526269256.
Iteration 15, inertia 6597.780933351165.
Iteration 16, inertia 6597.543638577995.
Iteration 17, inertia 6597.36769127527.
Iteration 18, inertia 6596.953384454617.
Iteration 19, inertia 6596.686150965153.
Iteration 20, inertia 6596.444589375736.
Iteration 21, inertia 6596.258646244335.
Iteration 22, inertia 6596.12655691169.
Iteration 23, inertia 6596.059629822346.
Ite

array([26, 19,  8, ...,  9, 11, 15])

In [324]:
kmeans_ranking_df = units_composition_df.copy()
kmeans_ranking_df.insert(0, "group", clusters, True)

In [325]:
kmeans_ranking_df

Unnamed: 0,group,placement,TFT7_Aatrox,TFT7_Anivia,TFT7_AoShin,TFT7_Ashe,TFT7_AurelionSol,TFT7_Bard,TFT7_Braum,TFT7_Corki,...,Set7_Scalescorn,Set7_Shapeshifter,Set7_Shimmerscale,Set7_SpellThief,Set7_Starcaller,Set7_Swiftshot,Set7_Tempest,Set7_Trainer,Set7_Warrior,Set7_Whispers
18367,26,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,1,0,1,0
21360,19,1,0,0,0,0,0,2,0,2,...,0,0,0,0,0,0,0,1,0,1
2817,8,1,0,0,0,0,0,0,0,0,...,2,0,0,0,0,0,1,0,2,0
8264,5,1,0,0,2,0,0,0,0,0,...,0,0,0,1,0,0,2,1,0,0
8262,9,1,0,0,0,0,0,2,0,3,...,0,0,1,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2482,19,8,0,0,0,0,0,0,0,2,...,0,0,0,0,0,0,0,2,0,0
21628,29,8,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
9948,9,8,0,0,0,0,0,0,0,1,...,0,0,1,1,0,0,0,1,0,0
18085,11,8,0,0,0,0,0,0,2,0,...,2,0,0,0,0,0,0,0,1,0


In [326]:
kmeans_ranking_df = get_unit_composition_ranking(kmeans_ranking_df, add_trait=False)

In [327]:
kmeans_ranking_df

Unnamed: 0,placement,group,comp
21891,2,0,"Heimerdinger, Illaoi, Lulu, Nami, Ryze, Sona, Sylas, TrainerDragon, Vladimir"
22798,6,0,"Heimerdinger, Illaoi, Lillia, Nami, Ryze, Sylas, Vladimir"
21290,3,0,"Illaoi, Nami, Ryze, Shen, Skarner, Sylas, Vladimir, Yasuo"
21011,8,0,"Heimerdinger, Illaoi, Lulu, Nami, Ornn, Ryze, TrainerDragon, Vladimir"
11105,3,0,"Heimerdinger, Illaoi, Lulu, Nami, Ryze, Soraka, Sylas, TrainerDragon, Vladimir, Zoe"
...,...,...,...
15148,4,29,"DragonBlue, Hecarim, Nunu, Ryze, Sejuani, Twitch, Yasuo"
11191,5,29,"DragonBlue, Hecarim, Nunu, Sona, Yasuo, Yone"
12284,3,29,"DragonBlue, Hecarim, Lillia, Nunu, Ornn, Shen, Yone"
4512,5,29,"DragonBlue, Hecarim, Nunu, Sejuani, Twitch, Yasuo, Yone"


In [328]:
kmeans_ranking_df['grp_count'] = kmeans_ranking_df.groupby(['group'], as_index=False)['group'].transform('count')
kmeans_ranking_df['grp_placement'] = kmeans_ranking_df.groupby(['group'], as_index=False)['placement'].transform('mean')

In [329]:
kmeans_ranking_df['mode'] = kmeans_ranking_df.groupby('group')['comp'].transform(lambda x: pd.Series.mode(x)[0])
kmeans_ranking_df.groupby(['group']).head(1).sort_values(by='grp_placement')

Unnamed: 0,placement,group,comp,grp_count,grp_placement,mode
12940,4,23,"AoShin, Bard, Heimerdinger, Lulu, Ornn, Sylas, TrainerDragon, Zoe",967,3.327818,"AoShin, Bard, Heimerdinger, Lulu, Ornn, Sylas, TrainerDragon, Zoe"
13980,5,20,"AoShin, Ezreal, LeeSin, Ornn, Pyke, Qiyana, Shen",814,3.599509,"AoShin, Bard, Lulu, Nami, Ornn, Sylas, Zoe"
16911,2,5,"AoShin, Heimerdinger, Lulu, Nami, Ornn, Ryze, Sylas, TrainerDragon, Zoe",851,3.623972,"AoShin, Heimerdinger, Lulu, Nami, Ornn, Sylas, TrainerDragon, Zoe"
6601,1,8,"Braum, Diana, Olaf, Ornn, Qiyana, Shen, Talon, Yasuo",763,4.058978,"Braum, Diana, Lillia, Olaf, Ornn, Qiyana, Shen, Talon"
13676,6,4,"Anivia, Bard, DragonGreen, Gnar, Lulu, Neeko, Talon",473,4.103594,"Anivia, Bard, DragonGreen, Gnar, Lulu, Neeko, Yasuo"
6953,6,7,"Illaoi, Nami, Ornn, Shen, Sylas, Twitch, Varus",581,4.142857,"Bard, Illaoi, Nami, Ornn, Qiyana, Talon, Twitch, Varus"
13207,2,25,"Diana, Kayn, Olaf, Ornn, Pyke, Qiyana, Talon, Thresh",629,4.187599,"Diana, Olaf, Ornn, Pyke, Qiyana, Shen, Sylas, Talon"
16687,6,10,"Bard, Hecarim, Neeko, Ornn, Qiyana, Sona, Thresh, Volibear, Xayah, Yasuo",298,4.255034,
11770,2,22,"AurelionSol, Heimerdinger, Illaoi, Lulu, Nami, Sylas, TrainerDragon, Tristana, Varus",1432,4.355447,"Heimerdinger, Illaoi, Lulu, Nami, Sylas, TrainerDragon, Twitch, Varus"
8206,7,1,"DragonPurple, Olaf, Ornn, Qiyana, Sylas, Talon",1155,4.413853,"DragonPurple, Ornn, Pyke, Qiyana, Shen, Sylas, Thresh"


In [330]:
kmeans_ranking_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_{THREEDAY}_kmeans_comp_ranking.csv'), index=False)

## DBSCAN

In [331]:
units_composition_df = units_comp_df.copy()
X = units_composition_df.copy()
X.pop(TARGETNAME)

18367    1
21360    1
2817     1
8264     1
8262     1
        ..
2482     8
21628    8
9948     8
18085    8
16059    8
Name: placement, Length: 23616, dtype: int64

In [332]:
units_composition_df.shape

(23616, 88)

In [333]:
# Building the model with 3 clusters
# normalization to improve the k-means result.
normalizer = Normalizer(copy=False)
dbs= DBSCAN(eps=0.37, metric='euclidean', min_samples=3, n_jobs=-1) #eps=0.053, metric='cosine'
dbscan = make_pipeline(normalizer, dbs)
clusters = dbscan.fit_predict(X)
clusters

array([ 0,  2,  1, ..., -1,  1,  5], dtype=int64)

In [334]:
dbscan_ranking_df = units_composition_df.copy()
dbscan_ranking_df.insert(0, "group", clusters, True)

In [335]:
dbscan_ranking_df

Unnamed: 0,group,placement,TFT7_Aatrox,TFT7_Anivia,TFT7_AoShin,TFT7_Ashe,TFT7_AurelionSol,TFT7_Bard,TFT7_Braum,TFT7_Corki,...,Set7_Scalescorn,Set7_Shapeshifter,Set7_Shimmerscale,Set7_SpellThief,Set7_Starcaller,Set7_Swiftshot,Set7_Tempest,Set7_Trainer,Set7_Warrior,Set7_Whispers
18367,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,1,0,1,0
21360,2,1,0,0,0,0,0,2,0,2,...,0,0,0,0,0,0,0,1,0,1
2817,1,1,0,0,0,0,0,0,0,0,...,2,0,0,0,0,0,1,0,2,0
8264,2,1,0,0,2,0,0,0,0,0,...,0,0,0,1,0,0,2,1,0,0
8262,2,1,0,0,0,0,0,2,0,3,...,0,0,1,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2482,-1,8,0,0,0,0,0,0,0,2,...,0,0,0,0,0,0,0,2,0,0
21628,10,8,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
9948,-1,8,0,0,0,0,0,0,0,1,...,0,0,1,1,0,0,0,1,0,0
18085,1,8,0,0,0,0,0,0,2,0,...,2,0,0,0,0,0,0,0,1,0


In [336]:
dbscan_ranking_df = get_unit_composition_ranking(dbscan_ranking_df, add_trait=False)

In [337]:
dbscan_ranking_df

Unnamed: 0,placement,group,comp
14572,7,-1,"Corki, DragonGold, Heimerdinger, Ornn, Qiyana, Sona, Sylas, TrainerDragon, Tristana, Zoe"
10834,2,-1,"Bard, DragonGreen, Gnar, Lulu, Neeko, Soraka, Thresh, Yasuo"
22750,8,-1,"AoShin, Hecarim, Lulu, Neeko, Nunu, Ryze, Sejuani, Sona"
6985,7,-1,"Ashe, DragonGold, Neeko, Swain, Taric, Xayah, Yasuo"
8548,2,-1,"DragonPurple, LeeSin, Qiyana, Ryze, Sylas, Talon, Yasuo"
...,...,...,...
2134,8,178,"Ashe, Kayn, Senna, Sett, Shen, Swain"
18236,8,178,"Ashe, Hecarim, Kayn, Senna, Sett, Shen, Swain"
20470,8,179,"DragonBlue, DragonGold, DragonGreen, Leona, Soraka"
18257,7,179,"DragonBlue, DragonGold, DragonGreen, Leona"


In [338]:
dbscan_ranking_df['grp_count'] = dbscan_ranking_df.groupby(['group'], as_index=False)['group'].transform('count')
dbscan_ranking_df['grp_placement'] = dbscan_ranking_df.groupby(['group'], as_index=False)['placement'].transform('mean')
dbscan_ranking_df['mode'] = dbscan_ranking_df.groupby('group')['comp'].transform(lambda x: pd.Series.mode(x)[0])
dbscan_ranking_df.groupby(['group']).head(1).sort_values(by='grp_count', ascending=False)[:60]

Unnamed: 0,placement,group,comp,grp_count,grp_placement,mode
7584,2,3,"DragonGreen, Elise, Gnar, Neeko, Nidalee, Pyke, Soraka, Thresh",6147.0,3.96368,"Elise, Gnar, Neeko, Nidalee, Shyvana, Soraka, Swain"
14061,3,0,"Hecarim, Ornn, Qiyana, Sejuani, Shen, Talon, Twitch, Xayah, Yasuo",6078.0,4.510529,"Hecarim, Ornn, Qiyana, Sejuani, Shen, Talon, Twitch, Xayah"
6557,7,1,"Braum, Diana, Lillia, Olaf, Sejuani, Shen",2137.0,4.231839,"Braum, Diana, Lillia, Olaf, Sejuani, Shen, Talon"
3399,1,2,"AoShin, Bard, Heimerdinger, Lulu, Ornn, Sylas, TrainerDragon, Zoe",1941.0,4.27005,"AoShin, Bard, Heimerdinger, Lulu, Ornn, Sylas, TrainerDragon, Zoe"
16713,4,9,"AurelionSol, Illaoi, Nami, Nidalee, Skarner, Varus, Vladimir",1879.0,4.535912,"AurelionSol, Illaoi, Nami, Nidalee, Skarner, Varus, Vladimir"
7285,7,8,"DragonPurple, Elise, Neeko, Ornn, Sylas, Thresh",1873.0,4.351251,"DragonPurple, Elise, Neeko, Soraka, Sylas, Taric, Thresh"
17553,1,11,"Ashe, DragonGreen, Gnar, Karma, Neeko, Soraka, Taric, Yasuo",848.0,4.205063,"Anivia, Ashe, DragonGreen, Gnar, Lulu, Neeko, Soraka"
3604,3,4,"DragonBlue, DragonGold, DragonPurple, Leona, Sylas, Twitch",413.0,4.764706,"DragonBlue, DragonGold, DragonPurple, Leona, Sylas"
14202,6,12,"Hecarim, Kayn, Sett, Shen, Shyvana, Swain, Xayah, Yasuo",395.0,4.284211,"Hecarim, Kayn, Sett, Shen, Shyvana, Swain, Xayah"
4477,7,7,"Heimerdinger, Illaoi, Lulu, Nami, Ornn, TrainerDragon, Twitch, Varus",274.0,4.245595,"Heimerdinger, Illaoi, Lulu, Nami, Sylas, TrainerDragon, Twitch, Varus"


In [339]:
dbscan_ranking_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_{THREEDAY}_dbscan_comp_ranking.csv'), index=False)