In [8]:
import pandas as pd
from pathlib import Path
import numpy as np
import melee
from tqdm import tqdm
from multiprocessing import Pool
from slippi.parse import parse, ParseEvent
from slippi import Game
from tabulate import tabulate
from src.dataset.tools.melee_dataset import MeleeDataset


In [4]:
dataset_path = Path("../slp-dataset/")
training_path = dataset_path / "training_data"
training_files = list(training_path.glob("*.slp"))
print(f"{len(training_files)} files in dataset")


95102 files in dataset


In [21]:
# Print out game file names -- these seem pretty random, will need to parse these 
# individually and setup a database to figure out distibrutions of matchups, etc.
random_idx = np.random.choice(len(training_files), size=10)


In [24]:

for i in random_idx:
    print(training_files[i].stem + training_files[i].suffix)
    game = Game(str(training_files[i]))
    ports = []
    players = []    
    for i, player in enumerate(game.start.players):
        if player is not None:    
            players.append(player)
            ports.append(i)

    p1 = players[0].character.name
    p1_end_stocks = game.frames[len(game.frames)-1].ports[ports[0]].leader.post.stocks
    p2_end_stocks = game.frames[len(game.frames)-1].ports[ports[1]].leader.post.stocks
    p2 = players[1].character.name    
    stage = game.start.stage.name
    print(f"{p1} {p1_end_stocks} {p2} {p2_end_stocks} {stage}")
        
    # print(game.frames[len(game.frames)-1])
    

Game_20191013T201756.slp
FALCO 3 FALCO 0 FOUNTAIN_OF_DREAMS
19_17_24 [LEAN] Fox + [YUNG] Falco (YS).slp
FOX 2 FALCO 0 YOSHIS_STORY
Game_20190824T090751.slp
FOX 4 YOSHI 4 BATTLEFIELD
Fox vs Puff (ACAB) [DL] Game_20181024T213120.slp
FOX 0 JIGGLYPUFF 2 DREAM_LAND_N64
Game_20190323T123710.slp
PEACH 2 FOX 1 BATTLEFIELD
Game_20190823T143225.slp
FOX 0 MARTH 2 POKEMON_STADIUM
Game_20190323T201724.slp
MARTH 0 FOX 2 FOUNTAIN_OF_DREAMS
Game_19490902T130404.slp
FOX 2 FALCO 2 FINAL_DESTINATION
Game_20190614T220025.slp
FOX 4 FOX 0 YOSHIS_STORY
Game_20190420T215103.slp
GANONDORF 1 GANONDORF 0 BATTLEFIELD


In [26]:
def work(path):    
    try:    
        game = Game(str(path))    
        ports = []
        players = []    
        for i, player in enumerate(game.start.players):
            if player is not None:    
                players.append(player)
                ports.append(i)

        p1 = players[0].character.name
        p1_end_stocks = game.frames[len(game.frames)-1].ports[ports[0]].leader.post.stocks
        p2_end_stocks = game.frames[len(game.frames)-1].ports[ports[1]].leader.post.stocks
        p2 = players[1].character.name
        stage = game.start.stage.name
        return [path, p1, p1_end_stocks, p2, p2_end_stocks, stage]    
        
    except:    
        return [path, "err", "err", "err", "err", "err"]  # some files seem corrupted

dataset = pd.DataFrame(columns=["path", "P1", "P1_END_STOCKS", "P2", "P2_END_STOCKS" "stage"])    

pool = Pool(8)    
for idx, output in enumerate(tqdm(pool.imap_unordered(work, training_files), total=len(training_files))):
    dataset.loc[idx] = output

# There is certainly a faster way to do this using ParseEvent (probably)

100%|██████████| 95102/95102 [1:26:45<00:00, 18.27it/s]


In [27]:
# Save to file
dataset_no_err = dataset[dataset.P1 != "err"]
print(f"{len(dataset) - len(dataset_no_err)} bad examples (?)")
dataset_no_err.to_csv(dataset_path / "dataset.csv")

32 bad examples (?)


In [6]:
# Generate labels and save
df = pd.read_csv("../slp-dataset/dataset.csv")
filtered = df.copy()
label = []
for i in range(len(df)):
    row = df.iloc[i]
    if ((row['P1_END_STOCKS'] == 0 and row['P2_END_STOCKS'] != 0) 
        or (row['P1_END_STOCKS'] != 0 and row['P2_END_STOCKS'] == 0)):
        if row['P1_END_STOCKS'] == 0:
            label.append(0)  # lose
        else:
            label.append(1)  # win 
    else:
        # game incomplete
        label.append(-1)
    
filtered["label"] = label
filtered = filtered[filtered["label"] != -1]
print(len(filtered))
filtered.to_csv(dataset_path / "training.csv")      
print(filtered.iloc[0])

86847
Unnamed: 0                                                       0
path             ../slp-dataset/training_data/19_43_23 Marth + ...
P1                                                           MARTH
P1_END_STOCKS                                                    3
P2                                                           FALCO
P2_END_STOCKS                                                    0
stage                                                  BATTLEFIELD
label                                                            1
Name: 0, dtype: object


In [42]:
data = {}
for character in dataset.P1.unique():
    data[character] = (len(dataset[dataset.P1 == character]) + len(dataset[dataset.P2 == character]))/len(dataset)

headers = ['Character', '%']
data_t = sorted([(k,v) for k,v in data.items()], key=lambda x:x[1], reverse=True)
print(tabulate(data_t, headers=headers))

Character                 %
--------------  -----------
FOX             0.557517
FALCO           0.379918
MARTH           0.29727
CAPTAIN_FALCON  0.251761
SHEIK           0.132857
PEACH           0.0742781
JIGGLYPUFF      0.056308
SAMUS           0.0383904
GANONDORF       0.0299889
ICE_CLIMBERS    0.0278648
LUIGI           0.0257408
PIKACHU         0.020641
YOSHI           0.0171816
DR_MARIO        0.0138483
ZELDA           0.012965
DONKEY_KONG     0.0121659
LINK            0.00908498
MARIO           0.00876953
GAME_AND_WATCH  0.00677168
ROY             0.00479485
YOUNG_LINK      0.00465816
KIRBY           0.0044058
NESS            0.00374335
MEWTWO          0.00359614
BOWSER          0.00288112
PICHU           0.00192425
err             0.000672962


In [9]:
dataset = MeleeDataset("../slp-dataset/training.csv")
print(len(dataset))
print(dataset[1])

86847
Unnamed: 0                                                       1
Unnamed: 0.1                                                     1
path             ../slp-dataset/training_data/Game_20190706T111...
P1                                                             FOX
P1_END_STOCKS                                                    3
P2                                                           PEACH
P2_END_STOCKS                                                    0
stage                                                 YOSHIS_STORY
label                                                            1
Name: 1, dtype: object
