In [1]:
import pandas as pd
import json
import os
import matplotlib.cm
import pickle
from collections import defaultdict

# Each Audio Clip is used in multiple places.

Map each audio to all characters that use that clip.

Map each audio to all descriptions that use that clip.

In [2]:
with open('./duplicate-sounds-lookup.pkl', 'rb') as f:
    audio_groups = pickle.load(f)
    audio_groups = [[f[len('./tmp/'):] for f in group] for group in audio_groups]
    
audio_files = [
    file
    for group in audio_groups
    for file in group
]
print('Game has', len(audio_files), 'audio files. And', len(audio_groups), 'unique audio sounds.')

Game has 103945 audio files. And 11019 unique audio sounds.


In [3]:
print('Every sound belongs to one-to-many characters')

CHARS = {
    'brave': 'Hero',
    'buddy': 'Banjo & Kazooie',
    'captain': 'Captain Falcon',
    'dedede': 'King Dedede',
    'demon': 'Kazuya',
    'dolly': 'Terry',
    'donkey': 'Donkey Kong',
    'diddy': 'Diddy Kong',
    'duckhunt': 'Duck Hunt',
    'edge': 'Sephiroth',
    'eflame': 'Pyra / Mithra',
    'element': 'Pyra / Mithra',
    'elight': 'Pyra / Mithra',
    'gamewatch': 'Mr. Game & Watch',
    'ganon': 'Ganondorf',
    'gaogaen': 'Inceneroar',
    'gekkouga': 'Greninja',
    'jack': 'Joker',
    'kamui': 'Corrin',
    'koopa': 'Bowser',
    'koopag': 'Bowser',  # Mega-Bowser
    'koopajr': 'Bowser Jr.',
    'krool': 'King K. Rool',
    'littlemac': 'Little Mac',
    'mariod': 'Dr. Mario',
    'master': 'Byleth',
    'metaknight': 'Meta Knight',
    'mewtwo': 'Mewtwo',
    'mii': 'Mii',
    'miienemyf': 'Mii',
    'miienemyg': 'Mii',
    'miienemys': 'Mii',
    'miifighter': 'Mii',
    'miigunner': 'Mii',
    'miiswordsman': 'Mii',
    'murabito': 'Villager',
    'nana': 'Ice Climbers',
    'popo': 'Ice Climbers',
    'packun': 'Piranha Plant',
    'pacman': 'Pac Man',
    'pfushigisou': 'Pokemon Trainer',
    'plizardon': 'Pokemon Trainer',
    'ptrainer': 'Pokemon Trainer',
    'pzenigame': 'Pokemon Trainer',
    'pickel': 'Steve / Alex',
    'pikmin': 'Olimar',
    'pitb': 'Dark Pit',
    'purin': 'Jigglypuff',
    'reflet': 'Robin',
    'robot': 'Rob',
    'rockman': 'Mega Man',
    'rosetta': 'Rosalina & Luma',
    'samusd': 'Dark Samus',
    'shizue': 'Isabelle',
    'szerosuit': 'Zero Suit Samus',
    'tantan': 'Min Min',
    'toonlink': 'Toon Link',
    'trail': 'Sora',
    'wiifit': 'Wii Fit Trainer',
    'younglink': 'Young Link',
}

def file_to_character(file):
    result = file[3:].split('---', 1)[0].split('_')[0]
    if result in CHARS:
        return CHARS[result]
    return result[0].upper() + result[1:]


NUMS = ''.join(str(i) for i in range(10))
def file_to_desc(file):
    result = file[3:].split('---', 1)[1].split('_')[2].strip(NUMS)
    if result == 'sheildguard':
        return 'shieldguard'
    return result


file_to_characters = {}
file_to_tags = {}
for group in audio_groups:
    characters_in_group = set()
    tags = set()
    for file in group:
        characters_in_group.add(file_to_character(file))
        tags.add(file_to_desc(file))
        
    for file in group:
        file_no_ext = file.split('.', 1)[0]
        file_to_characters[file_no_ext] = list(characters_in_group)
    for file in group:
        file_no_ext = file.split('.', 1)[0]
        file_to_tags[file_no_ext] = list(tags)

Every sound belongs to one-to-many characters


In [4]:
pd.set_option('max_rows', 10000)
print('Audio clips per action')
pd.Series(file_to_tags.values()).apply(pd.Series).stack().value_counts()

Audio clips per action


special                   22226
step                       6709
appeal                     5882
attackair                  5355
smash                      4781
win                        4580
swing                      4355
final                      3862
attackhard                 3800
jump01.idsp                2859
.idsp                      2504
dash                       2504
throw                      2394
jump02.idsp                2336
final02.idsp               2118
landing01.idsp             2068
final03.idsp               2048
escapeair.idsp             2004
escape.idsp                2003
final04.idsp               1983
damage                     1932
attack01.idsp              1887
missfoot01.idsp            1878
attack03.idsp              1878
furasleep.idsp             1869
attack02.idsp              1851
attack05.idsp              1851
furafura.idsp              1851
win01.idsp                 1845
attack06.idsp              1842
knockout.idsp              1842
damagefl

In [5]:
pd.set_option('max_rows', 10000)
print('Audio clips per character')
pd.Series(file_to_characters.values()).apply(pd.Series).stack().value_counts()

Audio clips per character


Pyra / Mithra       9834
Pokemon Trainer     4032
Mii                 3812
Joker               3295
Bayonetta           2779
Banjo & Kazooie     2758
Cloud               2754
Pikachu             2729
Young Link          2621
Min Min             2585
Kazuya              2585
Robin               2522
Lucas               2504
Ice Climbers        2475
Meta Knight         2423
Bowser              2405
Greninja            2333
Little Mac          2315
Pit                 2261
Ness                2261
Lucario             2252
Bowser Jr.          2232
Luigi               2216
Wii Fit Trainer     2190
Hero                2169
Wario               2126
Kirby               2118
King Dedede         2117
Chrom               2090
Ken                 2088
Dark Samus          2081
Yoshi               2072
Diddy Kong          2072
Zero Suit Samus     2027
Jigglypuff          1991
Donkey Kong         1982
Mewtwo              1973
Roy                 1971
Richter             1944
Simon               1935


In [6]:
print('Voice channels & Sound-effects are not grouped together (except with the empty sound group)')
len([g for g in audio_groups if any(f.startswith('se_') for f in g) and any(f.startswith('vc_') for f in g)])

Voice channels & Sound-effects are not grouped together (except with the empty sound group)


1

# Enrich T-SNE Output with Characters + Tags + Colors

In [7]:
with open('./smash-audio-1d.json') as f:
    smash_1d = pd.DataFrame(json.load(f))

with open('./smash-audio-2d.json') as f:
    smash_2d = pd.DataFrame(json.load(f))

smash_2d.head()

Unnamed: 0,path,point
0,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.5696493491718344, 0.4781135807713881]"
1,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.5059782610530396, 0.9770618915967754]"
2,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.3650738872352471, 0.6661001788908766]"
3,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.9113770948051566, 0.5967620419447662]"
4,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.5453826343644428, 0.12557459152143538]"


In [8]:
smash_2d['file'] = smash_2d['path'].apply(os.path.basename)
smash_2d['x'] = smash_2d['point'].apply(lambda o: o[0])
smash_2d['y'] = smash_2d['point'].apply(lambda o: o[1])
smash_2d.head()

Unnamed: 0,path,point,file,x,y
0,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.5696493491718344, 0.4781135807713881]",vc_mii_m06_warrior_low---vc_mii_finalstart.wav,0.569649,0.478114
1,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.5059782610530396, 0.9770618915967754]",vc_dedede_cheer---vc_dedede_cheer.wav,0.505978,0.977062
2,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.3650738872352471, 0.6661001788908766]",se_link---se_link_dash_soubi_02.wav,0.365074,0.6661
3,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.9113770948051566, 0.5967620419447662]",se_jack---se_jack_dash_turn_ft.wav,0.911377,0.596762
4,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.5453826343644428, 0.12557459152143538]",se_cloud---se_cloud_win02.wav,0.545383,0.125575


In [9]:
cmap = matplotlib.cm.get_cmap('viridis')
smash_2d['color_scale'] = smash_2d.merge(smash_1d, suffixes=('_2d', ''), on='path')['x']
smash_2d['hex'] = smash_2d['color_scale'].apply(lambda c: matplotlib.colors.rgb2hex(cmap(c)))
smash_2d['chars'] = smash_2d['file'].apply(lambda f: file_to_characters[f[:-len('.wav')]])
smash_2d['tags'] = smash_2d['file'].apply(lambda f: file_to_tags[f[:-len('.wav')]])
smash_2d.head()

Unnamed: 0,path,point,file,x,y,color_scale,hex,chars,tags
0,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.5696493491718344, 0.4781135807713881]",vc_mii_m06_warrior_low---vc_mii_finalstart.wav,0.569649,0.478114,0.569649,#1fa188,[Mii],[finalstart.idsp]
1,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.5059782610530396, 0.9770618915967754]",vc_dedede_cheer---vc_dedede_cheer.wav,0.505978,0.977062,0.505978,#20928c,[King Dedede],[cheer.idsp]
2,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.3650738872352471, 0.6661001788908766]",se_link---se_link_dash_soubi_02.wav,0.365074,0.6661,0.365074,#2d708e,[Link],"[dash, landing]"
3,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.9113770948051566, 0.5967620419447662]",se_jack---se_jack_dash_turn_ft.wav,0.911377,0.596762,0.911377,#c5e021,[Joker],[dash]
4,/Users/ahmedelzeiny/Documents/practice/smash-a...,"[0.5453826343644428, 0.12557459152143538]",se_cloud---se_cloud_win02.wav,0.545383,0.125575,0.545383,#1e9b8a,[Cloud],[win02.idsp]


In [10]:
with open('sounds_of_smash.json', 'w') as f:
    output_json = smash_2d[['file', 'x', 'y', 'hex', 'chars', 'tags']].to_dict('records')
    json.dump(output_json, f)

# Explore file directory structure

In [11]:
dirs = defaultdict(list)
for file in audio_files:
    fdir, fname = file.split('---', 1)
    dirs[fdir].append(fname)

In [12]:
sorted(dirs.keys(), key=lambda s: s[3:])

['se_bayonetta',
 'vc_bayonetta',
 'se_bayonetta_c00',
 'vc_bayonetta_c00',
 'se_bayonetta_c01',
 'vc_bayonetta_c01',
 'se_bayonetta_c02',
 'vc_bayonetta_c02',
 'se_bayonetta_c03',
 'vc_bayonetta_c03',
 'se_bayonetta_c04',
 'vc_bayonetta_c04',
 'se_bayonetta_c05',
 'vc_bayonetta_c05',
 'se_bayonetta_c06',
 'vc_bayonetta_c06',
 'se_bayonetta_c07',
 'vc_bayonetta_c07',
 'vc_bayonetta_cheer',
 'vc_bayonetta_cheer_c00',
 'vc_bayonetta_cheer_c01',
 'vc_bayonetta_cheer_c02',
 'vc_bayonetta_cheer_c03',
 'vc_bayonetta_cheer_c04',
 'vc_bayonetta_cheer_c05',
 'vc_bayonetta_cheer_c06',
 'vc_bayonetta_cheer_c07',
 'se_brave',
 'vc_brave',
 'se_brave_c00',
 'vc_brave_c00',
 'se_brave_c01',
 'vc_brave_c01',
 'se_brave_c02',
 'vc_brave_c02',
 'se_brave_c03',
 'vc_brave_c03',
 'se_brave_c04',
 'vc_brave_c04',
 'se_brave_c05',
 'vc_brave_c05',
 'se_brave_c06',
 'vc_brave_c06',
 'se_brave_c07',
 'vc_brave_c07',
 'vc_brave_cheer',
 'vc_brave_cheer_c00',
 'vc_brave_cheer_c01',
 'vc_brave_cheer_c02',
 'vc_

# Most common sounds

In [13]:
print('Audio clips with the highest amounts of re-use')
pd.Series(len(g) for g in audio_groups).sort_values(ascending=False).head(5)

Audio clips with the highest amounts of re-use


607     1100
3995     162
764      153
6589     150
6647     125
dtype: int64

In [14]:
print('This is an empty audio clip with nothing in it.')
sorted(audio_groups[607])

This is an empty audio clip with nothing in it.


['se_cloud_c01---se_cloud_final04_02.idsp',
 'se_cloud_c01---se_cloud_final06.idsp',
 'se_cloud_c01---se_cloud_final_swing01.idsp',
 'se_cloud_c01---se_cloud_final_swing02.idsp',
 'se_cloud_c01---se_cloud_final_swing03.idsp',
 'se_cloud_c01_append---se_cloud_final04_02.idsp',
 'se_cloud_c01_append---se_cloud_final06.idsp',
 'se_cloud_c01_append---se_cloud_final_swing01.idsp',
 'se_cloud_c01_append---se_cloud_final_swing02.idsp',
 'se_cloud_c01_append---se_cloud_final_swing03.idsp',
 'se_cloud_c03---se_cloud_final04_02.idsp',
 'se_cloud_c03---se_cloud_final06.idsp',
 'se_cloud_c03---se_cloud_final_swing01.idsp',
 'se_cloud_c03---se_cloud_final_swing02.idsp',
 'se_cloud_c03---se_cloud_final_swing03.idsp',
 'se_cloud_c05---se_cloud_final04_02.idsp',
 'se_cloud_c05---se_cloud_final06.idsp',
 'se_cloud_c05---se_cloud_final_swing01.idsp',
 'se_cloud_c05---se_cloud_final_swing02.idsp',
 'se_cloud_c05---se_cloud_final_swing03.idsp',
 'se_cloud_c07---se_cloud_final04_02.idsp',
 'se_cloud_c07---

In [15]:
print('Pichus + Pikachus zap sound')
sorted(audio_groups[3995])

Pichus + Pikachus zap sound


['se_pichu---se_pichu_final04.idsp',
 'se_pichu---se_pichu_final04_02.idsp',
 'se_pichu---se_pichu_final04_03.idsp',
 'se_pichu---se_pichu_final04_04.idsp',
 'se_pichu---se_pichu_final04_05.idsp',
 'se_pichu---se_pichu_final04_06.idsp',
 'se_pichu---se_pichu_final04_07.idsp',
 'se_pichu---se_pichu_final04_08.idsp',
 'se_pichu---se_pichu_final04_09.idsp',
 'se_pichu_c00---se_pichu_final04.idsp',
 'se_pichu_c00---se_pichu_final04_02.idsp',
 'se_pichu_c00---se_pichu_final04_03.idsp',
 'se_pichu_c00---se_pichu_final04_04.idsp',
 'se_pichu_c00---se_pichu_final04_05.idsp',
 'se_pichu_c00---se_pichu_final04_06.idsp',
 'se_pichu_c00---se_pichu_final04_07.idsp',
 'se_pichu_c00---se_pichu_final04_08.idsp',
 'se_pichu_c00---se_pichu_final04_09.idsp',
 'se_pichu_c01---se_pichu_final04.idsp',
 'se_pichu_c01---se_pichu_final04_02.idsp',
 'se_pichu_c01---se_pichu_final04_03.idsp',
 'se_pichu_c01---se_pichu_final04_04.idsp',
 'se_pichu_c01---se_pichu_final04_05.idsp',
 'se_pichu_c01---se_pichu_final04

In [16]:
print('Kazuyas "WHOOSH" sound.')
sorted(audio_groups[764])

Kazuyas "WHOOSH" sound.


['se_demon---se_demon_appeal_S01.idsp',
 'se_demon---se_demon_appeal_S02.idsp',
 'se_demon---se_demon_appeal_S04.idsp',
 'se_demon---se_demon_attackair_L01.idsp',
 'se_demon---se_demon_attackdash01.idsp',
 'se_demon---se_demon_attackdash02.idsp',
 'se_demon---se_demon_attacksquat4.idsp',
 'se_demon---se_demon_attackstand1.idsp',
 'se_demon---se_demon_attackstand21.idsp',
 'se_demon---se_demon_attackstand22.idsp',
 'se_demon---se_demon_attackstep2S_02.idsp',
 'se_demon---se_demon_swing_short01.idsp',
 'se_demon---se_demon_swing_short02.idsp',
 'se_demon---se_demon_throw_b02.idsp',
 'se_demon---se_demon_throw_b03.idsp',
 'se_demon---se_demon_throw_catch_swing.idsp',
 'se_demon---se_demon_throwcommand_swing02.idsp',
 'se_demon_c00---se_demon_appeal_S01.idsp',
 'se_demon_c00---se_demon_appeal_S02.idsp',
 'se_demon_c00---se_demon_appeal_S04.idsp',
 'se_demon_c00---se_demon_attackair_L01.idsp',
 'se_demon_c00---se_demon_attackdash01.idsp',
 'se_demon_c00---se_demon_attackdash02.idsp',
 'se_d

In [17]:
print('Characters with Unique Sounds per Skin')
pd.Series([
    f.split('_', 1)[1].split('---')[0].split('_', 1)[0]
    for f in [[f for f in sorted(g)][0] for g in audio_groups]
    if '_c0' in f
]).value_counts()

Characters with Unique Sounds per Skin


eflame         305
koopajr        210
brave          150
master          80
nana            74
wiifit          55
reflet          44
littlemac       43
ike             43
kamui           39
cloud           34
ptrainer        28
edge            24
bayonetta       24
inkling         22
demon           20
packun          11
pickel           3
pikmin           1
jack             1
murabito         1
pfushigisou      1
dtype: int64