In [4]:
import io
import json

import torch
import librosa
import IPython.display as display
from tqdm import tqdm

In [5]:
# Location of the pokemon dataset JSON. The path is relative, so it is resolved
# against the notebook's current working directory when the file is opened.
dataset_path = "../data/pokemon_data.json"

In [6]:
def load_json(path):
    """Read the JSON file at ``path`` and return the decoded Python object.

    The file is opened explicitly as UTF-8 instead of relying on the
    platform's locale encoding, so non-ASCII text in the file decodes the
    same way on every machine.

    Args:
        path: Filesystem path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` (dict, list, str, number, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as openfile:
        return json.load(openfile)
    
def save_json(object_, path):
    """Write ``object_`` to ``path`` as JSON indented by four spaces.

    Args:
        object_: Any value that ``json.dumps`` can serialize.
        path: Destination file; it is created or overwritten.

    Returns:
        None.
    """
    # Build the whole document in memory first: if serialization fails, the
    # destination file has not been opened (and therefore not truncated) yet.
    payload = json.dumps(object_, indent=4)

    with open(path, "w") as sink:
        sink.write(payload)

In [7]:
# Load the dataset once. The feature-extraction loop iterates over this object
# and uses each item as the pid in a cry filename, so presumably it is a
# mapping keyed by pokemon id -- verify against the data file.
all_pokemon_data = load_json(dataset_path)

In [8]:
# Extract MFCC features for every pokemon cry.
# Each cry is loaded at a fixed 16 kHz sample rate so that all feature
# matrices are computed on the same time scale and can be compared later.
all_features = {}
skipped_pids = []  # (pid, error) for every cry that could not be loaded
for pid in tqdm(all_pokemon_data):
    try:
        sound_data, s = librosa.load("../data/client_data/cries/%s.mp3"%pid, sr=16000)
    except Exception as err:
        # Best-effort: a missing or undecodable cry is skipped, but recorded so
        # the gap is visible. Catching Exception (instead of a bare except)
        # lets KeyboardInterrupt / SystemExit still stop the loop.
        skipped_pids.append((pid, repr(err)))
        continue

    mfcc = librosa.feature.mfcc(y=sound_data, sr=s)
    all_features[pid] = mfcc

if skipped_pids:
    print("Skipped %d cries that could not be loaded: %s"
          % (len(skipped_pids), [pid for pid, _ in skipped_pids]))

  return f(*args, **kwargs)
  return f(*args, **kwargs)
100%|██████████████████████████████████████████████████████████████████████████████████████| 899/899 [00:30<00:00, 29.55it/s]


In [33]:
# Rank every cry against every other cry by the DTW alignment cost of their
# MFCC matrices, then rescale each pid's scores to the range [0, max_p].
all_similarities = {}
max_p = .95 # max similarity probability: the score given to the closest match
for pid1 in tqdm(all_features):
    sound1_feature = all_features[pid1]

    similarities = []
    for pid2 in all_features:
        if pid1 == pid2:
            continue
        sound2_feature = all_features[pid2]

        # Use DTW to align the MFCCs
        matrix, wp = librosa.sequence.dtw(sound1_feature, sound2_feature, backtrack=True)
        cost = matrix[-1][-1]  # accumulated cost at the end of the alignment

        # Similarity is the negated mean cost per step of the warping path, so
        # cries of different lengths are comparable and higher means closer.
        similarity = -1 * cost / len(wp)
        similarities.append((pid2, similarity))

    if not similarities:
        # Only one cry available: nothing to compare against, and min()/max()
        # would raise on an empty list.
        all_similarities[pid1] = []
        continue

    scores = [sim for _, sim in similarities]
    min_sim = min(scores)
    sim_range = max(scores) - min_sim

    if sim_range == 0:
        # Every candidate is equally close; min-max scaling would divide by
        # zero and yield NaN (which is not valid JSON), so give them all max_p.
        similarities = [(pid, float(max_p)) for pid, _ in similarities]
    else:
        # Min-max scale: least similar -> 0, most similar -> max_p.
        # float() turns numpy scalars into plain floats for JSON serialization.
        similarities = [(pid, float(((sim - min_sim) * max_p) / sim_range)) for pid, sim in similarities]

    # Most similar first; the sort is stable, so ties keep iteration order.
    similarities.sort(key = lambda x: x[1], reverse=True)
    all_similarities[pid1] = similarities

100%|██████████████████████████████████████████████████████████████████████████████████████| 898/898 [02:15<00:00,  6.64it/s]


In [34]:
# Persist the ranked similarity lists, keyed by pid, as JSON.
# NOTE: JSON has no tuple type, so each (pid, score) tuple is written as an
# array and reads back as a two-element list [pid, score].
save_json(all_similarities, "../data/client_data/cry_similarities.json")

In [32]:
# Explore results: play the chosen cry, then its closest matches in ranked order.
# NOTE(review): the output recorded under this cell shows a top score of 0.98,
# which is above the max_p = .95 set in the similarity cell, so it presumably
# comes from an earlier run -- re-run this cell after recomputing similarities.
chosen_pid = "12"
top_n = 20  # how many of the closest cries to list and play
cry_path_template = "../data/client_data/cries/%s.mp3"

if chosen_pid not in all_similarities:
    # pids whose cry could not be loaded are skipped during feature extraction
    # and therefore never receive a similarity entry.
    print("No similarity data for pid %s" % chosen_pid)
else:
    display.display(display.Audio(cry_path_template % chosen_pid, autoplay=False))
    print("========")
    for pid, similarity in all_similarities[chosen_pid][:top_n]:
        print(pid, similarity)
        display.display(display.Audio(cry_path_template % pid, autoplay=False))
        print()

10 0.98



707 0.9779350049093026



881 0.9531063896859749



548 0.9516632624000313



615 0.938599178048387



873 0.9301004324104932



50 0.9236870169207834



480 0.9079055726850309



868 0.9076584218703979



583 0.9075006196862866



401 0.8919049145767551



43 0.8914806131163882



26 0.8887479206989852



850 0.8851552562631656



433 0.8825470672243747



595 0.8820389297253025



857 0.8799016041633645



51 0.87559060501095



811 0.8722584348907161



897 0.8688680038874897



