In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import seaborn as sns
from umap import UMAP

# Root directory: the `emb` folder of the `sample_one_to_one-attack_target` experiment.
# NOTE(review): no cell visible in this notebook reads ROOT_DIR; the loaders below are
# called with explicit paths instead — confirm whether this constant is still needed.
ROOT_DIR = 'egs/adain-vc/exp/sample_one_to_one-attack_target/emb'

# Function to load feature data from files
def load_features(root_dir):
    """Load every saved feature file found under ``root_dir``.

    ``root_dir`` is scanned for sub-directories; each sub-directory name is
    used as a speaker id and every regular file inside it is read with
    ``np.load``. Entries of ``root_dir`` that are not directories, and
    entries of a speaker directory that are not regular files, are ignored.

    Args:
        root_dir: Path of the directory that holds one folder per speaker.

    Returns:
        A tuple ``(spkids, features, files)`` of three parallel lists:
        the speaker id, the loaded array, and the bare file name of each
        feature file. Entries are ordered by speaker id, then by file name.
    """
    spkids = []
    features = []
    files = []

    # os.listdir returns entries in arbitrary order. Sorting keeps the row
    # order (and therefore any seeded projection computed from it) stable
    # across runs and machines.
    for spkid in sorted(os.listdir(root_dir)):
        spkid_dir = os.path.join(root_dir, spkid)
        if not os.path.isdir(spkid_dir):
            continue

        for feat_name in sorted(os.listdir(spkid_dir)):
            feat_path = os.path.join(spkid_dir, feat_name)
            # Nested folders (e.g. notebook checkpoint directories) cannot be
            # passed to np.load, so only regular files are considered.
            if not os.path.isfile(feat_path):
                continue

            spkids.append(spkid)
            features.append(np.load(feat_path))
            files.append(feat_name)

    return spkids, features, files



In [None]:
from matplotlib import pyplot as plt 
import seaborn as sns 


%matplotlib inline 

# Parallel accumulators, one entry per embedding file; later cells read them.
adv_types, spkids, feats, files = [], [], [], []

# Speakers that exist in the origin experiment but not in the per-speaker
# attacked one are tagged with " (adv)" further down.
def_spk = os.listdir('egs/adain-vc/exp/sample_per_speaker-attack_target/emb')
adv_spk = [
    spk
    for spk in os.listdir('egs/adain-vc/exp/sample_per_speaker-origin/emb')
    if spk not in def_spk
]

# Display label and experiment folder, matched by position.
# Currently disabled alternatives: "one_to_one" / "sample_one_to_one-attack_target".
types = ["none", "per speaker", "per_utterance", "untarget"]
exps = [
    "sample_per_speaker-origin",
    "sample_per_speaker-attack_target",
    "sample_per_utterance-attack_target",
    "sample_untarget-attack_untarget",
]

for adv_type, exp in zip(types, exps):
    spks, features, fs = load_features(f'egs/adain-vc/exp/{exp}/emb')

    # Report how many embeddings this experiment contributed.
    print(len(spks))

    adv_types += [adv_type] * len(spks)

    # Append the " (adv)" marker to speakers listed in adv_spk.
    spks = [name + " (adv)" if name in adv_spk else name for name in spks]

    spkids += spks
    feats += features
    files += fs

import pandas as pd 

# One row per embedding file: attack label, speaker id, embedding array, file name.
df = pd.DataFrame(dict(
    adv_type=adv_types,
    spkid=spkids,
    feat=feats,
    file=files,
))

# One figure per attack label. Every figure also contains the "none" rows so
# the attacked embeddings can be compared against the unattacked ones.
for adv_type in types:
    sub_df = df[(df.adv_type == adv_type) | (df.adv_type == "none")]
    X = np.array(sub_df.feat.to_list())

    # The reducer used here is UMAP, so the progress message names it.
    print("Performing UMAP")
    reducer = UMAP(n_components=2, random_state=42, min_dist=0.5)
    X_reduce = reducer.fit_transform(X)

    # Explicit figure/axes handles instead of the implicit "current figure".
    fig, ax = plt.subplots()
    g = sns.scatterplot(
        x=X_reduce[:, 0],
        y=X_reduce[:, 1],
        hue=sub_df.spkid,
        style=sub_df.adv_type,
        ax=ax,
    )
    g.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)

    # Rows of X_reduce follow the row order of sub_df, so a positional index
    # pairs each point with its file name.
    for i, fp in enumerate(sub_df.file):
        g.text(X_reduce[i, 0] + 0.02, X_reduce[i, 1] + 0.02, fp, size=2)

    # The legend is anchored outside the axes; without a tight bounding box it
    # is cut off at the figure edge in the saved PDF.
    fig.savefig(adv_type + ".pdf", bbox_inches="tight")



In [None]:
# Project every embedding currently held in `feats` down to two dimensions
# with t-SNE; the result (`X_reduce`) is plotted in the next cell.
X = np.array(feats)
print("Perfoming t-SNE")
reducer = TSNE(
    n_components=2,
    random_state=42,
    verbose=1,
)
X_reduce = reducer.fit_transform(X)




In [None]:
from matplotlib import pyplot as plt 
import seaborn as sns 

%matplotlib inline 
# Scatter the 2-D projection: colour encodes the speaker id, marker style
# encodes the attack label.
g = sns.scatterplot(
    x=X_reduce[:, 0],
    y=X_reduce[:, 1],
    hue=spkids,
    style=adv_types,
)
# Anchor the legend's centre-left corner at the right edge of the axes.
g.legend(bbox_to_anchor=(1, 0.5), loc='center left', ncol=1)
# Optional per-point file labels and title (disabled):
# for i, fp in enumerate(files):
#     g.text(X_reduce[i, 0] + 0.02, X_reduce[i, 1] + 0.02, fp, size=5)
# plt.title("Something")


## Resemblyzer


In [None]:
from resemblyzer import preprocess_wav, VoiceEncoder
import resemblyzer as rblz
import os 
from glob import glob 
import numpy as np 
from tqdm import tqdm
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import seaborn as sns
import pandas as pd 

from umap import UMAP

# Resemblyzer speaker encoder; load_wavs reads it as a module-level name.
encoder = VoiceEncoder()

# def_spk: entries of the per-speaker attacked experiment's `emb` folder.
def_spk = os.listdir('egs/adain-vc/exp/sample_per_speaker-attack_target/emb')
# adv_spk: entries of the origin experiment's `emb` folder missing from def_spk.
adv_spk = list(
    filter(
        lambda spk: spk not in def_spk,
        os.listdir('egs/adain-vc/exp/sample_per_speaker-origin/emb'),
    )
)


def load_wavs(root_dir):
    """Embed every audio file found under ``root_dir``.

    ``root_dir`` is scanned for sub-directories; each sub-directory name is
    used as a speaker id. Speakers listed in the module-level ``adv_spk`` are
    skipped entirely. Every regular file of the remaining speakers is passed
    through ``preprocess_wav`` and then through the module-level ``encoder``
    (``encoder.embed_utterance``).

    Args:
        root_dir: Path of the directory that holds one folder per speaker.

    Returns:
        A tuple ``(spkids, features)`` of two parallel lists: the speaker id
        and the utterance embedding of each file. Entries are ordered by
        speaker id, then by file name.
    """
    spkids = []
    features = []

    # Set lookup instead of scanning the adv_spk list once per speaker.
    skipped_speakers = set(adv_spk)

    # os.listdir returns entries in arbitrary order. Sorting keeps the row
    # order (and therefore any seeded projection computed from it) stable.
    for spkid in tqdm(sorted(os.listdir(root_dir))):
        spkid_dir = os.path.join(root_dir, spkid)
        if not os.path.isdir(spkid_dir):
            continue

        if spkid in skipped_speakers:
            continue

        for wav_name in sorted(os.listdir(spkid_dir)):
            wav_path = os.path.join(spkid_dir, wav_name)
            # Nested folders (e.g. notebook checkpoint directories) are not
            # audio files and would make preprocessing fail.
            if not os.path.isfile(wav_path):
                continue

            spkids.append(spkid)
            features.append(
                encoder.embed_utterance(preprocess_wav(wav_path))
            )

    return spkids, features


# Parallel accumulators, one entry per embedded utterance.
adv_types, spkids, feats = [], [], []

# Display label and experiment folder, matched by position.
# Currently disabled alternatives: "one_to_one" / "sample_one_to_one-attack_target".
types = ["origin", "per speaker", "per_utterance", "untarget"]
exps = [
    "sample_per_speaker-origin",
    "sample_per_speaker-attack_target",
    "sample_per_utterance-attack_target",
    "sample_untarget-attack_untarget",
]

for adv_type, exp in zip(types, exps):
    # Only the origin experiment contributes its `wav` folder in addition to
    # `wav_syn`; every other experiment contributes `wav_syn` alone.
    wav_types = ['wav', 'wav_syn'] if adv_type == "origin" else ['wav_syn']

    for wav_type in wav_types:
        spk, feat = load_wavs(f'egs/adain-vc/exp/{exp}/{wav_type}')

        # Rows from `wav_syn` are labelled "syn (<type>)"; others keep the bare type.
        adv_types += [adv_type if wav_type != "wav_syn" else f"syn ({adv_type})"] * len(spk)
        spkids += spk
        feats += feat

# One row per utterance; there is no `file` column in this frame.
df = pd.DataFrame({
    "adv_type": adv_types,
    "spkid": spkids,
    "feat": feats,
})



# One figure per attack label. Every figure also contains the rows whose
# label mentions "origin" ("origin" and "syn (origin)") as a reference.
for adv_type in types:
    # Labels are plain text, not patterns: match them literally.
    sub_df = df[
        df.adv_type.str.contains(adv_type, regex=False)
        | df.adv_type.str.contains("origin", regex=False)
    ]
    X = np.array(sub_df.feat.to_list())

    # The reducer used here is UMAP, so the progress message names it.
    print("Performing UMAP")
    reducer = UMAP(n_components=2, random_state=42, min_dist=0.1)
    X_reduce = reducer.fit_transform(X)

    # Explicit figure/axes handles instead of the implicit "current figure".
    fig, ax = plt.subplots()
    g = sns.scatterplot(
        x=X_reduce[:, 0],
        y=X_reduce[:, 1],
        hue=sub_df.spkid,
        style=sub_df.adv_type,
        ax=ax,
    )
    g.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)

    # Per-point file labels are not drawn here: this frame has no `file` column.

    # The earlier embedding-plot cell in this notebook also saves to
    # "<adv_type>.pdf" and shares three labels with this loop, so the same
    # name would overwrite those figures; a prefix keeps both sets on disk.
    # The legend is anchored outside the axes, so a tight bounding box is
    # needed to keep it inside the saved PDF.
    fig.savefig("resemblyzer_" + adv_type + ".pdf", bbox_inches="tight")

