**# t-SNE on Audio Datasets

T-distributed Stochastic Neighbor Embedding (t-SNE) is a machine learning algorithm for visualization developed by Laurens van der Maaten and Geoffrey Hinton.[1] It is a nonlinear dimensionality reduction technique well-suited for embedding high-dimensional data for visualization in a low-dimensional space of two or three dimensions. Specifically, it models each high-dimensional object by a two- or three-dimensional point in such a way that similar objects are modeled by nearby points and dissimilar objects are modeled by distant points with high probability. Now We will create a t-SNE plot of a group of audio clips.

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import matplotlib.cm as cm
import fnmatch
import os
import numpy as np
import librosa
import matplotlib.pyplot as plt
import librosa.display
from sklearn.manifold import TSNE
import json
# Importing library 
import csv 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob

In [2]:
cd ../input/toronto-emotional-speech-set-tess/'TESS Toronto emotional speech set data'/

/kaggle/input/toronto-emotional-speech-set-tess/TESS Toronto emotional speech set data


In [3]:
files = []
for root, dirnames, filenames in os.walk('.'):
    for filename in fnmatch.filter(filenames, '*.wav'):
        files.append(os.path.join(root, filename))

print("found %d .wav files"%(len(files)))

found 2800 .wav files


In [4]:
def get_features(y, sr):
    y = y[0:sr]  # analyze just first second
    S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)
    log_S = librosa.amplitude_to_db(S, ref=np.max)
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
    delta_mfcc = librosa.feature.delta(mfcc, mode='nearest')
    delta2_mfcc = librosa.feature.delta(mfcc, order=2, mode='nearest')
    feature_vector = np.concatenate((np.mean(mfcc,1), np.mean(delta_mfcc,1), np.mean(delta2_mfcc,1)))
    feature_vector = (feature_vector-np.mean(feature_vector)) / np.std(feature_vector)
    return feature_vector

In [5]:
feature_vectors = []
sound_paths = []
for i,f in enumerate(files):
    if i % 100 == 0:
        print("get %d of %d = %s"%(i+1, len(files), f))
    y, sr = librosa.load(f)
    feat = get_features(y, sr)
    feature_vectors.append(feat)
    sound_paths.append(f)
        
print("calculated %d feature vectors"%len(feature_vectors))

get 1 of 2800 = ./YAF_sad/YAF_tip_sad.wav
get 101 of 2800 = ./YAF_sad/YAF_dip_sad.wav
get 201 of 2800 = ./YAF_pleasant_surprised/YAF_when_ps.wav
get 301 of 2800 = ./YAF_pleasant_surprised/YAF_laud_ps.wav
get 401 of 2800 = ./OAF_Pleasant_surprise/OAF_dip_ps.wav
get 501 of 2800 = ./OAF_Pleasant_surprise/OAF_death_ps.wav
get 601 of 2800 = ./OAF_happy/OAF_hit_happy.wav
get 701 of 2800 = ./OAF_happy/OAF_rough_happy.wav
get 801 of 2800 = ./OAF_neutral/OAF_south_neutral.wav
get 901 of 2800 = ./OAF_neutral/OAF_limb_neutral.wav
get 1001 of 2800 = ./YAF_angry/YAF_void_angry.wav
get 1101 of 2800 = ./YAF_angry/YAF_seize_angry.wav
get 1201 of 2800 = ./YAF_disgust/YAF_love_disgust.wav
get 1301 of 2800 = ./YAF_disgust/YAF_peg_disgust.wav
get 1401 of 2800 = ./OAF_angry/OAF_bath_angry.wav
get 1501 of 2800 = ./OAF_angry/OAF_vine_angry.wav
get 1601 of 2800 = ./OAF_Sad/OAF_such_sad.wav
get 1701 of 2800 = ./OAF_Sad/OAF_rush_sad.wav
get 1801 of 2800 = ./YAF_happy/YAF_neat_happy.wav
get 1901 of 2800 = ./YAF_

In [6]:
feature_vectors[1].shape

(39,)

In [7]:
model = TSNE(n_components=2, learning_rate=150, perplexity=30, verbose=2, angle=0.1).fit_transform(feature_vectors)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 2800 samples in 0.011s...
[t-SNE] Computed neighbors for 2800 samples in 0.322s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2800
[t-SNE] Computed conditional probabilities for sample 2000 / 2800
[t-SNE] Computed conditional probabilities for sample 2800 / 2800
[t-SNE] Mean sigma: 0.030072
[t-SNE] Computed conditional probabilities in 0.227s
[t-SNE] Iteration 50: error = 75.0900650, gradient norm = 0.0598386 (50 iterations in 2.848s)
[t-SNE] Iteration 100: error = 64.3100815, gradient norm = 0.0138958 (50 iterations in 1.344s)
[t-SNE] Iteration 150: error = 62.5010185, gradient norm = 0.0100426 (50 iterations in 1.231s)
[t-SNE] Iteration 200: error = 61.7766113, gradient norm = 0.0071227 (50 iterations in 1.198s)
[t-SNE] Iteration 250: error = 61.3847580, gradient norm = 0.0034649 (50 iterations in 1.200s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.384758
[t-SNE] Iteration 300: erro

In [8]:
symbol=[]
symbol=[1]*1400
x=[2]*1400
symbol.extend(x)

In [9]:
file=[1,2,3,4,5,6,7]
color=[]
for i in file:
    x=[i]*200
    color.extend(x)
color.extend(color)

In [10]:
len(color)

2800

In [11]:
x_axis=model[:,0]
y_axis=model[:,1]
import plotly.express as px
fig = px.scatter(x=x_axis, y=y_axis,color=color,symbol=symbol,opacity=0.7)
fig.show()

In [12]:
x_axis[0].shape

()

In [13]:
model = TSNE(n_components=3, learning_rate=150, perplexity=30, verbose=2, angle=0.1).fit_transform(feature_vectors)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 2800 samples in 0.010s...
[t-SNE] Computed neighbors for 2800 samples in 0.322s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2800
[t-SNE] Computed conditional probabilities for sample 2000 / 2800
[t-SNE] Computed conditional probabilities for sample 2800 / 2800
[t-SNE] Mean sigma: 0.030072
[t-SNE] Computed conditional probabilities in 0.200s
[t-SNE] Iteration 50: error = 75.4073792, gradient norm = 0.0535862 (50 iterations in 11.601s)
[t-SNE] Iteration 100: error = 63.3984871, gradient norm = 0.0047537 (50 iterations in 4.907s)
[t-SNE] Iteration 150: error = 62.2623177, gradient norm = 0.0025141 (50 iterations in 4.349s)
[t-SNE] Iteration 200: error = 61.8543358, gradient norm = 0.0014085 (50 iterations in 4.296s)
[t-SNE] Iteration 250: error = 61.6315308, gradient norm = 0.0011948 (50 iterations in 4.124s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.631531
[t-SNE] Iteration 300: err

In [14]:
files

['./YAF_sad/YAF_tip_sad.wav',
 './YAF_sad/YAF_keg_sad.wav',
 './YAF_sad/YAF_pole_sad.wav',
 './YAF_sad/YAF_puff_sad.wav',
 './YAF_sad/YAF_such_sad.wav',
 './YAF_sad/YAF_mob_sad.wav',
 './YAF_sad/YAF_third_sad.wav',
 './YAF_sad/YAF_beg_sad.wav',
 './YAF_sad/YAF_tire_sad.wav',
 './YAF_sad/YAF_near_sad.wav',
 './YAF_sad/YAF_fit_sad.wav',
 './YAF_sad/YAF_luck_sad.wav',
 './YAF_sad/YAF_talk_sad.wav',
 './YAF_sad/YAF_late_sad.wav',
 './YAF_sad/YAF_loaf_sad.wav',
 './YAF_sad/YAF_wife_sad.wav',
 './YAF_sad/YAF_bean_sad.wav',
 './YAF_sad/YAF_hush_sad.wav',
 './YAF_sad/YAF_turn_sad.wav',
 './YAF_sad/YAF_hole_sad.wav',
 './YAF_sad/YAF_make_sad.wav',
 './YAF_sad/YAF_fall_sad.wav',
 './YAF_sad/YAF_goose_sad.wav',
 './YAF_sad/YAF_south_sad.wav',
 './YAF_sad/YAF_met_sad.wav',
 './YAF_sad/YAF_door_sad.wav',
 './YAF_sad/YAF_home_sad.wav',
 './YAF_sad/YAF_pearl_sad.wav',
 './YAF_sad/YAF_lore_sad.wav',
 './YAF_sad/YAF_judge_sad.wav',
 './YAF_sad/YAF_gas_sad.wav',
 './YAF_sad/YAF_raise_sad.wav',
 './YAF_s

In [15]:
x_axis=model[:,0]
y_axis=model[:,1]
z_axis=model[:,2]
import plotly.express as px
fig = px.scatter_3d(x=x_axis, y=y_axis, z=z_axis,color=color,symbol=symbol,opacity=0.7)

#,color=emotion,symbol=age
# tight layout
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
fig.show()
# plt.figure(figsize = (10,10))
# plt.scatter(x_axis, y_axis)
# plt.show()