Distance metric을 기반으로 노래의 유사성을 측정하고 가장 가까운 노래와 가장 먼 노래를 살펴보는 간단한 실험이다. 오디오 피쳐를 사용하여 측정한다.

In [1]:
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt, seaborn as sns
import os
import librosa, librosa.display
import IPython.display as ipd

plt.rcParams['figure.figsize'] = (10, 3)

from sklearn import preprocessing
from scipy.spatial.distance import cdist

# 데이터

[GTZAN](https://www.tensorflow.org/datasets/catalog/gtzan?hl=ko) 데이터를 사용한다. 이미 추출된 피쳐를 불러온다. 총 1000곡이 있다.

In [2]:
# Load the pre-extracted GTZAN audio features (the `features_30_sec` table).
data = pd.read_csv('data/GTZAN_data/features_30_sec.csv')

In [3]:
# Drop the first two columns and the last one, leaving only the numeric audio
# descriptors (chroma_stft_mean ... mfcc20_var in the preview below).
features = data.iloc[:,2:-1]
# Standardise each column with sklearn's `preprocessing.scale` so that
# large-valued features (e.g. rolloff_var, ~1e6) do not dominate the distances.
features_scaled=preprocessing.scale(features)

In [4]:
features

Unnamed: 0,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,rolloff_var,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
0,0.350088,0.088757,0.130228,0.002827,1784.165850,129774.064525,2002.449060,85882.761315,3805.839606,9.015054e+05,...,0.752740,52.420910,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035
1,0.340914,0.094980,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,3550.522098,2.977893e+06,...,0.927998,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.106190,0.531217,45.786282
2,0.363637,0.085275,0.175570,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,3042.260232,7.840345e+05,...,2.451690,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.439720,46.639660,-2.231258,30.573025
3,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,2184.745799,1.493194e+06,...,0.780874,44.427753,-3.319597,50.206673,0.636965,37.319130,-0.619121,37.259739,-3.407448,31.949339
4,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,3579.757627,1.572978e+06,...,-4.520576,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.195160
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.352063,0.080487,0.079486,0.000345,2008.149458,282174.689224,2106.541053,88609.749506,4253.557033,1.222421e+06,...,1.789867,45.050526,-13.289984,41.754955,2.484145,36.778877,-6.713265,54.866825,-1.193787,49.950665
996,0.398687,0.075086,0.076458,0.000588,2006.843354,182114.709510,2068.942009,82426.016726,4149.338328,1.046621e+06,...,3.739020,33.851742,-10.848309,39.395096,1.881229,32.010040,-7.461491,39.196327,-2.795338,31.773624
997,0.432142,0.075268,0.081651,0.000322,2077.526598,231657.968040,1927.293153,74717.124394,4031.405321,8.042154e+05,...,1.838090,33.597008,-12.845291,36.367264,3.440978,36.001110,-12.588070,42.502201,-2.106337,29.865515
998,0.362485,0.091506,0.083860,0.001211,1398.699344,240318.731073,1818.450280,109090.207161,3015.631004,1.332712e+06,...,-2.812176,46.324894,-4.416050,43.583942,1.556207,34.331261,-5.041897,47.227180,-3.590644,41.299088


# 거리 유사성 측정

In [5]:
def songs_similarity(song_name, features, metric='cosine', filenames=None):
    """Rank every other song by its distance to ``song_name``.

    Only the distances between the query song and the rest are computed,
    instead of the full pairwise matrix, since a single column is all that
    is ever returned.

    Parameters
    ----------
    song_name : str
        Label of the query song; must occur exactly once in ``filenames``.
    features : array-like of shape (n_songs, n_features)
        One feature row per song, in the same order as ``filenames``.
    metric : str or callable, default 'cosine'
        Forwarded unchanged to ``scipy.spatial.distance.cdist``.
    filenames : sequence of str, optional
        One label per feature row. When omitted, the notebook-level
        ``data.filename`` column is used, which is what this function
        always relied on.

    Returns
    -------
    pandas.Series
        Distance from every other song to the query, sorted ascending
        (closest first), indexed by filename and named ``song_name``.
        The query song itself is removed.

    Raises
    ------
    KeyError
        If ``song_name`` is not among the labels.
    ValueError
        If ``song_name`` occurs more than once, or the number of labels
        differs from the number of feature rows.
    """
    if filenames is None:
        filenames = data.filename
    labels = pd.Index(filenames)
    matrix = np.asarray(features)
    if len(labels) != len(matrix):
        raise ValueError(
            'got %d labels for %d feature rows' % (len(labels), len(matrix))
        )

    hits = np.flatnonzero(labels == song_name)
    if hits.size == 0:
        raise KeyError(song_name)
    if hits.size > 1:
        raise ValueError('%r occurs more than once in the labels' % (song_name,))
    pos = int(hits[0])

    # cdist(all_rows, query_row) is an (n_songs, 1) column whose i-th entry is
    # d(features[i], features[pos]) -- the same argument order as column `pos`
    # of the full pairwise matrix.
    column = cdist(matrix, matrix[pos:pos + 1], metric=metric)[:, 0]
    series = pd.Series(column, index=labels, name=song_name)
    series = series.sort_values(ascending=True)
    # The query is (near-)zero distance from itself; it is not a neighbour.
    return series.drop(song_name)

# 코사인 거리 기반 (Based on cosine distance)

In [6]:
# Pairwise cosine distance between every pair of songs (rows of features_scaled).
distances = cdist(features_scaled, features_scaled, 'cosine')

In [7]:
# Label both axes of the pairwise distance matrix with the track filenames so
# that any pair of songs can be looked up by name, then display it.
dist_df = pd.DataFrame(distances, index=data.filename, columns=data.filename)
dist_df

filename,blues.00000.wav,blues.00001.wav,blues.00002.wav,blues.00003.wav,blues.00004.wav,blues.00005.wav,blues.00006.wav,blues.00007.wav,blues.00008.wav,blues.00009.wav,...,rock.00090.wav,rock.00091.wav,rock.00092.wav,rock.00093.wav,rock.00094.wav,rock.00095.wav,rock.00096.wav,rock.00097.wav,rock.00098.wav,rock.00099.wav
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
blues.00000.wav,0.000000,0.950769,0.410382,0.715138,0.974439,1.346688,1.219483,1.167626,0.358123,1.097889,...,1.082829,0.453831,0.421442,0.337410,0.428371,0.389058,0.359165,0.503706,7.150424e-01,0.695902
blues.00001.wav,0.950769,0.000000,1.096834,0.479097,0.919251,0.692144,0.681714,0.584742,0.879351,0.595832,...,1.098111,1.325126,1.370792,1.191698,1.330834,1.077301,1.222119,1.302573,5.004378e-01,0.688277
blues.00002.wav,0.410382,1.096834,0.000000,0.789589,0.599734,1.082019,1.028061,0.895554,0.531887,1.132532,...,1.032408,0.438926,0.409221,0.416707,0.485463,0.504293,0.433163,0.410017,7.836224e-01,0.678931
blues.00003.wav,0.715138,0.479097,0.789589,0.000000,0.873563,0.865204,0.699254,0.675434,0.647242,0.704816,...,1.320107,1.206516,1.151132,0.958014,1.172515,1.000287,0.979485,1.107821,4.977214e-01,0.816790
blues.00004.wav,0.974439,0.919251,0.599734,0.873563,0.000000,0.443934,0.517805,0.376545,0.970297,0.528343,...,0.912395,0.982634,0.861965,0.895316,1.034594,0.936546,0.936454,0.827056,8.468076e-01,0.938215
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
rock.00095.wav,0.389058,1.077301,0.504293,1.000287,0.936546,1.231012,1.251086,1.180628,0.380550,1.258421,...,0.976074,0.156273,0.148274,0.139406,0.148544,0.000000,0.097936,0.134759,6.501304e-01,0.477517
rock.00096.wav,0.359165,1.222119,0.433163,0.979485,0.936454,1.272209,1.291506,1.256171,0.387830,1.304636,...,1.033826,0.128774,0.117175,0.119638,0.136938,0.097936,0.000000,0.097929,6.621657e-01,0.527669
rock.00097.wav,0.503706,1.302573,0.410017,1.107821,0.827056,1.175960,1.203014,1.155732,0.538680,1.330066,...,1.008336,0.085830,0.073442,0.102346,0.121149,0.134759,0.097929,0.000000,7.128432e-01,0.584827
rock.00098.wav,0.715042,0.500438,0.783622,0.497721,0.846808,0.823649,0.747662,0.760559,0.545327,0.770608,...,1.233309,0.882145,0.838117,0.672067,0.842023,0.650130,0.662166,0.712843,1.110223e-16,0.410959


In [8]:
def dis_aud(path, duration=5, sr=11025):
    """Load an excerpt of an audio file and embed a player for it.

    Parameters
    ----------
    path : str
        Location of the audio file; forwarded to ``librosa.load``.
    duration : float, default 5
        Maximum number of seconds of audio to load.
    sr : int, default 11025
        Sample rate the audio is resampled to on load and played back at.
        The default equals the value that used to be hard-coded here
        (22050 / 2), now expressed as an integer.

    Returns
    -------
    None
        The player is rendered through ``IPython.display`` as a side effect.
    """
    samples, rate = librosa.load(path, sr=sr, duration=duration)
    ipd.display(ipd.Audio(samples, rate=rate))

## Example 1. Beethoven Piano Sonata No.8

In [9]:
# Query track for this example; play a 5-second excerpt of it.
# Files live under genres_original/<genre>/, where <genre> is the first
# dot-separated field of the filename.
song_name = 'classical.00077.wav'
dis_aud('data/GTZAN_data/genres_original/'+song_name.split('.')[0]+'/'+song_name, 5)

- 노래:
    - Beethoven Piano sonata no.8, I. Grave *by Barenboim*

In [10]:
# Rank every other song by distance to the query (cosine is the default metric).
sim_songs = songs_similarity(song_name, features_scaled)

In [11]:
# Display the ranking, closest first. songs_similarity already returns the
# series sorted ascending, so re-sorting here is redundant.
sim_songs.sort_values(ascending = True)

filename
classical.00079.wav    0.163032
classical.00065.wav    0.192554
classical.00035.wav    0.232339
classical.00066.wav    0.266772
classical.00058.wav    0.272568
                         ...   
metal.00053.wav        1.609674
metal.00051.wav        1.623948
metal.00087.wav        1.627464
disco.00003.wav        1.628881
metal.00078.wav        1.676451
Name: classical.00077.wav, Length: 999, dtype: float64

In [12]:
# Print and play the three nearest neighbours; sim_songs is ordered closest-first,
# so they are simply the first three index labels.
print('- 3 most similar songs: -')
for sim_song in sim_songs.index[:3]:
    print(sim_song)
    dis_aud('data/GTZAN_data/genres_original/'+sim_song.split('.')[0]+'/'+ sim_song, 5)

- 3 most similar songs: -
classical.00079.wav


classical.00065.wav


classical.00035.wav


가장 가까운 노래 3개

- classical.00079.wav
    - Beethoven Piano sonata no.8, in c minor III. Rondo. Allegro *by Horowitz*
    - 같은 작곡가의 같은 음악 작품이지만, 다른 악장이며 연주자가 다르다
    
- classical.00065.wav
    - Schubert Piano sonata no.21 in b-flat III. Scherzo *by A. Brendel*
    - 같은 피아노 소나타이지만 작곡가가 다르다 (다만 두 곡의 시기는 비슷하다: classical–romantic)
    
- classical.00035.wav
    - Ravel Quartet in f major for strings: II
    - 같은 클래식 장르이다.

In [13]:
# Print and play the three farthest songs, farthest first: walk the
# closest-first ranking backwards and take three labels.
print('- 3 most different songs: -')
for sim_song in sim_songs.index[::-1][:3]:
    print(sim_song)
    dis_aud('data/GTZAN_data/genres_original/'+sim_song.split('.')[0]+'/'+ sim_song, 5)

- 3 most different songs: -
metal.00078.wav


disco.00003.wav


metal.00087.wav


가장 먼 노래 3개

- metal.00078.wav
    - Metallica - Prince Charming
    - 헤비메탈 곡이다. 이해가 된다..
    
- disco.00003.wav
    - Don Armando's 2nd Ave. Rhumba Band - Deputy of Love
    - 훵키한 디스코 곡이나 베이스와 드럼이 두드러진다.
    
- metal.00087.wav
    - Metallica - So What
    - 또다시 메탈리카..

## Example 2. Britney Spears - Lucky

In [14]:
# Query track for this example; play a 5-second excerpt of it.
song_name = 'pop.00030.wav'
dis_aud('data/GTZAN_data/genres_original/'+song_name.split('.')[0]+'/'+song_name, 5)

- 노래:
    -  Britney Spears - Lucky

In [15]:
# Rank every other song by distance to the query (cosine is the default metric).
sim_songs = songs_similarity(song_name, features_scaled)

In [16]:
# Print and play the three nearest neighbours; sim_songs is ordered closest-first,
# so they are simply the first three index labels.
print('- 3 most similar songs: -')
for sim_song in sim_songs.index[:3]:
    print(sim_song)
    dis_aud('data/GTZAN_data/genres_original/'+sim_song.split('.')[0]+'/'+ sim_song, 5)

- 3 most similar songs: -
pop.00031.wav


pop.00020.wav


pop.00035.wav


가장 가까운 노래 3곡

- pop.00031.wav
    - 같은 노래이다 (*GTZAN 데이터에 같은 노래가 있다!*)

- pop.00020.wav
    - Britney Spears - Can't Make You Love Me
    - 같은 가수 브리트니 스피어스다.

- pop.00035.wav
    - Britney Spears - Stronger
    - "It's Britney, again"

In [17]:
# Print and play the three farthest songs, farthest first: walk the
# closest-first ranking backwards and take three labels.
print('- 3 most different songs: -')
for sim_song in sim_songs.index[::-1][:3]:
    print(sim_song)
    dis_aud('data/GTZAN_data/genres_original/'+sim_song.split('.')[0]+'/'+ sim_song, 5)

- 3 most different songs: -
rock.00099.wav


country.00086.wav


country.00050.wav


가장 먼 노래 3곡

- rock.00099.wav
    - The Stone Roses - This Is the One
    - 얼터너티브 록 밴드 The Stone Roses의 노래이다. 팝과 거리가 멀다고 할 수 있겠다.

- country.00086.wav
    - Brad Paisley - It Never Woulda Worked Out Anyway
    - 컨트리 송이다. 꽤나 잔잔하다.

- country.00050.wav
    - Vince Gill - Never Alone
    - 또, 컨트리 송이다.

## Example 3. A Tribe Called Quest - Electric Relaxation

In [18]:
# Query track for this example; play a 5-second excerpt of it.
song_name = 'hiphop.00050.wav'
dis_aud('data/GTZAN_data/genres_original/'+song_name.split('.')[0]+'/'+song_name, 5)

- 노래:
    -  A Tribe Called Quest - Electric Relaxation

In [19]:
# Rank every other song by distance to the query (cosine is the default metric).
sim_songs = songs_similarity(song_name, features_scaled)

In [20]:
# Print and play the three nearest neighbours; sim_songs is ordered closest-first,
# so they are simply the first three index labels.
print('- 3 most similar songs: -')
for sim_song in sim_songs.index[:3]:
    print(sim_song)
    dis_aud('data/GTZAN_data/genres_original/'+sim_song.split('.')[0]+'/'+ sim_song, 5)

- 3 most similar songs: -
hiphop.00072.wav


reggae.00065.wav


hiphop.00073.wav


가장 가까운 노래 3곡

- hiphop.00072.wav
    - 같은 노래의 다른 부분이다.

- reggae.00065.wav
    - Dennis Brown - Rub a Dub All the Time
    - 레게 노래이다. 힙합과 가까운 장르이다. 드럼이 유사하다.

- hiphop.00073.wav
    - A Tribe Called Quest - Jazz (we've got)
    - 같은 밴드 ATCQ이다.

In [21]:
# Print and play the three farthest songs, farthest first: walk the
# closest-first ranking backwards and take three labels.
print('- 3 most different songs: -')
for sim_song in sim_songs.index[::-1][:3]:
    print(sim_song)
    dis_aud('data/GTZAN_data/genres_original/'+sim_song.split('.')[0]+'/'+ sim_song, 5)

- 3 most different songs: -
rock.00080.wav


rock.00082.wav


jazz.00021.wav


가장 먼 노래 3곡

- rock.00080.wav
    - Survivor - Is This Love
    - 팝 락이다.
    
- rock.00082.wav
    - Survivor - Burning Heart
    - 마찬가지이다.
    
- jazz.00021.wav
    - Joe Lovano - Uprising
    - ATCQ와 재즈가 멀리 있다는 결과가 아쉽지만, 목소리의 부재, 드럼 패턴 등 그럴 만한 이유가 있어 보인다.

# 유클리드 거리 기반 (Based on euclidean distance)

## Example 1. Beethoven Piano Sonata No.8

In [22]:
# Same query track as the cosine-based Example 1, now ranked by Euclidean distance.
song_name = 'classical.00077.wav'
sim_songs = songs_similarity(song_name, features_scaled, metric='euclidean')

In [23]:
# List the three nearest neighbours under Euclidean distance (closest first).
# Audio playback is deliberately left out of this cell.
print('- 3 most similar songs: -')
for sim_song in sim_songs.index[:3]:
    print(sim_song)

- 3 most similar songs: -
classical.00079.wav
classical.00065.wav
classical.00035.wav


- 코사인 거리 기반과 동일하다 (오디오 디스플레이는 생략한다.)

In [24]:
# Print and play the three farthest songs, farthest first: walk the
# closest-first ranking backwards and take three labels.
print('- 3 most different songs: -')
for sim_song in sim_songs.index[::-1][:3]:
    print(sim_song)
    dis_aud('data/GTZAN_data/genres_original/'+sim_song.split('.')[0]+'/'+ sim_song, 5)

- 3 most different songs: -
classical.00089.wav


pop.00053.wav


reggae.00086.wav


- classical.00089.wav
    - Vivaldi The Four Seasons: Violin Concerto No.1 'Spring'
    - 같은 클래식 곡이지만 **바이올린 소리**만 포함한다.
    
- pop.00053.wav
    - Destiny's Child - Outro
    - **아카펠라** 버전의 팝송이다!
    
- reggae.00086.wav
    - **깨진** 파일이다.

## Example 2. Britney Spears - Lucky

In [25]:
# Same query track as the cosine-based Example 2, now ranked by Euclidean distance.
song_name = 'pop.00030.wav'
sim_songs = songs_similarity(song_name, features_scaled, metric='euclidean')

In [26]:
# List the three nearest neighbours under Euclidean distance (closest first).
# Audio playback is deliberately left out of this cell.
print('- 3 most similar songs: -')
for sim_song in sim_songs.index[:3]:
    print(sim_song)

- 3 most similar songs: -
pop.00031.wav
pop.00020.wav
pop.00035.wav


- 역시 코사인 거리 기반 측정과 같은 결과이다.

In [27]:
# List the three farthest songs, farthest first.
print('- 3 most different songs: -')
for sim_song in sim_songs.index[::-1][:3]:
    print(sim_song)
# NOTE(review): this call runs once, after the loop, so only the last track
# listed above is played -- presumably on purpose (it is the only track the
# notes below describe individually); confirm.
dis_aud('data/GTZAN_data/genres_original/'+sim_song.split('.')[0]+'/'+ sim_song, 5)

- 3 most different songs: -
pop.00053.wav
classical.00089.wav
jazz.00007.wav


- 위의 두 곡은 Example 1에서도 가장 먼 곡으로 나온 곡이며(순서만 다르다), 나머지 한 곡은 다음과 같다.

- jazz.00007.wav
    - 잔잔한 재즈 곡이다.

## Example 3. A Tribe Called Quest - Electric Relaxation

In [28]:
# Same query track as the cosine-based Example 3, now ranked by Euclidean distance.
song_name = 'hiphop.00050.wav'
sim_songs = songs_similarity(song_name, features_scaled, metric='euclidean')

In [29]:
# List the three nearest neighbours under Euclidean distance (closest first).
print('- 3 most similar songs: -')
for sim_song in sim_songs.index[:3]:
    print(sim_song)
# NOTE(review): this call runs once, after the loop, so only the last track
# listed above is played -- presumably on purpose (it is the only track the
# notes below describe individually); confirm.
dis_aud('data/GTZAN_data/genres_original/'+sim_song.split('.')[0]+'/'+ sim_song, 5)

- 3 most similar songs: -
hiphop.00072.wav
reggae.00065.wav
hiphop.00066.wav


- 위의 두 곡은 코사인 거리 결과와 같다.

- hiphop.00066.wav
    - ATCQ - Bonita Applebum (same artist)


In [30]:
# List the three farthest songs, farthest first: walk the closest-first
# ranking backwards and take three labels. No audio is played in this cell.
print('- 3 most different songs: -')
for sim_song in sim_songs.index[::-1][:3]:
    print(sim_song)

- 3 most different songs: -
classical.00089.wav
pop.00053.wav
jazz.00007.wav


- Example 2와 같은 세 곡이다 (앞의 두 곡의 순서만 다르다).

**유클리드 거리 기반으로 측정하면, 가장 먼 곡으로는 한 가지 악기나 보컬만으로 구성된 곡 또는 소음만 있는(깨진) 파일이 뽑힌다.**