<a href="https://colab.research.google.com/github/andryll/MGR-IC/blob/main/codes/Feature_Extraction_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Importando as Bibliotecas

import numpy as np
import librosa
#import IPython.display as ipd
import os
import pandas as pd
from sklearn import preprocessing
from sklearn.decomposition import PCA

In [2]:
def splitSongs(songList, duration, sr=44100):
  """Cut every song into consecutive, non-overlapping windows of `duration` seconds.

  Parameters
  ----------
  songList : iterable of tuples
      Each item is indexed as (audio, second field, third field); only the
      first element is sliced, the other two are copied to every segment.
  duration : number
      Window length in seconds.
  sr : int
      Sampling rate used to convert `duration` into a number of samples.

  Returns
  -------
  list of tuples
      One (audio_window, second field, third field) tuple per full window.
      Trailing samples that do not fill a whole window are dropped.
  """
  # Window length expressed in samples
  hop = int(duration * sr)

  pieces = []
  for song in songList:
    # Only complete windows are kept
    whole_windows = len(song[0]) // hop
    pieces.extend(
        (song[0][k * hop:(k + 1) * hop], song[1], song[2])
        for k in range(whole_windows)
    )

  return pieces

In [3]:
def readSongs(genre, numSongs, sr=44100, duration=30,
              songs_dir='F:/Documentos/UTFPR/IC/songs', load_duration=30):
  """Load audio clips per genre, equalise their length and split them into segments.

  Parameters
  ----------
  genre : str
      Name of a single genre sub-folder, or 'all' to read the ten built-in genres.
  numSongs : int
      Maximum number of files read from each genre folder. If a folder holds
      fewer files, all of them are read.
  sr : int
      Sampling rate passed to `librosa.load` and to `splitSongs`.
  duration : number
      Length, in seconds, of each segment produced by `splitSongs`.
  songs_dir : str
      Root folder containing one sub-folder per genre. Defaults to the path
      that was previously hard-coded.
  load_duration : number
      Seconds of audio read from each file (previously hard-coded to 30).

  Returns
  -------
  list of tuples
      (audio_segment, sampling_rate, genre) for every segment; an empty list
      when no file was read.
  """
  genrelist = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz',
               'metal', 'pop', 'reggae', 'rock']

  # Anything other than 'all' restricts the run to that single genre
  if genre != 'all':
    genrelist = [genre]

  songs = []
  for g in genrelist:
    dir_path = os.path.join(songs_dir, g)
    # Sort the listing so the same files are picked on every run; os.listdir
    # returns entries in arbitrary order.
    files = sorted(os.listdir(dir_path))

    # Slicing (instead of indexing up to numSongs) tolerates short folders
    for file_name in files[:max(numSongs, 0)]:
      audio, rate = librosa.load(os.path.join(dir_path, file_name), sr=sr,
                                 mono=True, duration=load_duration)
      # Keep the genre alongside the signal and its sampling rate
      songs.append((audio, rate, g))

  # Nothing was loaded: max() below would fail on an empty sequence
  if not songs:
    return []

  max_len = max(len(song[0]) for song in songs)

  # Bring every clip to the length of the longest one
  resized_songs = []
  for audio, rate, g in songs:
    if len(audio) < max_len:
      # pad_center adds zeros on both sides, keeping the signal centred
      audio = librosa.util.pad_center(data=audio, size=max_len, axis=0)
    resized_songs.append((audio, rate, g))

  return splitSongs(resized_songs, sr=sr, duration=duration)

### Extração das Features


In [47]:
def _mean_and_variance(values, axis=None):
  """Return (mean, variance) of `values`, optionally reduced along `axis`."""
  return np.mean(values, axis=axis), np.var(values, axis=axis)


def _interleave(first, second):
  """Return [first[0], second[0], first[1], second[1], ...] as a flat list."""
  return list(np.column_stack((first, second)).ravel())


def featureExtraction(songs, sr=44100, frame=512):
  """Build a table with one row of summary audio features per song segment.

  Parameters
  ----------
  songs : iterable of tuples
      Each item is (audio, sampling_rate, class_label).
  sr : int
      Kept for backward compatibility; it is not used, because every feature
      is computed with the sampling rate stored in each song tuple.
  frame : int
      Window length in samples for RMS, spectral centroid, spectral bandwidth,
      spectral roll-off and zero-crossing rate. The hop length is half of it
      and is also used for the tempo estimate.

  Returns
  -------
  pandas.DataFrame
      78 columns: mean and variance ('sd2') of 12 chroma bins, of the six
      frame-wise features above and of 20 MFCCs, followed by 'tempo' and
      'Classe' (the class label).
  """
  notes = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'Ab', 'A', 'Bb', 'B')
  framewise_names = ('RMS', 'SpC', 'SpBw', 'SpR99', 'SpR01', 'ZCR')
  stats = ('mean', 'sd2')

  # Column order must match the order in which each row is assembled below
  colunas = (
      [f'Chroma {stat} {note}' for note in notes for stat in stats]
      + [f'{name} {stat}' for name in framewise_names for stat in stats]
      + [f'MFCC{k} {stat}' for k in range(1, 21) for stat in stats]
      + ['tempo', 'Classe']
  )

  # 50% overlap between consecutive analysis windows
  hop = int(frame / 2)

  # Rows are collected in a list and turned into a DataFrame once at the end,
  # instead of enlarging the DataFrame on every iteration.
  rows = []
  for y in songs:
    audio, rate, label = y[0], y[1], y[2]

    # Chroma
    # NOTE(review): chroma and MFCC use librosa's default n_fft/hop_length,
    # not `frame` - confirm this is intended.
    chroma_mean, chroma_var = _mean_and_variance(
        librosa.feature.chroma_stft(y=audio, sr=rate), axis=1)

    # Frame-wise features, in the same order as `framewise_names`
    framewise = (
        librosa.feature.rms(y=audio, frame_length=frame, hop_length=hop),
        librosa.feature.spectral_centroid(y=audio, sr=rate, n_fft=frame, hop_length=hop),
        librosa.feature.spectral_bandwidth(y=audio, sr=rate, n_fft=frame, hop_length=hop),
        librosa.feature.spectral_rolloff(y=audio, sr=rate, n_fft=frame, hop_length=hop,
                                         roll_percent=0.99),
        librosa.feature.spectral_rolloff(y=audio, sr=rate, n_fft=frame, hop_length=hop,
                                         roll_percent=0.01),
        librosa.feature.zero_crossing_rate(y=audio, frame_length=frame, hop_length=hop),
    )
    framewise_stats = []
    for track in framewise:
      framewise_stats.extend(_mean_and_variance(track))

    # MFCC
    mfcc_mean, mfcc_var = _mean_and_variance(
        librosa.feature.mfcc(y=audio, sr=rate, n_mfcc=20), axis=1)

    # Tempo (the estimator returns an array; its first element is stored)
    tempo = librosa.feature.tempo(y=audio, sr=rate, hop_length=hop)

    rows.append([
        *_interleave(chroma_mean, chroma_var),
        *framewise_stats,
        *_interleave(mfcc_mean, mfcc_var),
        tempo[0],
        label,
    ])

  return pd.DataFrame(rows, columns=colunas)

### Execução

In [62]:
# Read 100 files from every genre at 44.1 kHz and split them into 5-second segments
songs = readSongs(genre='all', numSongs=100, sr=44100, duration=5)


In [49]:
# Feature table for a 1024-sample analysis window
df = featureExtraction(songs=songs, sr=44100, frame=1024)

In [50]:
# Preview the first rows of the feature table built in the previous cell
df.head()

Unnamed: 0,Chroma mean C,Chroma sd2 C,Chroma mean C#,Chroma sd2 C#,Chroma mean D,Chroma sd2 D,Chroma mean D#,Chroma sd2 D#,Chroma mean E,Chroma sd2 E,...,MFCC17 mean,MFCC17 sd2,MFCC18 mean,MFCC18 sd2,MFCC19 mean,MFCC19 sd2,MFCC20 mean,MFCC20 sd2,tempo,Classe
0,0.412093,0.109936,0.460608,0.096704,0.484695,0.10498,0.357,0.066946,0.367279,0.076532,...,5.786384,33.574333,-5.765438,50.269703,0.361835,61.832661,-0.824205,32.530842,123.046875,blues
1,0.376765,0.092698,0.393495,0.075831,0.496074,0.10301,0.353649,0.059327,0.33714,0.066783,...,4.301836,32.02599,-5.919653,31.393242,0.281941,34.930019,0.895956,32.317299,126.048018,blues
2,0.405728,0.109972,0.406437,0.093525,0.475206,0.104613,0.310911,0.059642,0.279308,0.05511,...,5.209172,35.386288,-4.989812,48.895252,1.990291,47.468998,-0.0392,34.249947,123.046875,blues
3,0.613069,0.126739,0.499344,0.097389,0.495903,0.089821,0.351228,0.05798,0.324322,0.052651,...,3.44927,65.48082,-5.161124,52.095337,-1.111064,54.012756,1.659975,45.223988,105.46875,blues
4,0.399145,0.112249,0.378123,0.095132,0.409525,0.105325,0.377179,0.092366,0.386116,0.069363,...,1.90057,41.271187,-7.168017,46.206917,2.952615,46.50415,2.604873,44.300915,105.46875,blues


In [63]:
# Extract the features once for each desired frame length
frame_lenghts = [256, 512, 1024, 2048, 4096, 8192]

# One DataFrame per frame length, in the same order as `frame_lenghts`
dflist = [featureExtraction(songs, sr=44100, frame=size) for size in frame_lenghts]

In [64]:
# Output file for each DataFrame in `dflist`, matched by position
csvnames = ['ft2/5s/256.csv', 'ft2/5s/512.csv','ft2/5s/1024.csv','ft2/5s/2048.csv','ft2/5s/4096.csv','ft2/5s/8192.csv']

# zip() would silently drop the extra DataFrames, so fail loudly instead
if len(dflist) > len(csvnames):
  raise ValueError('dflist has more DataFrames than csvnames has file names')

for frame_df, csv_path in zip(dflist, csvnames):
  # to_csv does not create missing folders, so make sure the target exists
  os.makedirs(os.path.dirname(csv_path) or '.', exist_ok=True)
  frame_df.to_csv(csv_path, sep=',', index=False, encoding='utf-8')

In [21]:
# Scratch check: 7-bin chromagram of the second segment and its per-bin statistics
song = songs[1]
chroma = librosa.feature.chroma_stft(
    y=song[0],
    sr=song[1],
    n_chroma=7,
)
media_linhas = chroma.mean(axis=1)   # mean of each chroma row
sd2 = chroma.std(axis=1) ** 2        # squared standard deviation of each row

In [10]:
# Show the per-row means and squared deviations, then the first chroma row
print(media_linhas, sd2, sep='\n')
chroma[0]

[0.47524655 0.54116124 0.4977941  0.54429436 0.4182635  0.4914444
 0.52466947]
[0.07518348 0.10307483 0.09651286 0.10222084 0.06550278 0.08501279
 0.10831227]


array([1.        , 0.9798253 , 1.        , ..., 0.6343053 , 1.        ,
       0.84393334], dtype=float32)

In [34]:
# Scratch check: spectral bandwidth with a 1024-sample window and a 512-sample hop
bandwidth = librosa.feature.spectral_bandwidth(
    y=song[0],
    sr=song[1],
    n_fft=1024,
    hop_length=512,
)
bandwidth.shape

(1, 2584)

In [36]:
# Column-name list built programmatically: all chroma means, then all chroma
# 'sd2' values, then mean/sd2 pairs for the frame-wise features and the 20
# MFCCs, and finally the tempo and class columns.
colunas = (
    [f'Chroma {stat} {note}'
     for stat in ('mean', 'sd2')
     for note in ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'Ab', 'A', 'Bb', 'B')]
    + [f'{name} {stat}'
       for name in ('RMS', 'SpC', 'SpBw', 'SpR99', 'SpR01', 'ZCR')
       for stat in ('mean', 'sd2')]
    + [f'MFCC{k} {stat}' for k in range(1, 21) for stat in ('mean', 'sd2')]
    + ['tempo', 'Classe']
)
len(colunas)

78

In [59]:
# Show the sixth DataFrame in `dflist` (index 5)
display(dflist[5])

Unnamed: 0,Chroma mean C,Chroma sd2 C,Chroma mean C#,Chroma sd2 C#,Chroma mean D,Chroma sd2 D,Chroma mean D#,Chroma sd2 D#,Chroma mean E,Chroma sd2 E,...,MFCC17 mean,MFCC17 sd2,MFCC18 mean,MFCC18 sd2,MFCC19 mean,MFCC19 sd2,MFCC20 mean,MFCC20 sd2,tempo,Classe
0,0.443220,0.116929,0.493957,0.090730,0.530423,0.111769,0.362471,0.059003,0.366930,0.088381,...,6.636033,32.759502,-7.169274,34.095005,-3.058473,35.901241,-1.946720,31.588928,129.199219,blues
1,0.313655,0.072836,0.373641,0.082337,0.442523,0.084624,0.334208,0.061491,0.329513,0.063039,...,5.320587,46.470245,-6.949818,58.184605,2.669857,55.784626,-1.981014,40.155029,129.199219,blues
2,0.499088,0.125944,0.492935,0.111259,0.526621,0.123019,0.429196,0.092391,0.345693,0.068706,...,5.913382,24.065811,-2.474474,49.794083,4.369329,60.341129,0.928223,25.852682,129.199219,blues
3,0.437377,0.107753,0.464106,0.081459,0.464236,0.087225,0.362642,0.068650,0.375712,0.064050,...,5.416685,30.429169,-9.023584,36.059143,-1.789123,48.316597,-1.672008,22.640732,161.499023,blues
4,0.282027,0.069667,0.342922,0.069852,0.494003,0.101733,0.333867,0.057179,0.324747,0.071538,...,4.384613,28.869833,-7.393156,33.275806,1.636896,31.263004,2.227854,35.030254,129.199219,blues
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0.460943,0.059832,0.439371,0.077235,0.279480,0.031260,0.336385,0.050911,0.484977,0.124962,...,8.832020,48.528564,-1.581410,56.158936,5.622682,33.483807,-3.560943,33.956509,129.199219,rock
9996,0.602753,0.065545,0.474868,0.080850,0.375728,0.032099,0.477102,0.078766,0.474156,0.105281,...,3.798223,41.249592,-5.633295,40.627155,3.103715,37.154549,-6.987812,38.558422,107.666016,rock
9997,0.440145,0.046208,0.437082,0.078686,0.396972,0.022333,0.641139,0.036611,0.797369,0.063165,...,-2.145400,75.089462,-7.261323,36.422863,-0.795893,75.189888,-1.485561,24.342381,129.199219,rock
9998,0.507586,0.057185,0.250050,0.029906,0.323138,0.027371,0.541403,0.036657,0.811117,0.053058,...,5.365038,17.100443,-4.954843,20.242487,2.777913,29.258425,-2.349374,21.315361,129.199219,rock
