<a href="https://colab.research.google.com/github/andryll/MGR-IC/blob/main/codes/Feature_Extraction_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Importando as Bibliotecas

import numpy as np
import librosa
#import IPython.display as ipd
import os
import pandas as pd
from sklearn import preprocessing
from sklearn.decomposition import PCA
from scipy.fft import fft

In [2]:
def splitSongs (songList, duration, sr=44100):

  # Converta o tamanho da janela de segundos para amostras
  window_size_samples = int(duration * sr)

  # Inicialize uma lista para armazenar os segmentos
  segmentedList = []

  # Pega cada música da lista
  for y in songList:

    # Calcule o número total de segmentos
    num_segments = len(y[0]) // window_size_samples

    # Divida o áudio em segmentos de 5 segundos e adcione-os na lista
    for i in range(num_segments):
        start = i * window_size_samples
        end = (i + 1) * window_size_samples
        segment = (y[0][start:end], y[1], y[2])
        segmentedList.append(segment)

  # Retorna a nova lista
  return segmentedList

In [3]:
def readSongs (genre, numSongs, sr=44100, duration = 30):

  # Declarando listas iniciais
  songs = []
  genrelist = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz',
                'metal', 'pop', 'reggae', 'rock']

  # Se a escolha de gênero não for 'all', substitui a lista pelo gênero escolhido
  if genre != 'all':
    genrelist = [genre]

  #Percorre todos os gêneros da lista
  for g in genrelist:
    #Pega o caminho para a pasta do gênero escolhido
    dir_path = os.path.join('F:/Documentos/UTFPR/IC/songs', g)
    #Lista os arquivos da pasta e os embaralha
    files = os.listdir(dir_path)
    files.sort

    # Até o número de musicas desejado ser alcançado, lê os arquivos de áudio com o librosa
    for i in range(numSongs):
      songs.append(librosa.load(os.path.join(dir_path, files[i]), sr=sr, mono = True, duration = 30))
      # Adciona o gênero como uma variável da tupla
      songs[-1] = songs[-1] + (g,)

  max_len = max(len(song[0]) for song in songs)

  # Garante que todas as músicas terão o mesmo tamanho da maior
  resized_songs = []
  for song in songs:
      # Verifica se a música precisa ser redimensionada
      if len(song[0]) < max_len:
          # Adiciona zeros à direita para igualar o tamanho
          padded_audio = librosa.util.pad_center(data = song[0], size = max_len, axis = 0)
          resized_songs.append((padded_audio, song[1], song[2]))
      else:
          resized_songs.append(song)

  new_songs = splitSongs (resized_songs, sr=sr, duration = duration)

  return new_songs

### Extração das Features


In [4]:
def featureExtraction (songs, sr=44100, frame=512):

  colunas = ['Chroma mean', 'Chroma sd2',
             'RMS mean', 'RMS sd2', 'SpC mean', 'SpC sd2', 'SpBw mean', 'SpBw sd2', 'SpR99 mean', 'SpR99 sd2', 'SpR01 mean', 'SpR01 sd2', 'ZCR mean',
             'ZCR sd2', 'MFCC1 mean', 'MFCC1 sd2', 'MFCC2 mean', 'MFCC2 sd2', 'MFCC3 mean', 'MFCC3 sd2', 'MFCC4 mean', 'MFCC4 sd2', 'MFCC5 mean',
             'MFCC5 sd2', 'MFCC6 mean', 'MFCC6 sd2', 'MFCC7 mean', 'MFCC7 sd2', 'MFCC8 mean', 'MFCC8 sd2', 'MFCC9 mean', 'MFCC9 sd2', 'MFCC10 mean',
             'MFCC10 sd2', 'tempo1', 'tempo2', 'tempo3','Classe']

  df = pd.DataFrame(columns=colunas)

  for y in songs:

    #Chroma
    fft1 = np.abs(librosa.stft(y[0], hop_length=len(y[0])+1))
    chromas = librosa.feature.chroma_stft(S=fft1, sr=y[1], n_chroma=12)
    chroma_mean = np.mean(chromas)
    chroma_sd2 = np.var(chromas)

    #RMS
    rms = librosa.feature.rms(y=y[0], frame_length=frame, hop_length=int(frame/2))
    rms_mean = np.mean(rms)
    rms_sd2 = np.var(rms)

    #Spectral Centroid
    centroid = librosa.feature.spectral_centroid(y=y[0], sr=y[1], hop_length=int(frame/2), n_fft=frame)
    centroid_mean = np.mean(centroid)
    centroid_sd2 = np.var(centroid)

    #Spectral Bandwidth
    bandwidth = librosa.feature.spectral_bandwidth(y=y[0], sr = y[1], n_fft=frame, hop_length=int(frame/2))
    bandwidth_mean = np.mean(bandwidth)
    bandwidth_sd2 = np.var(bandwidth)

    #Spectral Rolloff
    roll99 = librosa.feature.spectral_rolloff(y=y[0], sr=y[1], n_fft=frame, hop_length=int(frame/2), roll_percent=0.99)
    roll99_mean = np.mean(roll99)
    roll99_sd2 = np.var(roll99)

    roll01 = librosa.feature.spectral_rolloff(y=y[0], sr=y[1], n_fft=frame, hop_length=int(frame/2), roll_percent=0.01)
    roll01_mean = np.mean(roll01)
    roll01_sd2 = np.var(roll01)

    #ZCR
    zcr = librosa.feature.zero_crossing_rate(y=y[0], frame_length=frame, hop_length=int(frame/2))
    zcr_mean = np.mean(zcr)
    zcr_sd2 = np.var(zcr)

    #MFCC
    mfcc = librosa.feature.mfcc(y=y[0], sr=y[1], n_mfcc=10)
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_sd2 = np.var(mfcc, axis=1)

    #tempo
    onset_env = librosa.onset.onset_strength(y=y[0], sr=y[1])
    tempo = librosa.feature.tempo(y=y[0], sr = y[1], onset_envelope=onset_env, hop_length=int(frame/2), aggregate=None)
    hist, bin_edges = np.histogram(tempo, bins=3)

    #Adcionando ao DF

    df.loc[len(df)] = [chroma_mean, chroma_sd2,
                      rms_mean, rms_sd2, centroid_mean, centroid_sd2, bandwidth_mean, bandwidth_sd2, roll99_mean, roll99_sd2, roll01_mean, roll01_sd2,
                      zcr_mean, zcr_sd2, mfcc_mean[0], mfcc_sd2[0], mfcc_mean[1], mfcc_sd2[1], mfcc_mean[2], mfcc_sd2[2], mfcc_mean[3], mfcc_sd2[3],
                      mfcc_mean[4], mfcc_sd2[4], mfcc_mean[5], mfcc_sd2[5], mfcc_mean[6], mfcc_sd2[6], mfcc_mean[7], mfcc_sd2[7], mfcc_mean[8], mfcc_sd2[8],
                      mfcc_mean[9], mfcc_sd2[9], hist[0], hist[1], hist[2],y[2]]


  return df

### Execução

In [11]:
# Lê as músicas
songs = readSongs ('all', 100, 44100, 3)


In [6]:
df = featureExtraction ([songs[0]], sr=44100, frame=4096)

In [7]:
df.head()

Unnamed: 0,Chroma mean,Chroma sd2,RMS mean,RMS sd2,SpC mean,SpC sd2,SpBw mean,SpBw sd2,SpR99 mean,SpR99 sd2,...,MFCC8 mean,MFCC8 sd2,MFCC9 mean,MFCC9 sd2,MFCC10 mean,MFCC10 sd2,tempo1,tempo2,tempo3,Classe
0,0.713707,0.014308,0.124002,0.002836,1778.624727,133495.542331,1982.642962,105849.948368,8232.263184,350714.722425,...,-16.051859,64.345428,13.069498,103.55162,-1.408203,54.695477,151,220,60,blues


In [8]:
df.tail()

Unnamed: 0,Chroma mean,Chroma sd2,RMS mean,RMS sd2,SpC mean,SpC sd2,SpBw mean,SpBw sd2,SpR99 mean,SpR99 sd2,...,MFCC8 mean,MFCC8 sd2,MFCC9 mean,MFCC9 sd2,MFCC10 mean,MFCC10 sd2,tempo1,tempo2,tempo3,Classe
0,0.713707,0.014308,0.124002,0.002836,1778.624727,133495.542331,1982.642962,105849.948368,8232.263184,350714.722425,...,-16.051859,64.345428,13.069498,103.55162,-1.408203,54.695477,151,220,60,blues


In [12]:
# Extrai as features para os frame lenghts desejados
frame_lenghts = [256, 512, 1024, 2048, 4096, 8192]

dflist = []
for f in frame_lenghts:
  dflist.append(featureExtraction(songs, sr=44100, frame=f))

  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pitch_tuning(
  return pi

In [13]:
csvnames = ['F:/Documentos/UTFPR/IC/csvs/ft3/3s/256.csv', 'F:/Documentos/UTFPR/IC/csvs/ft3/3s/512.csv','F:/Documentos/UTFPR/IC/csvs/ft3/3s/1024.csv','F:/Documentos/UTFPR/IC/csvs/ft3/3s/2048.csv',
            'F:/Documentos/UTFPR/IC/csvs/ft3/3s/4096.csv','F:/Documentos/UTFPR/IC/csvs/ft3/3s/8192.csv']

for i in range(len(dflist)):
  dflist[i].to_csv(csvnames[i], sep=',', index=False, encoding='utf-8')

In [None]:
song = songs[1]
chroma = librosa.feature.chroma_stft(y=song[0], sr=song[1], n_chroma=7)
media_linhas = np.mean(chroma, axis=1)
sd2 = np.std(chroma, axis=1)**2

testes


In [7]:
y = songs[0]

In [81]:
onset_env = librosa.onset.onset_strength(y=y[0], sr=y[1])

tempo = librosa.feature.tempo(y=y[0], sr = y[1], onset_envelope=onset_env, hop_length=512, aggregate=None)
tempo

array([126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04801829,
       126.04801829, 126.04801829, 126.04801829, 126.04

In [82]:
hist, bin_edges = np.histogram(tempo, bins=3)
hist

array([191,   0, 240], dtype=int64)

In [25]:
bin_edges

array([89.10290948, 92.52994446, 95.95697944, 99.38401442])

In [65]:
from scipy.fft import fft
fft1 = np.abs(librosa.stft(y[0], hop_length=len(y[0])+1))

In [66]:
fft1.shape

(1025, 1)

In [68]:
chromas = librosa.feature.chroma_stft(S = fft1, sr=y[1], n_chroma=12)
chroma_mean = np.mean(chromas)
chroma_sd2 = np.std(chromas)

In [69]:
chroma_mean

0.71370727

In [70]:
chroma_sd2**2

0.01430788095240132

In [71]:
np.var(chromas)

0.014307882