# Exploratory Data Analysis (EDA) and Feature Extraction 

This jupyter notebook will explore the GTZAN dataset to gain familiarity with the data as well as extract features from the audio files

## Exploratory Data Analysis (EDA)

In [25]:
import librosa  #python audio processing library
import librosa.display
import IPython.display as ipd # listen to music in jupyter notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

### Load Dataset

In [26]:
import os
data_files = []
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        if "wav" in filename:
            data_files.append(os.path.join(dirname, filename))
print(f"There are {len(data_files)} number of audio files in this dataset")

There are 1000 number of audio files in this dataset


### Listening to and Visualizing Audio Files

In [None]:
ipd.Audio(data_files[0])

In [None]:
first_audio, sr = librosa.load(data_files[0]) # automatically resampled to 22050 can set sr=None to preserve the original sampling rate

In [None]:
librosa.display.waveplot(first_audio, alpha=0.5)
plt.title(data_files[0].split("/")[-1])
plt.ylim((-1,1))
plt.ylabel("Amplitude")

## Understanding and Visualizing Features from Librosa

We will test extracting features from the first audio file using librosa and visualize them 

### Root Mean Square Energy

Root Mean Square Energy is essentially the sqaure root of the mean of the $amplitude^2$. We take each frame (a collection of data points) and obtain the root mean square of each. 
It is an indicator of loudness but less sensitive to outliers

In [None]:
FRAME_LENGTH = 1024 
HOP_LENGTH = 512

rms_first_audio = librosa.feature.rms(first_audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
print(f"The RSME of the first audio file is: {rms_first_audio}")
frames = range(len(rms_first_audio))
t = librosa.frames_to_time(frames, hop_length=HOP_LENGTH)
librosa.display.waveplot(first_audio, alpha=0.5)
plt.plot(t, rms_first_audio, color='green')
plt.title(data_files[0].split("/")[-1])
plt.ylim((-1,1))
plt.ylabel("Amplitude");

### Zero Crossing Rate

The number of times the signal crosses the horizontal axis. It is calculated by checking the difference between subsequent samples. In the sample below, it is normalized It can be a measure of "lack of smoothness" and "percussiveness" in the audio sample. 

In [None]:
FRAME_LENGTH = 1024 
HOP_LENGTH = 512

zero_first_audio = librosa.feature.zero_crossing_rate(first_audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
print(f"The Zero Crossing Rate of the first audio file is: {zero_first_audio}")
frames = range(len(zero_first_audio))
t = librosa.frames_to_time(frames, hop_length=HOP_LENGTH)
plt.figure(figsize=(20,10))
plt.subplot(2,2,1)
librosa.display.waveplot(first_audio, alpha=0.5)
plt.plot(t, zero_first_audio, color='orange')
plt.title(data_files[0].split("/")[-1])
plt.ylim((-1,1))

plt.subplot(2,2,2)
plt.plot(t, zero_first_audio, color='orange')
plt.title(data_files[0].split("/")[-1])
plt.ylim((0,1))

# Feature Extraction
* Mel-frequency cepstral coefficients (MFCCs) = the way humans perceive sound
* Spectral Centroid - where most of the energy is concentrated; "brightness of sound"; calculated as the weighted mean of the frequencies 
* Spectral Bandwidth - Variance from the Spectral Centroid; calculated as the weighted distances from the spectral centroid
* Tempo - beats per minute

In [27]:
FRAME_LENGTH = 1024 
HOP_LENGTH = 512

In [28]:
def calculate_mean(arr):
    if len(arr) == 0:
        return None
    else: 
        return sum(arr)/ len (arr)

In [41]:
def extract_features(file_name):
    try: 
        audio, sr = librosa.load(file_name)
    except Exception: 
        print(f"Error occured while loading {file_name}")
        return None
    rmse_arr = librosa.feature.rms(audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
    zcr_arr = librosa.feature.zero_crossing_rate(audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
    mfcc_arr = librosa.feature.mfcc(audio, n_mfcc=13, sr=sr)[0]
    cent_arr = librosa.feature.spectral_centroid(audio, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
    spec_band = librosa.feature.spectral_bandwidth(audio, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
    chroma = librosa.feature.chroma_stft(audio, sr=sr, n_fft=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]
    tempo = librosa.beat.tempo(audio, sr=sr, hop_length=HOP_LENGTH)[0]
    harmonic = librosa.effects.harmonic(audio)[0]
    percussive = librosa.effects.percussive(audio)[0]
    return [file_name, file_name.split("/")[-1], 
            rmse_arr, calculate_mean(rmse_arr), 
            zcr_arr, calculate_mean(zcr_arr), 
            mfcc_arr, calculate_mean(mfcc_arr), 
            cent_arr, calculate_mean(cent_arr), 
            spec_band, calculate_mean(spec_band), 
            chroma, calculate_mean(chroma),
            tempo, harmonic, percussive]

In [45]:
extracted_data = []
x = 0
for i in data_files:
    features = extract_features(i)
    if features: 
        extracted_data.append(features)
    x+=1
    print(f"Processed {x} features")
        
cols = ["File Path", "File Name", "rmse","zcr","mfcc", "spec_cent", "spec_band", "chroma", "tempo", "harmonic", "percussive"]
cols_new = [cols[0], cols[1]]
for x in range(2, len(cols)):
    cols_new.append(cols[x])
    if cols[x] != "harmonic" and cols[x] != "percussive" and cols[x] != "tempo": 
        cols_new.append(cols[x]+"_mean")
print(cols_new)
df = pd.DataFrame(extracted_data, columns=cols_new)
df.set_index('File Path', inplace=True)

Processed 1 features
Processed 2 features
Processed 3 features
Processed 4 features
Processed 5 features
Processed 6 features
Processed 7 features
Processed 8 features
Processed 9 features
Processed 10 features
Processed 11 features
Processed 12 features
Processed 13 features
Processed 14 features
Processed 15 features
Processed 16 features
Processed 17 features
Processed 18 features
Processed 19 features
Processed 20 features
Processed 21 features
Processed 22 features
Processed 23 features
Processed 24 features
Processed 25 features
Processed 26 features
Processed 27 features
Processed 28 features
Processed 29 features
Processed 30 features
Processed 31 features
Processed 32 features
Processed 33 features
Processed 34 features
Processed 35 features
Processed 36 features
Processed 37 features
Processed 38 features
Processed 39 features
Processed 40 features
Processed 41 features
Processed 42 features
Processed 43 features
Processed 44 features
Processed 45 features
Processed 46 featur



Error occured while loading /kaggle/input/gtzan-dataset-music-genre-classification/Data/genres_original/jazz/jazz.00054.wav
Processed 627 features
Processed 628 features
Processed 629 features
Processed 630 features
Processed 631 features
Processed 632 features
Processed 633 features
Processed 634 features
Processed 635 features
Processed 636 features
Processed 637 features
Processed 638 features
Processed 639 features
Processed 640 features
Processed 641 features
Processed 642 features
Processed 643 features
Processed 644 features
Processed 645 features
Processed 646 features
Processed 647 features
Processed 648 features
Processed 649 features
Processed 650 features
Processed 651 features
Processed 652 features
Processed 653 features
Processed 654 features
Processed 655 features
Processed 656 features
Processed 657 features
Processed 658 features
Processed 659 features
Processed 660 features
Processed 661 features
Processed 662 features
Processed 663 features
Processed 664 features
Pr

In [47]:
df.shape

(999, 16)

In [49]:
df.to_csv("/kaggle/working/data.csv")

## EDA on new features

In [None]:
sns.heatmap(df[df.columns[2:]].corr())
plt.title('Correlation Heatmap', fontsize = 20)
plt.xticks(fontsize = 10)
plt.yticks(fontsize = 10);