In [None]:
#run this once
!pip install librosa
!pip install gdown -U --no-cache-dir

In [None]:
#get the files
# Download the shared Google Drive folder into the working directory.
# NOTE(review): presumably this is the `audio/` folder whose WAV files are
# referenced by the path constants further down — confirm the folder name.
import gdown
url = 'https://drive.google.com/drive/folders/1rDbrXwWj6smXSmK3EkkxUTkP4AtauEK8'
gdown.download_folder(url)

In [None]:
import os
import librosa
import librosa.display
import IPython.display as ipd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import shutil
from pydub import AudioSegment
from pydub.silence import split_on_silence
from glob import glob
import pandas as pd

## Loading audio files with Librosa

In [None]:
# Relative paths to the source recordings inside the `audio/` folder.
# The four instrument/quality recordings are split into chunks in the
# preprocessing section; `scale_file` is not used by the cells shown here.
scale_file = "audio/scale.wav"
guitar_major = "audio/Guitar Major.wav"
guitar_minor = "audio/Guitar Minor.wav"
dPiano_major = "audio/Digital Piano Major.wav"
dPiano_minor = "audio/Digital Piano Minor.wav"

In [None]:
# Embed an audio player for the guitar major recording as the cell output.
ipd.Audio(guitar_major)

## Preprocessing

In [None]:
path = "Audio Chunks"
# Always start from an empty output folder: remove any previous one first,
# then (re)create it.
if os.path.exists(path):
    shutil.rmtree(path)
os.makedirs(path)
    
#crop function
def chunk(file, name, out_dir="Audio Chunks", min_silence_len=500, silence_thresh=-40):
    """Split a WAV recording on silence and export every segment as its own WAV.

    Parameters
    ----------
    file : str
        Path of the WAV file to split.
    name : str
        Prefix of the exported files; segments are written as
        ``<out_dir>/<name>_<k>.wav`` with ``k`` starting at 1.
    out_dir : str, optional
        Folder that receives the exported segments. It is created when missing.
    min_silence_len, silence_thresh : optional
        Forwarded unchanged to ``pydub.silence.split_on_silence``; the defaults
        are the values this notebook has always used.

    Returns
    -------
    None
        The function only writes files and prints one line per exported file.
    """
    sound_file = AudioSegment.from_wav(file)
    audio_chunks = split_on_silence(
        sound_file,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
    )

    # Exporting fails if the target folder is missing, so make sure it exists
    # even when the directory-reset cell has not been run.
    os.makedirs(out_dir, exist_ok=True)

    # `segment` (not `chunk`) so the loop variable no longer shadows this
    # function's own name.
    for index, segment in enumerate(audio_chunks, start=1):
        out_file = "{0}/{1}_{2}.wav".format(out_dir, name, index)
        print("exporting", out_file)
        segment.export(out_file, format="wav")

In [None]:
# Split each source recording into silence-delimited chunks, using the
# instrument and chord quality as the file-name prefix.
for recording, label in (
    (dPiano_major, "Digital Piano Major"),
    (dPiano_minor, "Digital Piano Minor"),
    (guitar_major, "Guitar Major"),
    (guitar_minor, "Guitar Minor"),
):
    chunk(recording, label)

## Extracting Short-Time Fourier Transform (Time Domain)

In [None]:
FRAME_SIZE = 2048 #window size (samples per FFT frame)
HOP_SIZE = 512 #hop length: samples between the starts of consecutive frames (not the overlap)

def getSTFT(file, n_fft=None, hop_length=None):
    """Compute the short-time Fourier transform of an audio signal.

    Parameters
    ----------
    file : array-like
        Audio samples, passed to ``librosa.stft`` as the signal.
    n_fft : int, optional
        FFT window size. Defaults to the module-level ``FRAME_SIZE``, read at
        call time.
    hop_length : int, optional
        Number of samples between consecutive frames. Defaults to the
        module-level ``HOP_SIZE``, read at call time.

    Returns
    -------
    The value returned by ``librosa.stft`` for the given settings.
    """
    # Resolve the defaults at call time so that editing FRAME_SIZE / HOP_SIZE
    # in the notebook takes effect without redefining this function.
    if n_fft is None:
        n_fft = FRAME_SIZE
    if hop_length is None:
        hop_length = HOP_SIZE
    return librosa.stft(file, n_fft=n_fft, hop_length=hop_length)

## Calculating the spectrogram (Frequency Domain)

In [None]:
def getSpec(stft):
    """Return the power spectrogram: the squared magnitude of every STFT value."""
    magnitude = np.abs(stft)
    return np.square(magnitude)

## Linear-Frequency Power Spectrogram

In [None]:
def getPower(spec):
    """Convert a power spectrogram to decibels.

    Thin wrapper around ``librosa.power_to_db`` called with its default
    settings; the converted array is returned unchanged.
    """
    return librosa.power_to_db(spec)

## Visualizing the spectrogram

In [None]:
def plot_spectrogram(Y, sr, hop_length, splot, title,
                     y_axis="linear", showcb=True):
    """Draw one spectrogram panel on the current matplotlib figure.

    Parameters
    ----------
    Y : array-like
        Spectrogram values handed to ``librosa.display.specshow``.
    sr : number
        Sampling rate forwarded to ``specshow``.
    hop_length : int
        Hop length forwarded to ``specshow``.
    splot : int
        Subplot specifier passed to ``plt.subplot`` (e.g. ``121``).
    title : str
        Panel title.
    y_axis : str, optional
        Frequency-axis scale forwarded to ``specshow``.
    showcb : bool, optional
        Whether to attach a colour bar to the panel.

    Returns
    -------
    The ``matplotlib.pyplot`` module, as the original helper did.
    """
    plt.subplot(splot)

    display_options = dict(
        sr=sr,
        hop_length=hop_length,
        x_axis="time",
        y_axis=y_axis,
        cmap='inferno',
    )
    librosa.display.specshow(Y, **display_options)

    if showcb:
        plt.colorbar(format="%+2.f")

    plt.title(title)
    return plt

In [None]:
# glob() yields matches in arbitrary, filesystem-dependent order; sorting keeps
# the figure numbering and processing order reproducible between runs.
audios = sorted(glob("Audio Chunks/*.wav"))

In [None]:
# Plot the dB power spectrogram of every chunk twice: linear and log frequency axis.
for i, wav in enumerate(audios):
    # File name without folder and extension. Path.stem uses the host OS's
    # separator, so this works on Windows and on POSIX/Colab alike (splitting
    # on "\\" raised IndexError wherever glob returns "/" separators).
    name = Path(wav).stem
    file, sr = librosa.load(wav) #load audio
    stft_out = getSTFT(file)
    spec_out = getSpec(stft_out)
    power_out = getPower(spec_out)
    plt.figure(i+1, figsize=(15,5))
    plot_spectrogram(power_out, sr, HOP_SIZE, 121, name + " Linear")
    plot_spectrogram(power_out, sr, HOP_SIZE, 122, name + " Log", y_axis="log")
    plt.show()

### To-do 2:  Feature Extraction
Encode the whole spectrogram's values into lists (one for frequency and one for intensity) and visualize them using histograms. Then create a data frame with the following columns: [`frequency`, `intensity`, `chord`].

In [None]:
# For every chunk, plot the raw power spectrogram values (left) and their dB
# version (right).
for i, wav in enumerate(audios):
    # File name without folder and extension. Path.stem uses the host OS's
    # separator, so this works on Windows and on POSIX/Colab alike (splitting
    # on "\\" raised IndexError wherever glob returns "/" separators).
    name = Path(wav).stem
    file, sr = librosa.load(wav) #load audio
    stft_out = getSTFT(file)
    spec_out = getSpec(stft_out)
    power_out = getPower(spec_out)

    # NOTE(review): the titles say "Histogram", but plt.plot on a 2-D array
    # draws one line per column (i.e. per STFT frame) against the row index.
    # Confirm whether plt.hist over the flattened values was intended.
    plt.figure(1+i, figsize=(10,5))
    plt.subplot(121)
    plt.title(name + "_Frequency Histogram")
    plt.plot(spec_out)

    plt.subplot(122)
    plt.title(name + "_Intensity Histogram")
    plt.plot(power_out)
    plt.show()

In [None]:
# Our hearing range is commonly 20 Hz to 20 kHz
# The table starts at 55 Hz, the note "A" obtained by halving 440 Hz three times.
# Every following note is 2^(1/12) times the previous one; 8 octaves of
# 12 notes give 96 values, built here as a running product.
semitone_ratio = np.power(2, 1/12)
freq_list = np.cumprod([55] + [semitone_ratio] * 95).tolist()

# One row per octave, one column per note name, rounded to 0.1 Hz.
cols = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
freq_array = np.round(freq_list, 1).reshape(8, 12)
df_note_freqs = pd.DataFrame(freq_array, columns=cols)
print("NOTE FREQUENCIES IN WESTERN MUSIC")
df_note_freqs.head(10)

### Extract notes

#### Get the Frequencies

In [None]:
# Frequency (Hz) of every STFT bin. n_fft follows FRAME_SIZE so the bins stay
# aligned with the spectrograms produced by getSTFT.
# NOTE: `sr` is whatever the most recently executed librosa.load returned, so
# at least one clip must have been loaded before this cell runs.
d = librosa.fft_frequencies(sr=sr, n_fft=FRAME_SIZE)
DF = pd.DataFrame()
# Overwrite the first bin (0 Hz) with a small positive frequency — presumably
# so that librosa.hz_to_note(d) never has to convert 0 Hz; confirm the value 3.
d[0] = 3

#### Extract the Intensities of Frequency per Window

In [None]:
def extractFeature(amp, minDb = -80, freqs=None): #minDb will extract only the frequencies with the specified intensity
    """Flatten a spectrogram into one row per (frame, frequency bin) above a threshold.

    Parameters
    ----------
    amp : 2-D array
        Spectrogram values indexed as ``amp[frequency_bin, frame]``.
    minDb : number, optional
        Only cells with ``amp >= minDb`` are kept (NaN cells are dropped).
    freqs : 1-D array-like, optional
        Frequency of each bin. Defaults to the module-level array ``d``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Time`` (frame index), ``Freq`` (bin frequency), ``Inten``
        (the spectrogram value, as float64) and ``Note`` (the name returned by
        ``librosa.hz_to_note`` for the bin frequency). Rows are ordered by
        frame, then by frequency bin, and the index holds the frequency-bin
        number, matching what the row-wise implementation produced.

    Raises
    ------
    ValueError
        If ``amp`` is not 2-D or ``freqs`` does not have one entry per
        frequency bin.
    """
    amp = np.asarray(amp)
    n_bins, n_frames = amp.shape

    if freqs is None:
        freqs = d  # bin frequencies computed in the "Get the Frequencies" cell
    freqs = np.asarray(freqs)
    if len(freqs) != n_bins:
        raise ValueError(
            "freqs has {0} entries but amp has {1} frequency bins".format(len(freqs), n_bins)
        )

    # Note names depend only on the bin frequencies, so compute them once
    # instead of once per frame.
    notes = np.asarray(librosa.hz_to_note(freqs), dtype=object)

    # Transposing makes nonzero() walk the cells frame by frame and, inside a
    # frame, by ascending bin — the row order of the original loop.
    frame_idx, bin_idx = np.nonzero((amp >= minDb).T)

    # Build the whole table in one go rather than concatenating one small
    # frame per STFT column.
    return pd.DataFrame(
        {
            'Time': frame_idx,
            'Freq': freqs[bin_idx],
            'Inten': amp[bin_idx, frame_idx].astype(float),
            'Note': notes[bin_idx],
        },
        index=bin_idx,
    )

#### Sample: extract features from a WAV file

In [None]:
# Load one chunk and show its spectrogram in dB relative to the loudest cell.
wav = "Audio Chunks/Guitar Major_2.wav"
file, sr = librosa.load(wav) #load audio
stft_out = getSTFT(file)
spec_out = getSpec(stft_out)
power_out = getPower(spec_out)
# spec_out is a power spectrogram (|STFT|^2), so it must be converted with
# power_to_db; amplitude_to_db would treat it as an amplitude and double every
# dB value. The name `amp_out` is kept because the next cell reads it.
amp_out = librosa.power_to_db(spec_out, ref=np.max)
plt.figure(1, figsize=(20,10))
# Use the same hop length as getSTFT so the time axis is scaled correctly.
librosa.display.specshow(amp_out, y_axis='log', sr=sr, hop_length=HOP_SIZE,
                         x_axis='time', cmap='inferno')
plt.colorbar(format="%+2.f")
plt.show()

In [None]:
# Build the feature table from the sample spectrogram, keeping only the cells
# whose value in amp_out is at least -30.
Extracted = extractFeature(amp_out, -30)

### To-do 3:  Modelling and Performance Evaluation
Using `decision trees` or `ensemble models` (`scikit learn`), fit a classifier (from the dataframe) using the frequency and intensity as inputs and the chord as output.  Test its accuracy and report if it is sufficient for practical applications.  Check for the predictors (nodes) and report if they could cover the whole spectrum.