### Etape nous permettant d'utiliser Google Colab (Optionnel)

In [1]:
# Start from a clean state: delete any previous checkout, then clone the project repository.
# The captured output of this cell reports about 1.35 GiB received, so the clone takes a while.
!rm -rf song-classifier/
!git clone https://github.com/joconte/song-classifier.git

Cloning into 'song-classifier'...
remote: Enumerating objects: 194, done.[K
remote: Total 194 (delta 0), reused 0 (delta 0), pack-reused 194[K
Receiving objects: 100% (194/194), 1.35 GiB | 14.27 MiB/s, done.
Resolving deltas: 100% (14/14), done.
Checking out files: 100% (149/149), done.


In [None]:
# Move into the cloned repository.
# NOTE(review): the relative paths used later ("song/...", "image-from-song/...") presumably
# resolve against this directory — confirm when running outside Colab.
%cd song-classifier

### Chargement des dépendances pour la transformation d'audio en image

In [1]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings emitted by the libraries imported above.
warnings.filterwarnings('ignore')

### Création de notre méthode de transformation d'audio en image

In [2]:
def audio2imageByFile(audioFilePath, imageFilePath, offset=30, duration=30):
    """Render an excerpt of an audio file as a mel-spectrogram image.

    The excerpt is loaded with librosa, turned into a 40-band mel power
    spectrogram, converted to decibels relative to its maximum value and
    written to disk with matplotlib's ``imsave``.

    Failures are reported on stdout and never raised, so that one unreadable
    file does not interrupt the conversion of a whole folder.

    Parameters
    ----------
    audioFilePath : str or pathlib.Path
        Audio file to read.
    imageFilePath : str or pathlib.Path
        Image file to write. Its parent directory is created when missing.
    offset : float, optional
        Start of the excerpt, in seconds from the beginning of the file.
    duration : float, optional
        Length of the excerpt, in seconds.

    Returns
    -------
    None
    """
    try:
        x, sample_rate = librosa.load(audioFilePath, offset=offset, duration=duration)
        if x.size == 0:
            # Presumably happens when the file is shorter than `offset` seconds;
            # report it clearly instead of relying on a downstream error.
            raise ValueError("No audio samples found after offset " + str(offset) + "s")

        n_fft = 1024
        hop_length = 256
        n_mels = 40
        fmin = 20
        fmax = sample_rate / 2  # upper bound of the mel filter bank: half the sample rate

        # The signal is passed by keyword: recent librosa releases no longer
        # accept it positionally, while older releases accept both forms.
        mel_spec_power = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_fft=n_fft,
                                                        hop_length=hop_length,
                                                        n_mels=n_mels, power=2.0,
                                                        fmin=fmin, fmax=fmax)
        mel_spec_db = librosa.power_to_db(mel_spec_power, ref=np.max)

        # Make sure the output folder exists before saving the image.
        pathlib.Path(imageFilePath).parent.mkdir(parents=True, exist_ok=True)
        plt.imsave(imageFilePath, mel_spec_db)
        print(audioFilePath)
    except Exception as e:
        # Deliberate best-effort: log the problem and let the caller move on to the next file.
        print(e)
        print("Error on file " + str(audioFilePath))

In [3]:
def audio2image(folder, destination):
    """Convert every ``.mp3`` file found under ``folder`` into a spectrogram image.

    The folder is searched recursively. Each audio file produces one image in
    ``destination`` named after the audio file, with ``.png`` replacing the
    ``.mp3`` extension. The conversion itself is delegated to
    ``audio2imageByFile``.

    Parameters
    ----------
    folder : str or pathlib.Path
        Directory searched recursively for ``.mp3`` files.
    destination : str or pathlib.Path
        Directory receiving the images; created (with parents) when missing.

    Returns
    -------
    None
    """
    source_dir = pathlib.Path(folder)
    target_dir = pathlib.Path(destination)

    # Create the output folder up front so saving cannot fail just because it is missing.
    target_dir.mkdir(parents=True, exist_ok=True)

    # Sorted so that files are processed in the same order on every run.
    for audio_path in sorted(source_dir.glob('**/*.mp3')):
        # `stem` is the file name without its extension.
        audio2imageByFile(audio_path, target_dir / (audio_path.stem + ".png"))
            

### Conversion de nos fichiers audio en images

In [21]:
# Music genres handled by this notebook; each name is used as a sub-folder name
# when building the song and image paths.
classes = [
    'classique',
    'hard-tech',
    'rap',
    'rock',
]

In [22]:
# Convert the songs of every genre, training subset first and test subset second,
# mirroring the "song/<subset>/<genre>" layout under "image-from-song/".
for classe in classes:
    for subset in ("train", "test"):
        audio2image("song/" + subset + "/" + classe, "image-from-song/" + subset + "/" + classe)

song/train/classique/beethoven-moonlight-sonata-piano-orchestra.mp3
song/train/classique/Swan Lake Waltz - Tchaikovsky.mp3
song/train/classique/Mozart Symphony #40 in G Minor, K 550 - 1. Molto Allegro.mp3
song/train/classique/Vivaldi - Cello Concerto in C Minor, RV 402 - 1, Allegro.mp3
song/train/classique/Karajan - Brahms Symphony No. 2 in D, Op. 73 - I. Allegro non troppo (Part 1).mp3
song/train/classique/vivaldi-four-seasons-winter-linverno-complete-cynthia-freivogel-voices-of-music-4k-rv-297.mp3
song/train/classique/Franz Schubert - Erlkönig (Roi des Aulnes).mp3
song/train/classique/johannes brahms hungarian dance 1 one.mp3
song/train/classique/Pachelbel's Canon.mp3
song/train/classique/Émile Waldteufel - The Skater's Waltz, Op. 183.mp3
song/train/classique/Johannes Brahms - Danse hongroise N°5.mp3
song/train/classique/Prokofiev - Roméo et Juliette - Danse des Chevaliers.mp3
song/train/classique/The  Entertainer - Scott Joplin (Orchestral).mp3
song/train/classique/Wolfgang Amade

song/train/rap/wonderful.mp3
song/train/rap/Gucci Mane - Both Sides feat. Lil Baby.mp3
song/train/rap/The Kid LAROI, Juice WRLD - GO (Official Video).mp3
song/train/rap/Eminem - Beautiful (Official Music Video).mp3
song/train/rap/Lil Durk - 3 Headed Goat ft. Lil Baby & Polo G (Dir. by @_ColeBennett_).mp3
song/train/rap/gambi-popopop-clip-officiel.mp3
song/train/rap/Lpb Poody _ Address it _ [ Official Audio ].mp3
song/train/rap/A Boogie Wit Da Hoodie - Bleed [Official Music Video].mp3
song/train/rap/Eminem - Sing For The Moment.mp3
song/train/rap/DaBaby – ROCKSTAR FT RODDY RICCH [Audio].mp3
song/train/rap/NLE Choppa - Shotta Flow 5 (Dir. by @_ColeBennett_).mp3
song/train/rap/Young Thug, Travis Scott - Pick Up the Phone (Explicit) (Official Music Video) ft. Quavo.mp3
song/train/rap/21 Savage, Offset & Metro Boomin - 'Ghostface Killers' Ft Travis Scott (Official Audio).mp3
song/train/rap/Jul - J'oublie tout [Son Officiel].mp3
song/train/rap/Moneybagg Yo - Me Vs Me (Official Music Video).m

song/test/rock/Queen – Bohemian Rhapsody (Official Video Remastered).mp3
song/test/rock/DEF LEPPARD - 'Pour Some Sugar On Me' (Official Music Video).mp3
song/test/rock/Bad Company - Feel Like Making Love.mp3
song/test/rock/Pink Floyd - Money (Official Music Video).mp3
song/test/rock/Meat Loaf - I'd Do Anything For Love (But I Won't Do That) (Official Music Video).mp3
song/test/rock/Joan Jett & the Blackhearts - I Love Rock N Roll.mp3
song/test/rock/Pink Floyd - Wish You Were Here (with lyrics).mp3
song/test/rock/Bon Jovi - Wanted Dead Or Alive (Official Music Video).mp3
song/test/rock/Creedence Clearwater Revival - Have You Ever Seen The Rain.mp3
song/test/rock/santana- black magic woman.mp3
song/test/rock/Rainbow - Since You've Been Gone.mp3
song/test/rock/Van Halen - Jump (Official Music Video).mp3
song/test/rock/Aerosmith - Livin' On The Edge (Official Music Video).mp3
song/test/rock/Dio - Heaven And Hell Live 1986.mp3
song/test/rock/Gary Moore - Still Got The Blues.mp3
song/test/ro

### Import des dépendances de FASTAI pour effectuer notre classification

In [6]:
from fastai.vision import *

In [None]:
import torch

# Report the accelerator available for training. Querying device 0 raises when
# PyTorch has no usable CUDA device, so fall back to a plain message on CPU-only machines.
device_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu (no CUDA device available)"
device_name

### Chargement de notre dataset

In [None]:
# Seed NumPy before building the data bunch: the 25% validation split is drawn at random,
# so without a fixed seed every run validates on different images and the reported
# accuracies cannot be reproduced.
np.random.seed(42)
# NOTE(review): in fastai v1 the `test` folder is presumably loaded as an unlabeled test set,
# so the accuracy metric is computed on the random validation split only — confirm.
data = ImageDataBunch.from_folder(path="image-from-song/", train="train", test="test", valid_pct=0.25, bs=32).normalize(imagenet_stats)

In [None]:
# Display a sample of the loaded images (rows=3) as a visual sanity check.
data.show_batch(rows=3)

### Chargement du modèle

In [None]:
# Build a CNN learner on the data bunch with the resnet50 architecture, tracking accuracy as the metric.
learn = cnn_learner(data, models.resnet50, metrics=accuracy)

### Recherche du taux d'apprentissage optimal

In [None]:
# Run the learning-rate finder; its results are read back from learn.recorder in the next cells.
learn.lr_find()

In [None]:
# Plot the learning-rate finder results and ask for a suggested learning rate to be marked.
learn.recorder.plot(suggestion=True)

In [None]:
# Keep the recorder's min_grad_lr value as the learning rate for the training cells below.
lr = learn.recorder.min_grad_lr

### Entraînement

In [None]:
# Run 4 epochs.
learn.fit_one_cycle(4, slice(lr))

In [None]:
# Then run epochs one at a time for as long as train_loss and valid_loss keep decreasing
# and accuracy keeps increasing.
# NOTE(review): this cell is meant to be re-executed by hand, so a single "Run All"
# will not reproduce the figure quoted below.
learn.fit_one_cycle(1, slice(lr))

# After about ten epochs we reach an accuracy of 90%.

In [None]:
# Look at the confusion matrix.
# get_preds(with_loss=True) is unpacked into predictions, targets and losses
# (presumably computed on the validation set, fastai's default — confirm).
preds, y, losses = learn.get_preds(with_loss=True)

# Build the interpretation object from those three results and plot the matrix.
interp = ClassificationInterpretation(learn, preds, y, losses)
interp.plot_confusion_matrix(figsize=(12,12), dpi=60)

In [None]:
# We decide to unfreeze the predefined weights of the resnet50 model.
learn.unfreeze()

### Recalcul du taux d'apprentissage optimal

In [None]:
# Run the learning-rate finder again, now that the model has been unfrozen.
learn.lr_find()

In [None]:
# Plot the new learning-rate finder results with a suggested learning rate marked.
learn.recorder.plot(suggestion=True)

In [None]:
# Replace the learning rate with the recorder's new min_grad_lr value.
lr = learn.recorder.min_grad_lr

### Entraînement avec poids 'unfreeze'

In [None]:
# Run epochs one at a time for as long as train_loss and valid_loss keep decreasing
# and accuracy keeps increasing.
# NOTE(review): this cell is meant to be re-executed by hand, so a single "Run All"
# will not reproduce the figure quoted below.
learn.fit_one_cycle(1, slice(lr))

# Here we get an accuracy of 97.37%.

### Sauvegarde de notre modèle

In [None]:
# learn.save() is not used because it creates a larger file (320 MB vs 97 MB)
# that holds not just the weights but a complete model ready to be retrained.
# NOTE(review): the exported file is presumably the one read back by
# load_learner('image-from-song/') in the next section — confirm its location.
learn.export()

### Chargement de notre modèle

In [7]:
# Reload a learner from the 'image-from-song/' folder; this rebinds `learn`.
# NOTE(review): presumably reads the file written by learn.export() — confirm.
learn = load_learner('image-from-song/')

### Prédiction

In [19]:
# The technique used here is slightly different from the one seen in class.
# Its advantage is that it does not depend on the 'data' object, and it also gives the detail of the prediction:
# the predicted class and the score for each category.
# So you can simply run the FASTAI dependency import, the model loading above and this cell.
# NOTE(review): this cell also calls audio2imageByFile, so the cells that import the audio
# dependencies and define that function must have been run as well.
# NOTE(review): `blah` is not defined in this notebook; presumably a project-local helper
# that opens a file-picker dialog — confirm.
import blah
audioFilenameBytes = blah.gui_fname() # <-- Asks the user to choose an audio file
# The chosen path is decoded from UTF-8 bytes into a str.
audioFilename = audioFilenameBytes.decode('utf-8')
imageFilename = "prediction.png"
# Remove any image left by a previous run so a failed conversion cannot reuse a stale file.
!rm -rf prediction.png
audio2imageByFile(audioFilename, imageFilename)
img = open_image(imageFilename)
# The third value returned by predict is paired with the class names below as per-class scores
# (despite the name `losses`, the captured output shows values between 0 and 1).
pred_class, pred_idx, losses = learn.predict(img)
print(pred_class)
# Print the prediction and the per-class scores, highest score first.
print({"prediction": str(pred_class), "scores": sorted(zip(learn.data.classes, map(float, losses)), key=lambda p: p[1], reverse=True)})

/Users/jo/Downloads/song-classifier/song/test/hard-tech/N-Vitral presents BOMBSQUAD - Mainstream Mutilators [Official Music Video].mp3
hard-tech
{'prediction': 'hard-tech', 'scores': [('hard-tech', 0.9843913316726685), ('rock', 0.011994164437055588), ('classique', 0.0032370395492762327), ('rap', 0.0003773753414861858)]}
