In [1]:
!rm -rf song-classifier/
!git clone https://github.com/joconte/song-classifier.git

Cloning into 'song-classifier'...
remote: Enumerating objects: 194, done.[K
remote: Total 194 (delta 0), reused 0 (delta 0), pack-reused 194[K
Receiving objects: 100% (194/194), 1.35 GiB | 14.27 MiB/s, done.
Resolving deltas: 100% (14/14), done.
Checking out files: 100% (149/149), done.


In [2]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import pathlib

In [3]:
def audio2image(folder, destination, offset=30, duration=30):
    """Render a mel-spectrogram PNG for every MP3 found under ``folder``.

    Each ``*.mp3`` file (searched recursively) is loaded with librosa,
    restricted to ``duration`` seconds starting ``offset`` seconds into the
    track, converted to a 40-band mel power spectrogram expressed in decibels
    relative to its own peak, and saved as
    ``<destination>/<mp3 name without extension>.png``. The path of every
    processed file is printed as a progress trace.

    Args:
        folder: Directory searched recursively for ``*.mp3`` files.
        destination: Directory that receives the PNG images. It is created
            (including parents) when it does not exist yet.
        offset: Seconds skipped at the start of each track (default 30).
        duration: Seconds of audio analysed per track (default 30).

    Returns:
        None.
    """
    source_dir = pathlib.Path(folder)
    target_dir = pathlib.Path(destination)
    # plt.imsave does not create missing directories, so make sure the
    # output folder exists before the first image is written.
    target_dir.mkdir(parents=True, exist_ok=True)

    # Spectrogram settings are identical for every file.
    n_fft = 1024
    hop_length = 256
    n_mels = 40
    fmin = 20

    for filename in source_dir.glob('**/*.mp3'):
        x, sample_rate = librosa.load(filename, offset=offset, duration=duration)
        if x.size == 0:
            # Nothing was decoded (e.g. the track ends before `offset`);
            # there is no signal to turn into an image.
            print("skipped (no audio after offset):", filename)
            continue

        # Use the full band up to the Nyquist frequency of the loaded signal.
        fmax = sample_rate / 2

        # `y` is passed by keyword: recent librosa releases reject a
        # positional signal argument, and the keyword form works on older
        # releases as well.
        mel_spec_power = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_fft=n_fft,
                                                        hop_length=hop_length,
                                                        n_mels=n_mels, power=2.0,
                                                        fmin=fmin, fmax=fmax)
        mel_spec_db = librosa.power_to_db(mel_spec_power, ref=np.max)
        plt.imsave(target_dir.joinpath(filename.stem + ".png"), mel_spec_db)
        print(filename)

In [4]:
# Genres processed by the cells below. The commented-out alternative listed
# four genres ('classique', 'pop', 'rap', 'rock'); only two are enabled here.
classes = [
    'classique',
    'rap',
]

In [None]:
# Convert the songs of every selected genre to spectrogram images,
# handling the train split first and then the test split for each genre.
for classe in classes:
    for subset in ("train", "test"):
        audio2image(
            "song-classifier/song/" + subset + "/" + classe,
            "song-classifier/image-from-song/" + subset + "/" + classe,
        )

song-classifier/song/train/classique/Swan Lake Waltz - Tchaikovsky.mp3
song-classifier/song/train/classique/Émile Waldteufel - The Skater's Waltz, Op. 183.mp3
song-classifier/song/train/classique/Johann Sebastian Bach-Air on G String.mp3
song-classifier/song/train/classique/The  Entertainer - Scott Joplin (Orchestral).mp3
song-classifier/song/train/classique/Johannes Brahms - Danse hongroise N°5.mp3
song-classifier/song/train/classique/vivaldi-four-seasons-winter-linverno-complete-cynthia-freivogel-voices-of-music-4k-rv-297.mp3
song-classifier/song/train/classique/Maple leaf rag - Scott Joplin.mp3
song-classifier/song/train/classique/Beethoven - Sonate au Clair de Lune.mp3
song-classifier/song/train/classique/Mozart Symphony #40 in G Minor, K 550 - 1. Molto Allegro.mp3
song-classifier/song/train/classique/Franz Schubert - Erlkönig (Roi des Aulnes).mp3
song-classifier/song/train/classique/Pachelbel's Canon.mp3
song-classifier/song/train/classique/Haendel - Sarabande.mp3
song-classifier/

In [None]:
# NOTE(review): the star import presumably supplies the fastai names used by
# the later cells (ImageDataBunch, imagenet_stats, cnn_learner, models,
# accuracy, ClassificationInterpretation, open_image) -- confirm before
# replacing it with explicit imports.
from fastai.vision import *
import warnings
# Suppresses every warning category for the rest of the session.
warnings.filterwarnings('ignore')

import torch

# Show which device is available for training. Querying the CUDA device name
# raises on a machine without a usable GPU, so fall back to "cpu" instead of
# aborting the notebook run.
device_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"
device_name

In [None]:
# Seed NumPy's global RNG so the random 25% validation hold-out is the same
# on every run; otherwise metrics are not comparable between runs.
np.random.seed(42)
# The checkout contains only 149 files in total, so the training split is
# always smaller than the previous batch size of 128. NOTE(review): fastai v1
# is understood to drop the last incomplete training batch, which would leave
# the training loader without any batch -- hence the small batch size.
data = ImageDataBunch.from_folder(path="song-classifier/image-from-song/", train="train", test="test", valid_pct=0.25, bs=8).normalize(imagenet_stats)

In [None]:
# Visual sanity check: display a sample of the data bunch (rows=3).
data.show_batch(rows=3)

In [None]:
# Build a CNN learner on `data` using the resnet50 architecture, with
# accuracy as the reported metric.
learn = cnn_learner(data, models.resnet50, metrics=accuracy)

In [None]:
# Run the learner's learning-rate finder; the recorded results are plotted
# by the next cell.
learn.lr_find()

In [None]:
# Plot what the recorder captured, asking for a learning-rate suggestion.
# NOTE(review): presumably this call is what populates
# `learn.recorder.min_grad_lr`, which the next cell reads -- confirm.
learn.recorder.plot(suggestion=True)

In [None]:
# Keep the recorder's `min_grad_lr` value as the learning rate for training.
# NOTE(review): this attribute is presumably only set after the plotting cell
# above has run with suggestion=True; running this cell first would fail.
lr = learn.recorder.min_grad_lr

In [None]:
# Train with the one-cycle schedule: first argument 4 (presumably the number
# of epochs -- confirm), learning rate given as slice(lr) using the value
# selected in the previous cell.
learn.fit_one_cycle(4, slice(lr))

In [None]:
# Collect predictions, targets and per-item losses from the learner
# (presumably on the validation split, the library default -- confirm).
preds, y, losses = learn.get_preds(with_loss=True)
interp = ClassificationInterpretation(learn, preds, y, losses)

# Draw the confusion matrix with an explicit figure size and resolution.
confusion_plot_options = {"figsize": (12, 12), "dpi": 60}
interp.plot_confusion_matrix(**confusion_plot_options)

In [None]:
#learn.save("")

In [None]:
# Fine-tuning step: unfreeze the learner (presumably making all layer groups
# trainable -- confirm).
# NOTE(review): no training call follows in the visible cells, so this has no
# effect on the prediction made below.
learn.unfreeze()

In [None]:
# Imported under an alias so it does not rebind any `Image` name brought in by
# the earlier fastai star import.
from IPython.display import Image as IPythonImage

# Pick a spectrogram produced by this notebook. The previous hard-coded path
# pointed at a photo from an unrelated project that this notebook never
# downloads, so the cell failed on a fresh run.
test_images = sorted(pathlib.Path("song-classifier/image-from-song/test").glob("**/*.png"))
if not test_images:
    raise FileNotFoundError(
        "no spectrogram PNG found under song-classifier/image-from-song/test; "
        "run the audio2image cell first"
    )
# Kept as a plain string, like before, for the cells that reuse `filename`.
filename = str(test_images[0])
IPythonImage(filename=filename)



In [None]:
# Load the sample image selected above and run it through the trained learner.
img = open_image(filename)
ypred = learn.predict(img)

# The second element of the prediction is used as the class index; translate
# it into the matching label from the data bunch.
class_index = ypred[1].item()
category = data.classes[class_index]
print(category)