In [1]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics 

#Keras
import keras

from collections import Counter

import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [8]:
import tensorflow as tf

In [None]:
def normalize(vector):
    # Subtract the mean, and scale to the interval [-1,1]
    vector_minusmean = vector - vector.mean()
    return vector_minusmean/np.abs(vector_minusmean).max()


def feature_extraction(music):
    y, sr= librosa.load(music, mono=True, duration=30)

    y=normalize(y)
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr) #spectral centroid
    rolloff=librosa.feature.spectral_rolloff(y, sr) #spectral rolloff
    flux=librosa.onset.onset_strength(y, sr) #spectral flux
    zcr=librosa.feature.zero_crossing_rate(y) #zero crossing rate
    mfcc=librosa.feature.mfcc(y=y, sr=sr)[:5] #first five mfcc vectors
        
    rmse=librosa.feature.rms(y=y, hop_length=sr)
    rmsmean=rmse.mean()
    low_energy=((y<rmsmean).sum())/y.shape[0] #low energy
        
    to_append = f'{centroid.mean()} {centroid.var()} {rolloff.mean()} {rolloff.var()} {flux.mean()} {flux.var()} {zcr.mean()} {zcr.var()} {low_energy} '    
    for e in mfcc:
        to_append += f'{e.mean()} {e.var()} '
    to_append=to_append.split()
    to_append=np.array(to_append, dtype=np.float64)
    
    X=scaler.transform(to_append.reshape(1,-1))
        
    return X

In [3]:
data = pd.read_csv('gtzan_timbral_feature.csv')

data = data.drop(['filename'],axis=1)

genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)


Counter(genre_list)

Counter({'blues': 100,
         'classical': 100,
         'country': 100,
         'disco': 100,
         'hiphop': 100,
         'jazz': 100,
         'metal': 100,
         'pop': 100,
         'reggae': 100,
         'rock': 100})

In [4]:
scaler = StandardScaler()
scaler.fit(np.array(data.iloc[:, :-1], dtype = float))
X = scaler.transform(np.array(data.iloc[:, :-1], dtype = float))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [48]:
y_test.shape[0]

200

In [5]:
from training.train import RandomForest

model=RandomForest(X_train, X_test, y_train, y_test)

총 200곡 중 61.00% 정확도로 장르를 맞춤


In [8]:
audio_path="C:\\Users\\admin\\OneDrive - 경희대학교\\2020년 1학기 강의자료\\데이터분석캡스톤디자인\\프로젝트\\음악\\"
song="My Way(jazz).wav"
user_feature=feature_extraction(audio_path+song)

genre_dict[int(model.predict(user_feature))]

'hiphop'

In [4]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)))

model.add(layers.Dense(256, activation='relu'))

model.add(layers.Dense(128, activation='relu'))

model.add(layers.Dense(64, activation='relu'))

model.add(layers.Dense(10, activation='softmax'))


model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [5]:
history = model.fit(X_train,
                    y_train,
                    epochs=30,
                    batch_size=128)

results = model.evaluate(X_test, y_test)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [6]:
results

[1.0136216259002686, 0.675000011920929]

In [11]:

x_val = X_train[:200]
partial_x_train = X_train[200:]

y_val = y_train[:200]
partial_y_train = y_train[200:]

In [12]:
partial_x_train.shape

(600, 19)

In [33]:
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
#model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(partial_x_train,
          partial_y_train,
          epochs=100,
          batch_size=256,
          validation_data=(x_val, y_val))
results = model.evaluate(X_test, y_test)

Train on 600 samples, validate on 200 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100


Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [34]:
print("test loss: ", results[0])
print("test accuracy: ", results[1])

test loss:  1.3084785318374634
test accuracy:  0.625


In [7]:
#genre_dict={0:'blues', 1:'classical', 2: 'electro', 3:'hiphop', 4:'jazz', 5:'pop', 6:'rock'} #data_down, feature200_7
#genre_dict={0:'blues', 1:'classical', 2:'hiphop', 3:'jazz', 4:'pop', 5:'rock'} #data_down, feature200_7에서 electro 뺀거
genre_dict={0:'blues', 1:'classical', 2:'hiphop', 3:'disco', 4: 'hiphop', 5: 'jazz', 6: 'metal', 7: 'pop', 8: 'reggae', 9:'rock'}


In [None]:
audio_path="C:\\Users\\admin\\OneDrive - 경희대학교\\2020년 1학기 강의자료\\데이터분석캡스톤디자인\\프로젝트\\음악\\"

for song in os.listdir(audio_path):
    if(song[-3:]=="wav"):
        user_feature=feature_extraction(audio_path+song)
        predictions = model.predict(user_feature)
        print(song, ":", genre_dict[np.argmax(predictions)])

Cemetery Gates(metal).wav : classical
Comme Ce Jour(classical).wav : classical
Enter Sandman(metal).wav : classical
epilogue-lalaland(classical).wav : classical
Fly Me To The Moon(jazz).wav : hiphop


In [19]:
"sdf.mp3"[-3:]

'mp3'

In [5]:
import torch
import torch.nn.functional as F
from torch.autograd import Variable

class Model(torch.nn.Module):
    def __init__(self, inp, outp):
        super(Model, self).__init__()
        self.linear1=torch.nn.Linear(inp, 256)
        self.linear2=torch.nn.Linear(128, 64)
        self.linear3=torch.nn.Linear(64, outp)
        
        def forward(self, x):
            relu1=F.relu(self.linear1(x))
            relu2=F.relu(self.linear2(relu1))
            y_pred = F.softmax(self.linear3(relu2))
            
            return y_pred

In [10]:
inp=X_train.shape[1]
outp=6

x=Variable(torch.Tensor(X_train))
y=Variable(torch.Tensor(y_train), requires_grad=False)

model=Model(inp, outp)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for t in range(500):
    y_pred=model(x)
    
    loss = criterion(y_pred, y)
    print(t, loss.data[0])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

NotImplementedError: 