<a href="https://colab.research.google.com/github/aggapova/music_genre_classiffication/blob/main/genre_classification_ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Building an Artificial Neural Network(ANN) for the music genre classification.

### 1. Importing  dataset and necessary libraries.

In [None]:
!pip install datasets

In [None]:
from datasets import load_dataset

dataset = load_dataset("marsyas/gtzan")

In [3]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import keras
from keras.models import Sequential
import warnings
warnings.filterwarnings('ignore')

### 2. Converting the audio data files into PNG format images extracting the Spectrogram for every Audio.

In [4]:
cmap = plt.get_cmap('inferno')
plt.figure(figsize=(8, 8))

# Access genres from the loaded dataset
genres = dataset['train']['genre']
for i in range(len(genres)):
    g = genres[i]
    pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)

    # Access audio data from the loaded dataset
    audio_data = dataset['train']['file'][i]
    y, sr = librosa.load(audio_data, mono=True, duration=5)

    # Generate spectrogram and save as PNG
    plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB')
    plt.axis('off')
    plt.savefig(f'img_data/{g}/{i}.png')
    plt.clf()

<Figure size 800x800 with 0 Axes>

### 3. Creating a header for our csv file.

In [6]:
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

### 4. Extracting features from Spectrogram:
- MFCC,
- Spectral Centroid,
- Zero Crossing Rate,
- Chroma Frequencies,
-  Spectral Roll-off.

In [7]:
header = ["filename", "chroma_stft", "rmse", "spectral_centroid", "spectral_bandwidth", "rolloff", "zero_crossing_rate"] + [f"mfcc{i}" for i in range(1, 21)] + ["genre"]

file_path = 'dataset.csv'
with open(file_path, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

for row in dataset['train']:
    audio_data = row['audio']
    genre = row['genre']

    y, sr = librosa.load(audio_data['path'], mono=True, duration=30)
    rmse = np.mean(librosa.feature.rms(y=y))
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr))
    spec_cent = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spec_bw = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr), axis=1)

    to_append = [row['file'], chroma_stft, rmse, spec_cent, spec_bw, rolloff, zcr] + list(mfcc) + [genre]

    with open(file_path, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(to_append)


### 5. Data preprocessing: loading CSV data, label encoding, feature scaling and data split into training and test set.

In [8]:
data = pd.read_csv('dataset.csv')
data.head()
data = data.drop(['filename'],axis=1) # Dropping unneccesary columns
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()#Encoding the Labels
y = encoder.fit_transform(genre_list)
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))#Scaling the Feature columns
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)#Dividing data into training and Testing set

### 6. Building an ANN model.

In [9]:
from keras.layers import Dense
from keras import layers

model = Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

### 7. Fit the model.

In [10]:
classifier = model.fit(X_train,
                    y_train,
                    epochs=100,
                    batch_size=128)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [11]:
evaluation_result = model.evaluate(X_test, y_test)

# Print the evaluation result
print("Test Loss:", evaluation_result[0])
print("Test Accuracy:", evaluation_result[1])

Test Loss: 1.5972604751586914
Test Accuracy: 0.6650000214576721
