### Genre Classification from audio file

##### Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import IPython
import scipy 
import sys
import pickle 
import librosa 
import librosa.display
from IPython.display import Audio 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder 
import tensorflow as tf 
from tensorflow import keras 

##### Data Load and Visualization

In [None]:
# Read the 3-second feature table and discard the 'filename' column; the cells
# below use the last remaining column as the class and the rest as features.
df = pd.read_csv('./Data/features_3_sec.csv').drop(columns='filename')

In [None]:
# Path of the single sample clip used by the visualisation cells below.
audio_recording = "./Data/genres_original/country/country.00050.wav"
# Load the clip without passing `sr`, i.e. at librosa's default sampling rate;
# `data` and `sr` are reused by the plotting cells.
data , sr = librosa.load(audio_recording)
# print(type(data), type(sr))
# Load the same file again with sr = 45600. The result is not assigned, so in
# a notebook it is only echoed as the cell output.
librosa.load(audio_recording, sr = 45600)

In [None]:
# Render an inline player for the loaded clip at its sampling rate.
Audio(data, rate=sr)

In [None]:
# Plot the waveform of the loaded clip.
# librosa 0.10 removed `waveplot` in favour of `waveshow`; use whichever this
# installation provides so the cell runs on both old and new releases.
_plot_wave = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
plt.figure(figsize=(12, 4))
# Pass the clip's own sampling rate so the time axis is derived from the same
# rate the audio was loaded with, as the roll-off cell already does.
_plot_wave(data, sr=sr, color="#2B4F72")
plt.show()

In [None]:
# Short-time Fourier transform of the clip, displayed on a linear amplitude
# scale (the following cell shows the decibel-scaled version).
stft = librosa.stft(data)
# Kept so the name exists after this cell as before; it is not plotted here.
stft_db = librosa.amplitude_to_db(np.abs(stft))
plt.figure(figsize=(14, 6))
# `stft` is complex-valued, so plot its magnitude explicitly instead of
# handing the complex matrix to specshow.
librosa.display.specshow(np.abs(stft), sr=sr, x_axis='time', y_axis='hz')
plt.colorbar()

In [None]:
# Spectrogram of the clip with the STFT magnitude converted to decibels.
stft = librosa.stft(data)
stft_db = librosa.amplitude_to_db(np.abs(stft))
plt.figure(figsize=(14, 6))
librosa.display.specshow(stft_db, x_axis='time', y_axis='hz', sr=sr)
plt.colorbar()

In [None]:
from sklearn.preprocessing import normalize
# Spectral roll-off of the clip (first row of the returned array), computed on
# the signal offset by 0.01. The audio is passed as `y=` because librosa 0.10
# made it keyword-only; the keyword form is also accepted by older releases.
spectral_rolloff = librosa.feature.spectral_rolloff(y=data + 0.01, sr=sr)[0]
plt.figure(figsize=(12, 4))
# librosa 0.10 removed `waveplot` in favour of `waveshow`; use whichever this
# installation provides.
_plot_wave = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
_plot_wave(data, sr=sr, alpha=0.4, color="#2B4F72")

In [None]:
import librosa.display as lplt
# Chromagram of the clip. The audio is passed as `y=` because librosa 0.10
# made it keyword-only; the keyword form is also accepted by older releases.
chroma = librosa.feature.chroma_stft(y=data, sr=sr)
plt.figure(figsize=(16, 6))
lplt.specshow(chroma, sr=sr, x_axis='time', y_axis='chroma', cmap='coolwarm')
plt.colorbar()
plt.title("Chroma Features")
plt.show()

In [None]:
# Zoom in on the samples in the half-open index range [start, end).
start, end = 1000, 1200
plt.figure(figsize=(14, 5))
plt.plot(data[start:end], color="#2B4F72")
plt.grid()

In [None]:
# Zero-crossing indicator for each sample of the zoomed window; summing the
# indicators gives the number of crossings.
zero_cross_rate = librosa.zero_crossings(data[start:end], pad=False)
print("The number of zero-crossings is :", zero_cross_rate.sum())

##### Data Preprocessing

In [None]:
# The last column of the feature table holds the class of each row; fit an
# encoder on it and map every class to an integer target.
class_list = df.iloc[:, -1]
convertor = LabelEncoder().fit(class_list)
y = convertor.transform(class_list)

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise every column except the last one (the class column).
# NOTE(review): the scaler is fitted on all rows before the train/test split
# performed in the next cell, so held-out rows contribute to the scaling
# statistics - confirm this is acceptable for the reported test accuracy.
fit = StandardScaler()
X = fit.fit_transform(df.iloc[:, :-1].to_numpy(dtype=float))

In [None]:
# Hold out 33% of the rows for testing. No random_state is passed, so the
# partition differs between runs. Then report the size of each part.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
print(y_train.shape[0], y_test.shape[0])

##### Neural Network (Dense / MLP) Implementation

In [None]:
from keras.models import Sequential

In [None]:
def trainModel(model, epochs, optimizer, batch_size=128):
    """Compile ``model`` and fit it on the notebook's train/test split.

    The training and validation arrays are read from the module-level names
    ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Args:
        model: Object exposing Keras-style ``compile`` and ``fit`` methods.
        epochs: Number of epochs forwarded to ``fit``.
        optimizer: Optimizer (name or instance) forwarded to ``compile``.
        batch_size: Mini-batch size forwarded to ``fit``; defaults to the
            previously hard-coded value of 128.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        # Passed as a list: newer Keras releases reject a bare string here.
        metrics=['accuracy'],
    )
    return model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
    )

In [None]:
def plotValidate(history):
    """Print the highest recorded validation accuracy and plot all metrics.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists and contains a ``"val_accuracy"`` entry.
    """
    best_val_accuracy = max(history.history["val_accuracy"])
    print("Validation Accuracy", best_val_accuracy)
    curves = pd.DataFrame(history.history)
    curves.plot(figsize=(12, 6))
    plt.show()

In [None]:
# Fully connected classifier: four ReLU layers of decreasing width, each
# followed by 20% dropout, ending in a 10-way softmax output.
model = keras.models.Sequential([
    keras.layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(10, activation='softmax'),
])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit a stray "None" line).
model.summary()
model_history = trainModel(model=model, epochs=100, optimizer='adam')

In [None]:
# Score the trained network on the held-out split and report loss and
# accuracy (accuracy is shown as a percentage).
# NOTE(review): the printed label says "Best", but the value is the accuracy
# returned by this single evaluate() call - confirm the intended wording.
test_loss, test_acc = model.evaluate(X_test, y_test, batch_size=128)
print("The test Loss is :", test_loss)
print("\nThe Best test Accuracy is:", 100 * test_acc)

##### KNN Implementation

In [None]:
!pip install python_speech_features
!pip install scipy

In [None]:
import scipy.io.wavfile as wav
from python_speech_features import mfcc
from tempfile import TemporaryFile
import os
import math
import pickle
import random
import operator

In [None]:
def getNeighbors(trainingset, instance, k):
    """Return the labels of the ``k`` training items closest to ``instance``.

    Closeness is the sum of ``distance`` evaluated in both directions
    (training item to instance and instance to training item). The label of a
    training item is read from index 2.

    Args:
        trainingset: Sequence of training items; each is passed to
            ``distance`` and indexed at position 2 for its label.
        instance: The item to classify, in the form ``distance`` accepts.
        k: Number of neighbours to return; also forwarded to ``distance``.

    Returns:
        List of labels ordered from nearest to farthest. If ``k`` exceeds the
        number of training items, all labels are returned instead of raising
        ``IndexError``.
    """
    scored = [
        (
            candidate[2],
            distance(candidate, instance, k) + distance(instance, candidate, k),
        )
        for candidate in trainingset
    ]
    scored.sort(key=operator.itemgetter(1))
    # max(k, 0) keeps a non-positive k yielding an empty list rather than a
    # negative slice bound.
    return [label for label, _ in scored[:max(k, 0)]]

In [None]:
def nearestclass(neighbors):
    """Return the label that occurs most often in ``neighbors``.

    When several labels share the highest count, the one that appeared first
    in ``neighbors`` is returned.
    """
    votes = {}
    for label in neighbors:
        votes[label] = votes.get(label, 0) + 1

    # A stable descending sort keeps first-seen order among equal counts.
    ranked = sorted(votes.items(), key=operator.itemgetter(1), reverse=True)
    winner = ranked[0]
    return winner[0]

In [None]:
def getAccuracy(testSet, prediction):
    """Return the fraction of test items whose label equals its prediction.

    Args:
        testSet: Sequence of test items; the true label is the last element
            of each item.
        prediction: Sequence of predicted labels aligned with ``testSet``.
            Entries beyond ``len(testSet)`` are ignored.

    Returns:
        Accuracy as a float in [0, 1]; ``0.0`` for an empty ``testSet``
        (instead of dividing by zero).

    Raises:
        IndexError: If ``prediction`` has fewer entries than ``testSet``.
    """
    total = len(testSet)
    if total == 0:
        return 0.0
    if len(prediction) < total:
        raise IndexError("prediction has fewer entries than testSet")
    correct = sum(
        1 for sample, predicted in zip(testSet, prediction)
        if sample[-1] == predicted
    )
    return correct / total

In [None]:
dataset = []


def loadDataset(filename, split, trset, teset):
    """Load pickled records from ``filename`` and split them at random.

    Records are read one ``pickle.load`` at a time until end of file and
    stored in the module-level ``dataset`` list, which is emptied first so
    that calling the loader again does not duplicate records.

    Args:
        filename: Path of the file containing consecutively pickled records.
        split: Probability that a record is appended to ``trset``; otherwise
            it is appended to ``teset``.
        trset: List extended in place with the training records.
        teset: List extended in place with the test records.
    """
    # Clear in place so existing references to `dataset` stay valid.
    dataset.clear()
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    with open(filename, 'rb') as f:
        while True:
            try:
                dataset.append(pickle.load(f))
            except EOFError:
                # End of file reached; the with-block closes the handle.
                break
    for record in dataset:
        if random.random() < split:
            trset.append(record)
        else:
            teset.append(record)

In [None]:
def distance(instance1, instance2, k):
    """Return a directed dissimilarity between two (mean, covariance) items.

    The value is
    ``trace(inv(cm2) @ cm1) + (mm2 - mm1)^T inv(cm2) (mm2 - mm1)
    + log(det(cm2)) - log(det(cm1)) - k``,
    where ``mm``/``cm`` are elements 0 and 1 of each instance.

    Args:
        instance1: Item whose element 0 is a mean vector and element 1 a
            square matrix.
        instance2: Item of the same layout; its matrix is the one inverted.
        k: Constant subtracted from the result.
            NOTE(review): the formula resembles the Gaussian KL divergence,
            where this term would be the dimensionality; the caller passes
            its neighbour count, which shifts every distance equally -
            confirm intent.

    Returns:
        The dissimilarity as a NumPy scalar (not symmetric in its arguments).
    """
    mm1, cm1 = instance1[0], instance1[1]
    mm2, cm2 = instance2[0], instance2[1]
    # Invert once and reuse for both the trace and the quadratic term.
    inv_cm2 = np.linalg.inv(cm2)
    mean_diff = mm2 - mm1
    result = np.trace(np.dot(inv_cm2, cm1))
    result += np.dot(np.dot(mean_diff.transpose(), inv_cm2), mean_diff)
    result += np.log(np.linalg.det(cm2)) - np.log(np.linalg.det(cm1))
    result -= k
    return result

In [None]:
# Empty containers that loadDataset fills in place; each record lands in the
# training list when a random draw is below 0.68, otherwise in the test list.
trainingSet, testSet = [], []
loadDataset('knndata.dat', 0.68, trainingSet, testSet)

In [None]:
# Classify every test item by majority vote among its 5 nearest training
# items, then report the fraction classified correctly.
length = len(testSet)
predictions = [
    nearestclass(getNeighbors(trainingSet, sample, 5))
    for sample in testSet
]

accuracy1 = getAccuracy(testSet, predictions)
print(accuracy1)