### Importing required libraries

In [125]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import os
import pickle
import librosa
import librosa.display
import IPython.display as ipd
from IPython.display import Audio
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential

In [126]:
# Read the feature table from features_3_sec.csv and preview its first rows.
features_csv = "../input/gtzandata/Data/features_3_sec.csv"
df = pd.read_csv(features_csv)
df.head()

In [127]:
# Shape of the feature table as (rows, columns)
df.shape

In [128]:
# Data type of every column in the feature table
df.dtypes

In [129]:
# Load one example clip; librosa.load returns the samples together with the
# sampling rate, which are unpacked into `signal` and `sr`.
audio = "../input/gtzandata/Data/genres_original/blues/blues.00010.wav"
loaded_clip = librosa.load(audio)
signal, sr = loaded_clip
print(type(signal), type(sr))

In [130]:
# Load the same clip again with an explicit target sampling rate. The result is
# not assigned to anything, so it is only echoed as the cell output; `signal`
# and `sr` from the previous cell are left untouched.
# NOTE(review): 45600 Hz is an unusual rate - presumably 44100 was intended; confirm.
librosa.load(audio,sr=45600)

In [131]:
# Short-time Fourier transform of `signal`, then its magnitude converted to
# decibels relative to the largest magnitude value.
y = librosa.stft(signal)
stft_magnitude = np.abs(y)
S_db = librosa.amplitude_to_db(stft_magnitude, ref=np.max)

In [132]:
# Inline audio player for the loaded clip. IPython.display is already imported
# as `ipd` in the first cell, so no additional import is needed here.
ipd.Audio(signal, rate=sr)

In [133]:
# Waveform of the loaded clip, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(7, 4))
librosa.display.waveshow(signal, color="#2B4F72", alpha=0.5, ax=ax)
plt.show()

In [134]:
# Linear-amplitude spectrogram of the clip.
# specshow expects real-valued data: handing it the complex STFT makes librosa
# warn and fall back to the magnitude, so the magnitude is taken explicitly.
# The decibel-scaled version of the same spectrogram is plotted in the next cell.
stft = librosa.stft(signal)
plt.figure(figsize=(7, 6))
librosa.display.specshow(np.abs(stft), sr=sr, x_axis='time', y_axis='hz')
plt.colorbar()

In [135]:
# Decibel-scaled spectrogram: STFT -> magnitude -> dB, drawn with a colorbar.
stft = librosa.stft(signal)
stft_abs = librosa.amplitude_to_db(abs(stft))
db_fig, db_ax = plt.subplots(figsize=(7, 6))
db_img = librosa.display.specshow(stft_abs, sr=sr, x_axis='time', y_axis='hz', ax=db_ax)
db_fig.colorbar(db_img, ax=db_ax)

## Spectral Roll off

In [136]:
# Spectral roll-off for every analysis frame. The audio must be passed by
# keyword (`y=`): recent librosa releases reject positional audio arguments.
# The 0.01 offset added to the signal is kept from the original call.
spec_roll = librosa.feature.spectral_rolloff(y=signal + 0.01, sr=sr)[0]

fig, ax = plt.subplots(figsize=(7, 6))
librosa.display.waveshow(signal, sr=sr, alpha=0.4, color="#2B4F72", ax=ax)

# The roll-off is in Hz while the waveform is an amplitude, so the curve is
# drawn on a twin y-axis instead of being squeezed onto the amplitude scale.
rolloff_ax = ax.twinx()
rolloff_ax.plot(librosa.times_like(spec_roll, sr=sr), spec_roll, color="r")
rolloff_ax.set_ylabel("Spectral roll-off (Hz)")
ax.set_title("Waveform with spectral roll-off")
plt.show()

## Chroma feature

In [137]:
import librosa.display as lplt

# Chromagram of the clip. The audio is passed by keyword (`y=`) because recent
# librosa releases reject positional audio arguments.
chroma = librosa.feature.chroma_stft(y=signal, sr=sr)
plt.figure(figsize=(7, 4))
lplt.specshow(chroma, sr=sr, x_axis="time", y_axis="chroma", cmap="jet")
plt.colorbar()
plt.title("Chroma Features")
plt.show()

## Zero Crossing Rate

In [138]:
# Zoom in on a short window of samples; `start` and `end` are reused by the
# zero-crossing count in the next cell.
start, end = 1000, 1200
zoom_fig, zoom_ax = plt.subplots(figsize=(7, 4))
zoom_ax.plot(signal[start:end], color="#2B4F72")
zoom_ax.grid()

In [139]:
# Flag the zero crossings inside the plotted window and report how many there are.
zcr = librosa.zero_crossings(signal[start:end], pad=False)
n_crossings = sum(zcr)
print("the numbert of zero_crossings is :", n_crossings)

## EDA

In [140]:
# BLUES
audio1 = '../input/gtzandata/Data/genres_original/blues/blues.00001.wav'
signal, sr = librosa.load(audio1)

# Waveform
plt.figure(figsize=(8, 3))
librosa.display.waveshow(signal, sr=sr, alpha=0.4)
plt.title('Waveplot - BLUES')

# Log-scaled mel spectrogram (power converted to dB)
plt.figure(figsize=(8, 5))
spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, fmax=8000)
spectrogram = librosa.power_to_db(spectrogram)
librosa.display.specshow(spectrogram, sr=sr, y_axis='mel', fmax=8000, x_axis='time')
plt.title('Mel Spectrogram - BLUES')
plt.colorbar(format='%+2.0f dB')

# Audio player. Only the last expression of a cell is rendered, so the player
# is created once, here, rather than also in the middle of the cell.
ipd.Audio(audio1)


In [141]:
# CLASSICAL
# The path is assigned before anything uses it, so this cell does not depend on
# `audio1` left over from a previously executed cell.
audio1 = '../input/gtzandata/Data/genres_original/classical/classical.00001.wav'
signal, sr = librosa.load(audio1)

# Waveform
plt.figure(figsize=(8, 3))
plt.title('Waveplot - CLASSICAL')
librosa.display.waveshow(signal, sr=sr, alpha=0.4)

# Log-scaled mel spectrogram (power converted to dB)
plt.figure(figsize=(8, 5))
spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, fmax=8000)
spectrogram = librosa.power_to_db(spectrogram)
librosa.display.specshow(spectrogram, sr=sr, y_axis='mel', fmax=8000, x_axis='time')
plt.title('Mel Spectrogram -CLASSICAL')
plt.colorbar(format='%+2.0f dB')

# Audio player (rendered because it is the last expression of the cell)
ipd.Audio(audio1)

In [142]:
# COUNTRY
audio1 = '../input/gtzandata/Data/genres_original/country/country.00001.wav'
signal, sr = librosa.load(audio1)

# Waveform
plt.figure(figsize=(8, 3))
librosa.display.waveshow(signal, sr=sr, alpha=0.4)
plt.title('Waveplot - COUNTRY')

# Log-scaled mel spectrogram (power converted to dB)
plt.figure(figsize=(10, 5))
spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, fmax=8000)
spectrogram = librosa.power_to_db(spectrogram)
librosa.display.specshow(spectrogram, sr=sr, y_axis='mel', fmax=8000, x_axis='time')
plt.title('Mel Spectrogram - COUNTRY')
plt.colorbar(format='%+2.0f dB')

# Audio player. Only the last expression of a cell is rendered, so the player
# is created once, here, rather than also in the middle of the cell.
ipd.Audio(audio1)

In [143]:
# DISCO
audio1 = '../input/gtzandata/Data/genres_original/disco/disco.00001.wav'
signal, sr = librosa.load(audio1)

# Time-domain view of the clip
wave_fig, wave_ax = plt.subplots(figsize=(8, 3))
librosa.display.waveshow(signal, sr=sr, alpha=0.4, ax=wave_ax)
wave_ax.set_title('Waveplot - DISCO')

# Mel spectrogram, converted from power to decibels before plotting
mel_fig, mel_ax = plt.subplots(figsize=(10, 5))
mel_power = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, fmax=8000)
spectrogram = librosa.power_to_db(mel_power)
mel_img = librosa.display.specshow(spectrogram, y_axis='mel', fmax=8000, x_axis='time', ax=mel_ax)
mel_ax.set_title('Mel Spectrogram - DISCO')
mel_fig.colorbar(mel_img, ax=mel_ax, format='%+2.0f dB')

# Inline audio player (last expression of the cell, so it is rendered)
ipd.Audio(audio1)


In [144]:
# HIPHOP
audio1 = '../input/gtzandata/Data/genres_original/hiphop/hiphop.00001.wav'
signal, sr = librosa.load(audio1)

# Waveform
plt.figure(figsize=(8, 3))
librosa.display.waveshow(signal, sr=sr, alpha=0.4)
plt.title('Waveplot - HIPHOP')

# Log-scaled mel spectrogram (power converted to dB)
plt.figure(figsize=(10, 5))
spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, fmax=8000)
spectrogram = librosa.power_to_db(spectrogram)
librosa.display.specshow(spectrogram, sr=sr, y_axis='mel', fmax=8000, x_axis='time')
plt.title('Mel Spectrogram - HIPHOP')
plt.colorbar(format='%+2.0f dB')

# Audio player. Only the last expression of a cell is rendered, so the player
# is created once, here, rather than also in the middle of the cell.
ipd.Audio(audio1)


In [145]:
# JAZZ
audio1 = '../input/gtzandata/Data/genres_original/jazz/jazz.00001.wav'
signal, sr = librosa.load(audio1)

# Amplitude over time
jazz_wave_fig, jazz_wave_ax = plt.subplots(figsize=(8, 3))
librosa.display.waveshow(signal, sr=sr, alpha=0.4, ax=jazz_wave_ax)
jazz_wave_ax.set_title('Waveplot - JAZZ')

# Mel-scaled power spectrogram expressed in dB
jazz_mel_fig, jazz_mel_ax = plt.subplots(figsize=(7, 4))
spectrogram = librosa.power_to_db(
    librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, fmax=8000)
)
jazz_mel_img = librosa.display.specshow(
    spectrogram, y_axis='mel', fmax=8000, x_axis='time', ax=jazz_mel_ax
)
jazz_mel_ax.set_title('Mel Spectrogram - JAZZ')
jazz_mel_fig.colorbar(jazz_mel_img, ax=jazz_mel_ax, format='%+2.0f dB')

# The cell ends with the audio player so that it is displayed
ipd.Audio(audio1)

In [146]:
# METAL
audio1 = '../input/gtzandata/Data/genres_original/metal/metal.00001.wav'
signal, sr = librosa.load(audio1)

# Figure 1: waveform of the clip
metal_wave_fig, metal_wave_ax = plt.subplots(figsize=(8, 3))
librosa.display.waveshow(signal, sr=sr, alpha=0.4, ax=metal_wave_ax)
metal_wave_ax.set_title('Waveplot - METAL')

# Figure 2: mel spectrogram on a decibel scale
metal_mel_fig, metal_mel_ax = plt.subplots(figsize=(7, 4))
mel_kwargs = dict(sr=sr, n_mels=128, fmax=8000)
spectrogram = librosa.feature.melspectrogram(y=signal, **mel_kwargs)
spectrogram = librosa.power_to_db(spectrogram)
metal_mel_img = librosa.display.specshow(
    spectrogram, y_axis='mel', fmax=8000, x_axis='time', ax=metal_mel_ax
)
metal_mel_ax.set_title('Mel Spectrogram - METAL')
metal_mel_fig.colorbar(metal_mel_img, ax=metal_mel_ax, format='%+2.0f dB')

# Player for the same file, shown as the cell output
ipd.Audio(audio1)

In [147]:
# POP
audio1 = '../input/gtzandata/Data/genres_original/pop/pop.00001.wav'
# Load into `signal`, the name both plots below read, so the mel spectrogram is
# computed from this clip and not from audio left over from an earlier cell.
signal, sr = librosa.load(audio1)

# Waveform
plt.figure(figsize=(7, 4))
librosa.display.waveshow(signal, sr=sr, alpha=0.4)
plt.title('Waveplot - POP')

# Log-scaled mel spectrogram (power converted to dB)
plt.figure(figsize=(7, 4))
spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, fmax=8000)
spectrogram = librosa.power_to_db(spectrogram)
librosa.display.specshow(spectrogram, sr=sr, y_axis='mel', fmax=8000, x_axis='time')
plt.title('Mel Spectrogram - POP')
plt.colorbar(format='%+2.0f dB')

# Audio player (rendered because it is the last expression of the cell)
ipd.Audio(audio1)


In [148]:
# REGGAE
audio1 = '../input/gtzandata/Data/genres_original/reggae/reggae.00001.wav'
# Load into `signal`, the name both plots below read, so the waveform and the
# mel spectrogram show this clip and not audio left over from an earlier cell.
signal, sr = librosa.load(audio1)

# Waveform
plt.figure(figsize=(7, 4))
librosa.display.waveshow(signal, sr=sr, alpha=0.4)
plt.title('Waveplot - REGGAE')

# Log-scaled mel spectrogram (power converted to dB)
plt.figure(figsize=(7, 4))
spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, fmax=8000)
spectrogram = librosa.power_to_db(spectrogram)
librosa.display.specshow(spectrogram, sr=sr, y_axis='mel', fmax=8000, x_axis='time')
plt.title('Mel Spectrogram - REGGAE')
plt.colorbar(format='%+2.0f dB')

# Audio player (rendered because it is the last expression of the cell)
ipd.Audio(audio1)

In [149]:
# ROCK
audio1 = '../input/gtzandata/Data/genres_original/rock/rock.00001.wav'
# Load into `signal`, the name both plots below read, so the mel spectrogram is
# computed from this clip and not from audio left over from an earlier cell.
signal, sr = librosa.load(audio1)

# Waveform
plt.figure(figsize=(8, 3))
librosa.display.waveshow(signal, sr=sr, alpha=0.4)
plt.title('Waveplot - ROCK')

# Log-scaled mel spectrogram (power converted to dB)
plt.figure(figsize=(7, 4))
spectrogram = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, fmax=8000)
spectrogram = librosa.power_to_db(spectrogram)
librosa.display.specshow(spectrogram, sr=sr, y_axis='mel', fmax=8000, x_axis='time')
plt.title('Mel Spectrogram - ROCK')
plt.colorbar(format='%+2.0f dB')

# Audio player (rendered because it is the last expression of the cell)
ipd.Audio(audio1)

In [150]:
# Missing-value check: list every column that contains at least one NA entry.
has_missing = df.isnull().any()
columns_with_na = list(df.columns[has_missing])
print("Columns containing missing values", columns_with_na)

In [151]:
#Label Encoding
class_encod=df.iloc[:,-1]
converter=LabelEncoder()
y=converter.fit_transform(class_encod)
y

In [152]:
# Feature columns (everything except the last column). A bare expression is
# used instead of print() so the notebook renders the frame as a table, and
# .head() keeps the output to a short preview.
df.iloc[:, :-1].head()

In [153]:
# Remove the file-name column, which is an identifier rather than a feature.
# errors="ignore" keeps this cell re-runnable: without it, executing the cell a
# second time raises KeyError because the column is already gone.
df = df.drop(columns="filename", errors="ignore")

In [154]:
# Scaling: standardize every feature column (all columns except the last one).
from sklearn.preprocessing import StandardScaler

fit = StandardScaler()
feature_matrix = np.array(df.iloc[:, :-1], dtype=float)
x = fit.fit_transform(feature_matrix)

In [155]:
# Split the raw (unscaled) features, then standardize using statistics learned
# from the training rows only. Splitting the already-scaled `x` would let the
# test rows influence the scaler's mean and variance.
# random_state makes the split (and therefore every reported score)
# reproducible; stratify=y keeps the genre proportions equal in both parts.
raw_features = np.array(df.iloc[:, :-1], dtype=float)
x_train, x_test, y_train, y_test = train_test_split(
    raw_features, y, test_size=0.3, random_state=42, stratify=y
)
split_scaler = StandardScaler().fit(x_train)
x_train = split_scaler.transform(x_train)
x_test = split_scaler.transform(x_test)

In [156]:
# Number of samples in the test split
len(y_test)

In [157]:
# Number of samples in the training split
len(y_train)

## KNN

In [158]:
# k-nearest-neighbours classifier with k = 3.
cm1 = KNeighborsClassifier(n_neighbors=3)
cm1.fit(x_train, y_train)
y_pred = cm1.predict(x_test)
print("Testing Accuracy: {:.3f}".format(cm1.score(x_test, y_test)))
print("Training Accuracy: {:.3f}".format(cm1.score(x_train, y_train)))

# Fix the label order explicitly so the matrix rows/columns always line up with
# the genre names, even if a class were missing from the test split.
genre_names = list(converter.classes_)
genre_ids = np.arange(len(genre_names))
confusion = confusion_matrix(y_test, y_pred, labels=genre_ids)

# A local figure is used instead of sns.set(rc=...), which would change the
# plotting theme of every later figure in the notebook.
fig, ax = plt.subplots(figsize=(8, 3))
# fmt="d" prints the counts as integers instead of scientific notation.
sns.heatmap(confusion, annot=True, fmt="d",
            xticklabels=genre_names, yticklabels=genre_names, ax=ax)
ax.set(xlabel="Predicted genre", ylabel="True genre", title="KNN confusion matrix")
print(classification_report(y_test, y_pred, labels=genre_ids, target_names=genre_names))

### test Accuracy of KNN = 89%

## SVM

In [159]:
# Support-vector classifier with an RBF kernel. The former `degree=8` argument
# was dropped: SVC only uses `degree` with the polynomial kernel, so it had no
# effect here and merely suggested a setting that was never applied.
svclassifier = SVC(kernel='rbf')
svclassifier.fit(x_train, y_train)
print("Testing Accuracy: {:.3f}".format(svclassifier.score(x_test, y_test)))
print("Training Accuracy: {:.3f}".format(svclassifier.score(x_train, y_train)))
y_pred = svclassifier.predict(x_test)

# Fix the label order explicitly so the matrix rows/columns always line up with
# the genre names, even if a class were missing from the test split.
genre_names = list(converter.classes_)
genre_ids = np.arange(len(genre_names))
confusion = confusion_matrix(y_test, y_pred, labels=genre_ids)

# A local figure is used instead of sns.set(rc=...), which would change the
# plotting theme of every later figure in the notebook.
fig, ax = plt.subplots(figsize=(9, 4))
# fmt="d" prints the counts as integers instead of scientific notation.
sns.heatmap(confusion, annot=True, fmt="d",
            xticklabels=genre_names, yticklabels=genre_names, ax=ax)
ax.set(xlabel="Predicted genre", ylabel="True genre", title="SVM confusion matrix")
print(classification_report(y_test, y_pred, labels=genre_ids, target_names=genre_names))

### test Accuracy of SVM = 85%

In [160]:
def train_model(model, epochs, optimizer, batch_size=128):
    """Compile `model` and fit it on the notebook's train split.

    The model is (re)compiled inside this function, so the optimizer passed
    here is the one actually used for training; any earlier `model.compile`
    call is superseded.

    Args:
        model: Keras model to train.
        epochs: Number of training epochs.
        optimizer: Optimizer name or optimizer instance forwarded to
            `model.compile`.
        batch_size: Mini-batch size used by `model.fit` (default 128, the
            value that used to be hard-coded).

    Returns:
        The object returned by `model.fit`.

    Note:
        Reads the notebook globals `x_train`, `y_train`, `x_test`, `y_test`.
        The test split is passed as `validation_data`, so the validation
        metrics reported during training are computed on the test set.
    """
    # `metrics` is given as a list: a bare string is rejected by newer Keras releases.
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model.fit(x_train, y_train,
                     validation_data=(x_test, y_test),
                     epochs=epochs,
                     batch_size=batch_size)

In [161]:
def Validation_plot(history):
    """Print the highest validation accuracy and plot every recorded metric.

    Args:
        history: Object whose `.history` attribute maps metric names to
            per-epoch value lists and contains the key "val_accuracy".
    """
    recorded = history.history
    best_val_accuracy = max(recorded["val_accuracy"])
    print("Validation Accuracy", best_val_accuracy)

    # One line per recorded metric, with one row per epoch on the x-axis.
    curves = pd.DataFrame(recorded)
    curves.plot(figsize=(10, 6))
    plt.show()

In [162]:
# Fully connected network: five Dense+Dropout stages that halve in width from
# 512 to 32 units, followed by a 10-way softmax output.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(x.shape[1],)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(10, activation='softmax'),
])
model.summary()

# train_model() compiles the model itself, so the configured optimizer has to
# be handed to it. Passing the string 'adam' (as before) recompiled the model
# with a default Adam optimizer and silently discarded this learning rate.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model_history = train_model(model=model, epochs=600, optimizer=optimizer)


In [167]:
# Evaluate the trained network on the test split and report loss and accuracy
# (accuracy is printed as a percentage).
evaluation = model.evaluate(x_test, y_test, batch_size=256)
test_loss, test_acc = evaluation
print("Test loss is: ", test_loss)
print("Best accuracy is: ", test_acc * 100)

In [168]:
# Print the best validation accuracy and plot the per-epoch training curves
Validation_plot(model_history)

In [169]:
# Predict the genre of every test sample; `pred_idx` holds the index of the
# most probable class per sample and is reused by the confusion-matrix cell.
pred = model.predict(x_test)
pred_idx = np.argmax(pred, axis=1)

# Show a short, readable preview (genre names for the first few samples)
# instead of dumping the complete integer label arrays.
n_preview = 10
expected_genres = converter.inverse_transform(y_test[:n_preview])
predicted_genres = converter.inverse_transform(pred_idx[:n_preview])
for expected, predicted in zip(expected_genres, predicted_genres):
    print("Genre Expected : {}, Genre Predicted: {}".format(expected, predicted))

In [170]:
# Confusion matrix of the neural network on the test split. It reuses the
# `pred_idx` predictions from the previous cell rather than running
# model.predict a second time.
genre_ids = np.arange(len(converter.classes_))
cm = confusion_matrix(y_test, pred_idx, labels=genre_ids)

# Display with genre names on both axes (rows: true genre, columns: predicted).
pd.DataFrame(cm, index=converter.classes_, columns=converter.classes_)


## Neural network (fully connected, not a CNN) test accuracy = 94%