In [1]:
import numpy as np
import pandas as pd
import scipy.io.wavfile as wav
from python_speech_features import mfcc
from tempfile import TemporaryFile
import os
import math
import pickle
import random
import operator

In [2]:
#define a function to get distance between feature vectors and find neighbors
def getNeighbors(trainingset, instance, k):
    #This is a function definition that takes in three arguments: trainingset: a list of training instances (tuples) with labels
    #instance: the instance that we want to find the neighbors for k: the number of neighbors we want to find
    distances = []
    for x in range(len(trainingset)):
        dist = distance(trainingset[x], instance, k) + distance(instance,trainingset[x],k)
        distances.append((trainingset[x][2], dist))
        #This loop calculates the distance between the instance and all the instances in the trainingset.
        #For each instance in the trainingset, it calculates the distance by calling the distance function twice:
            #distance(trainingset[x], instance, k) calculates the distance between the current trainingset instance and the instance.
            #distance(instance,trainingset[x],k) calculates the distance between the instance and the current trainingset instance.
        #The two distances are added together to get the total distance between the two instances.
        #The label of the current trainingset instance and its distance to the instance are added to the distances list as a tuple.
    distances.sort(key=operator.itemgetter(1))
    neighbors = []
    for x in range(k):
        neighbors.append(distances[x][0])
        #This loop sorts the distances list by the distance value (the second item in each tuple).
    return neighbors

In [3]:
#function to identify the nearest neighbors
#This function takes in a list of neighbors (i.e., the labels of the k closest instances to a given instance).
#The function then creates a dictionary classVote to keep track of the number of instances with each label.
def nearestclass(neighbors):
    classVote = {}
    #For each label in neighbors, the function checks if it is already in the classVote dictionary. If it is, the count for that label is incremented; otherwise, a new key is added with a count of 1.
    for x in range(len(neighbors)):
        response = neighbors[x]
        if response in classVote:
            classVote[response] += 1
        else:
            classVote[response] = 1
    #Now, classVote dictionary is sorted in descending order of counts, using the sorted() function and the operator.itemgetter() method.
    #The label with the highest count is returned as the output of the function. This label represents the predicted class for the given instance.       
    sorter = sorted(classVote.items(), key=operator.itemgetter(1), reverse=True)
    return sorter[0][0]


In [4]:
def getAccuracy(testSet, prediction):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == prediction[x]:
            correct += 1
    return 1.0 * correct / len(testSet)
    #For each instance in testSet, the function checks if the predicted label (in prediction[x]) matches the true label 
    # (which is the last element of the instance, denoted by testSet[x][-1]).
    #If the predicted label matches the true label, then the correct count is incremented by 1.
    #Once all instances have been checked, the function returns the accuracy of the predictions as a decimal value between 0 and 1. 
    # This is calculated by dividing the number of correct predictions by the total number of instances in testSet.

In [5]:
directory = 'C:/Users/DELL/Desktop/Quarter_3/Speech_Processing/Project/Data/genres_original'

f = open("mydataset.dat", "wb")
i = 0
for folder in os.listdir(directory):
    print(folder)
    i += 1
    if i == 11:
        break
    for file in os.listdir(directory+"/"+folder):
        #print(file)
        try:
            (rate, sig) = wav.read(directory+"/"+folder+"/"+file)
            #Mel-frequency cepstral coefficients (MFCCs) are computed for each audio file using the mfcc() function from the speech recognition library.
            #The mean and covariance of the MFCC features are computed and stored in a tuple along with a label for the genre of the audio file.
            mfcc_feat = mfcc(sig, rate, winlen = 0.020, appendEnergy=False)
            covariance = np.cov(np.matrix.transpose(mfcc_feat))
            mean_matrix = mfcc_feat.mean(0)
            feature = (mean_matrix, covariance, i)
            pickle.dump(feature, f)
        #If an exception is raised during processing of a file, the exception is caught and an error message is printed, 
        # along with the folder and filename where the exception occurred.
        except Exception as e:
            print("Got an exception: ", e, 'in folder: ', folder, ' filename: ', file)
f.close()

blues
classical
country
disco
hiphop
jazz
metal
pop
reggae
rock


In [6]:
dataset = []

def loadDataset(filename, split, trset, teset):
    #The function opens the binary file specified by the filename using the open() function in binary read mode and reads in the data using pickle.load().
    with open('mydataset.dat','rb') as f:
        while True:
            #The data is appended to the dataset list until the end of the file is reached.
            try:
                dataset.append(pickle.load(f))
            except EOFError:
                f.close()
                break
    #For each item in dataset, a random number between 0 and 1 is generated using the random.random() function. If the number is less than the split ratio, 
    #the item is added to the training set, otherwise it is added to the test set.
    for x in range(len(dataset)):
        if random.random() < split:
            trset.append(dataset[x])
        else:
            teset.append(dataset[x])

trainingSet = []
testSet = []
loadDataset('my.dat', 0.68, trainingSet, testSet)

In [7]:
#The function computes the Mahalanobis distance to find similarities between the two instances
#The formula for Mahalanobis distance is:
#D² = (x - μ)ᵀ Σ⁻¹ (x - μ)
def distance(instance1, instance2, k):
    distance = 0
    mm1 = instance1[0]
    cm1 = instance1[1]
    mm2 = instance2[0]
    cm2 = instance2[1]
    distance = np.trace(np.dot(np.linalg.inv(cm2), cm1))
    distance += (np.dot(np.dot((mm2-mm1).transpose(), np.linalg.inv(cm2)), mm2-mm1))
    distance += np.log(np.linalg.det(cm2)) - np.log(np.linalg.det(cm1))
    distance -= k
    return distance

In [8]:
# Make the prediction using KNN(K nearest Neighbors)
length = len(testSet)
predictions = []
#making predictions for each instance in the test set by calling the getNeighbors() function to get the k nearest neighbors in the training set for each instance, 
# and then calling the nearestclass() function to determine the predicted class based on the majority vote of those neighbors. The value of k used is 5.
for x in range(length):
    predictions.append(nearestclass(getNeighbors(trainingSet, testSet[x], 5)))
#The predicted classes are stored in a list called predictions. The accuracy of the predictions is calculated using the getAccuracy() function by comparing 
# each predicted class to the actual class in the test set, and returning the proportion of correct predictions.
accuracy1 = getAccuracy(testSet, predictions)
print(accuracy1)

0.7194029850746269


In [9]:
from collections import defaultdict
results = defaultdict(int)
#defaultdict class from the collections module to create a dictionary results with integer keys and string values. 
# The integer keys start from 5 and are incremented by 1 for each folder in the directory.
directory = 'C:/Users/DELL/Desktop/Quarter_3/Speech_Processing/Project/Data/genres_original'
#Then, the nearestclass function is called with arguments dataset, feature, and 3, which returns a predicted class label. 
i = 5
for folder in os.listdir(directory):
    results[i] = folder
    i += 1
# Finally, the predicted class label is looked up in the results dictionary to get the corresponding genre name, which is printed to the console.
pred = nearestclass(getNeighbors(dataset, feature, 3))
print(results[pred])

jazz
