# In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
from python_speech_features import mfcc
import scipy.io.wavfile as wav
import pickle
import random
import operator
from scipy.spatial.distance import euclidean
import tkinter as tk
from tkinter import filedialog

def getNeighbors(trainingSet, instance, k):
    """Return the labels of the ``k`` training records nearest to ``instance``.

    Nearness is the Euclidean distance between ``instance[0]`` and element 0
    of each training record. Only element 0 takes part in the comparison.

    Args:
        trainingSet: Iterable of indexable records; element 0 is the feature
            vector that is compared and element 2 is the class label.
        instance: Indexable whose element 0 is the query feature vector.
        k: Maximum number of neighbours to return.

    Returns:
        A list of at most ``k`` labels ordered from nearest to farthest.
        Records at equal distance keep their ``trainingSet`` order, and a
        non-positive ``k`` yields an empty list.
    """
    # Pair every record's distance with its label.
    scored = [
        (euclidean(instance[0], record[0]), record[2])
        for record in trainingSet
    ]

    # Rank on the distance only, so labels are never compared and the stable
    # sort keeps equally distant records in their original order.
    ranked = sorted(scored, key=operator.itemgetter(0))

    # Clamp to the available records; max() keeps a negative k from slicing
    # from the end of the list.
    keep = max(min(k, len(ranked)), 0)
    return [label for _, label in ranked[:keep]]

def nearestClass(neighbors):
    """Return the label that appears most often in ``neighbors``.

    Args:
        neighbors: Sequence of hashable class labels.

    Returns:
        The label with the highest count. When several labels share the
        highest count, the one that appears first in ``neighbors`` wins.

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    # Count the votes; the dict keeps labels in first-seen order.
    tally = {}
    for label in neighbors:
        tally[label] = tally.get(label, 0) + 1

    # A stable descending sort leaves tied labels in first-seen order.
    ranked = list(tally.items())
    ranked.sort(key=operator.itemgetter(1), reverse=True)

    winner, _votes = ranked[0]
    return winner

def getAccuracy(testSet, predictions):
    """Return the fraction of test records whose label was predicted correctly.

    Args:
        testSet: Sequence of indexable records whose last element is the
            true class label.
        predictions: Sequence of predicted labels, aligned by position with
            ``testSet``.

    Returns:
        A float in ``[0.0, 1.0]``. An empty ``testSet`` yields ``0.0`` rather
        than dividing by zero.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    total = len(testSet)
    if total == 0:
        # Nothing to score; avoid ZeroDivisionError.
        return 0.0

    # Index into predictions (rather than zip) so a short predictions list
    # still fails loudly instead of being silently truncated.
    correct = sum(
        1
        for index, record in enumerate(testSet)
        if record[-1] == predictions[index]
    )
    return 1.0 * correct / total

def create_data_file(output_file, data_directory):
    """Extract MFCC summary features from ``.wav`` files and pickle them.

    Every sub-folder of ``data_directory`` is treated as one class. For each
    ``.wav`` file inside it, a ``(mean_vector, covariance_matrix, label)``
    tuple is appended to ``output_file`` as its own pickle record.

    The label is the 1-based position of the folder's entry in the sorted
    listing of ``data_directory``. The counter advances for every entry,
    including ones that are not directories, so stray files leave gaps in
    the label numbering.

    Args:
        output_file: Path of the binary file to create (overwritten if it
            already exists).
        data_directory: Directory whose sub-folders hold the audio files.

    Returns:
        None. Files that cannot be processed are reported on stdout and
        skipped, so one bad file does not abort the run.
    """
    with open(output_file, 'wb') as f:
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # folder -> label assignment reproducible across runs and machines.
        entries = sorted(os.listdir(data_directory))

        for folder_count, folder in enumerate(entries, start=1):
            folder_path = os.path.join(data_directory, folder)
            if not os.path.isdir(folder_path):
                continue

            for file_name in sorted(os.listdir(folder_path)):
                if not file_name.endswith('.wav'):
                    continue

                file_path = os.path.join(folder_path, file_name)
                try:
                    # Read the audio samples and their sampling rate.
                    rate, sig = wav.read(file_path)

                    # One row of MFCC coefficients per analysis window.
                    mfcc_feat = mfcc(sig, rate, winlen=0.02, appendEnergy=False)

                    # Summarise the file by the covariance and the mean of
                    # its MFCC rows.
                    covariance = np.cov(np.transpose(mfcc_feat))
                    mean_matrix = mfcc_feat.mean(0)

                    data_point = (mean_matrix, covariance, folder_count)

                    # Each record is a separate pickle so the file can be
                    # read back one record at a time.
                    pickle.dump(data_point, f)
                except Exception as e:
                    # Deliberate best-effort: report the file and carry on.
                    print(f"Error processing file {file_path}: {e}")

def loadDataset(filename, split_ratio, trSet, teSet):
    """Read pickled records from ``filename`` and split them at random.

    The file is expected to hold a sequence of back-to-back pickle records.
    Each record is appended to ``trSet`` when a fresh ``random.random()``
    draw is below ``split_ratio`` and to ``teSet`` otherwise. Both lists are
    extended in place; existing items are kept.

    NOTE: ``pickle.load`` can execute arbitrary code, so only load files
    from a trusted source.

    Args:
        filename: Path of the pickle file to read.
        split_ratio: Threshold compared against each random draw.
        trSet: List that receives the training records.
        teSet: List that receives the test records.

    Returns:
        None. If ``filename`` does not exist, a message is printed and both
        lists are left untouched.
    """
    if not os.path.exists(filename):
        print(f"File '{filename}' does not exist.")
        return

    # Keep unpickling until the stream is exhausted; EOFError is the normal
    # end-of-data signal here.
    records = []
    with open(filename, 'rb') as stream:
        try:
            while True:
                records.append(pickle.load(stream))
        except EOFError:
            pass

    print(f"Total number of data points loaded: {len(records)}")

    # One random draw per record decides which list receives it.
    for record in records:
        bucket = trSet if random.random() < split_ratio else teSet
        bucket.append(record)

    print(f"Training set size: {len(trSet)}")
    print(f"Test set size: {len(teSet)}")

def predict_genre(audio_file_path, dataset_file="my.dat", k=5):
    """Predict the class label of one audio file with k-nearest neighbours.

    The file's MFCC features are summarised in the same way as the stored
    records (mean vector and covariance matrix) and compared with every
    record in ``dataset_file``. The input is classified once, against the
    whole dataset, so the same file always gets the same answer.

    Args:
        audio_file_path: Path of the ``.wav`` file to classify.
        dataset_file: Pickle file of reference records to load.
        k: Number of nearest neighbours that vote on the label.

    Returns:
        The winning class label, exactly as stored in the dataset records.

    Raises:
        ValueError: If no reference records could be loaded from
            ``dataset_file`` (missing or empty file).
    """
    reference_set = []
    unused_split = []
    # loadDataset sends a record to its first list when random.random() is
    # below the ratio; random.random() is always < 1.0, so a ratio of 1.0
    # keeps every record as reference data.
    loadDataset(dataset_file, 1.0, reference_set, unused_split)
    if not reference_set:
        raise ValueError(
            f"No reference data could be loaded from '{dataset_file}'."
        )

    # Read the audio samples and their sampling rate.
    rate, sig = wav.read(audio_file_path)

    # Same feature summary as the stored records.
    mfcc_feat = mfcc(sig, rate, winlen=0.02, appendEnergy=False)
    covariance = np.cov(np.transpose(mfcc_feat))
    mean_matrix = mfcc_feat.mean(0)

    # The query mirrors the stored record layout: mean first, covariance
    # second.
    neighbors = getNeighbors(reference_set, [mean_matrix, covariance], k)
    return nearestClass(neighbors)

def browse_file():
    """Ask the user for an audio file and show its predicted genre.

    Button callback: opens a file dialog, then updates the module-level
    ``file_path_label`` and ``genre_label`` widgets. The predicted label is
    formatted into the text rather than concatenated, because the label
    returned by ``predict_genre`` is not necessarily a string.

    If the prediction fails, the failure is shown in ``genre_label`` and the
    exception is re-raised so the traceback is still reported.
    """
    audio_file_path = filedialog.askopenfilename()
    if not audio_file_path:
        # Dialog was cancelled: reset both labels.
        file_path_label.config(text="No file selected")
        genre_label.config(text="Predicted Genre: ")
        return

    file_path_label.config(text=f"Selected File: {audio_file_path}")
    try:
        genre = predict_genre(audio_file_path)
    except Exception as exc:
        # GUI boundary: do not leave a stale genre on screen when the file
        # cannot be classified.
        genre_label.config(text=f"Prediction failed: {exc}")
        raise
    genre_label.config(text=f"Predicted Genre: {genre}")

# GUI setup: build the main window and its widgets, then start the Tk event
# loop. These statements run at module level, so the names created here are
# module globals.
root = tk.Tk()
root.title("Audio Genre Classifier")

# Container frame that holds every widget below.
frame = tk.Frame(root)
frame.pack(pady=20)

# Static prompt text.
label = tk.Label(frame, text="Select an audio file:")
label.pack()

# Clicking the button invokes browse_file.
browse_button = tk.Button(frame, text="Browse", command=browse_file)
browse_button.pack(pady=10)

# browse_file rewrites this label with the selected path.
file_path_label = tk.Label(frame, text="Selected File: ")
file_path_label.pack()

# browse_file rewrites this label with the prediction result.
genre_label = tk.Label(frame, text="Predicted Genre: ")
genre_label.pack()

# Hand control to the Tk event loop.
root.mainloop()