In [None]:
# Steps
#
# Load the dataset: Download the GTZAN dataset from the internet
#         and load the audio files into your Python environment
#         using a library such as librosa.

# Split the dataset into training and testing sets: Use 
#     scikit-learn's train_test_split function to split 
#     the dataset into a training set and a testing set. 
#     This function randomly splits the data into two 
#     subsets, one for training the model and one for 
#     testing the model's accuracy.

# Choose a machine learning algorithm: Choose a machine 
#     learning algorithm that is suitable for your data and 
#     problem. Some popular algorithms for classification 
#     tasks like this include decision trees, random forests, 
#     support vector machines, and neural networks.

# Train the model: Fit the chosen algorithm to the training
#     data by calling the scikit-learn estimator's fit method.
#     You can experiment with different models and hyperparameters
#     to find the best model for your task.

# Test the model: Run the fitted model on the testing data by
#     calling the estimator's predict method. This method takes
#     in the testing data and predicts the genre of each song.

# Evaluate the model: Evaluate the accuracy of the model using 
#     metrics such as accuracy, precision, recall, and F1-score.
#     You can use scikit-learn's metrics module to compute these
#     metrics.

# Tune the model: If the accuracy is not satisfactory, you can tune 
#     the model's hyperparameters to improve its performance. 
#     Scikit-learn provides tools for hyperparameter tuning, 
#     such as GridSearchCV and RandomizedSearchCV.

# Deploy the model: Once you are satisfied with the model's 
#     performance, you can deploy it to make predictions on new, 
#     unseen data.

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from datetime import datetime 

# Wall-clock timer for the whole load / train / evaluate cell.
start = datetime.now()

# Folder that contains the `images_original` directory.
path = os.path.join(os.getcwd(), 'data')

# All genre labels (one sub-directory of `images_original` per genre).
genre_labels = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

# Genres that are skipped when loading. This used to be a bare `i == 2` index
# check, which silently dropped 'country'; naming the genre keeps the same
# behaviour but makes it visible.
# NOTE(review): the reason for excluding 'country' is not recorded - confirm.
excluded_genres = {'country'}

# Load every spectrogram image and remember which genre it belongs to.
X = []
y = []
for genre in genre_labels:
    if genre in excluded_genres:
        continue
    genre_path = os.path.join(path, 'images_original', genre)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded train/test split below) reproducible.
    for file in sorted(os.listdir(genre_path)):
        if file.endswith('.png'):
            X.append(plt.imread(os.path.join(genre_path, file)))
            y.append(genre)

# Stack into one array; np.stack fails loudly if no image was found or if the
# images do not all share the same shape.
X = np.stack(X)
y = np.array(y)

# Flatten every image into a single 1-D feature vector: (n_samples, n_features).
X = X.reshape(X.shape[0], -1)

# Split the data into training and testing sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear SVM. probability=True is required because a later cell calls
# svm.predict_proba(); without it that call fails on a fresh kernel. It makes
# fit() slower (scikit-learn runs an internal cross-validation to calibrate
# the probabilities), and random_state keeps that calibration repeatable.
svm = SVC(kernel='linear', C=0.1, probability=True, random_state=42)

# Train the SVM model.
svm.fit(X_train, y_train)

# Predict the genre of every held-out sample.
y_pred = svm.predict(X_test)

# Fraction of held-out samples whose genre was predicted correctly.
acc = accuracy_score(y_test, y_pred)
print('Accuracy:', acc)

# Elapsed time for the whole cell.
print(datetime.now() - start)

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from sklearn.svm import SVC


# Audio clip to classify with the SVM trained in the previous cell.
audio_file = 'testfile.wav'

# Named `audio` rather than `y` so the training-label array `y` created by the
# training cell is not overwritten when this cell runs.
audio, sr = librosa.load(audio_file)

# Mel spectrogram of the clip, converted to decibels relative to its peak.
mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=2048, hop_length=512, n_mels=128)
mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

plt.figure(figsize=(10, 4))
librosa.display.specshow(mel_spec_db, x_axis='time', y_axis='mel', sr=sr, fmax=8000)
plt.colorbar(format='%+2.0f dB')
plt.title('Mel spectrogram')
plt.show()

# Flatten the spectrogram into a single row of float32 features.
# NOTE(review): the training cell feeds the SVM pixel values read from PNG
# files with plt.imread, whereas these are dB values divided by 255; the two
# feature spaces look different - confirm the prediction is meaningful.
X_new = mel_spec_db.flatten().reshape(1, -1).astype('float32') / 255.

# Zero-pad the vector symmetrically up to the number of features the model was
# fitted on. The width is derived from the model instead of the previous
# hard-coded 160640 per side, which only matched one specific clip length.
n_expected = svm.n_features_in_
n_missing = n_expected - X_new.shape[1]
if n_missing < 0:
    raise ValueError(
        f"{audio_file} yields {X_new.shape[1]} features but the model expects "
        f"{n_expected}; use a shorter clip."
    )
pad_left = n_missing // 2
X_new_padded = np.pad(X_new.ravel(), (pad_left, n_missing - pad_left))

# The model was trained on genre-name strings, so predict() returns the genre
# label itself and no separate label lookup list is needed here.
y_pred = svm.predict(X_new_padded.reshape(1, -1))
# y_pred_prob = svm.predict_proba(X_new_padded.reshape(1, -1))
# print (y_pred_prob)

# Print the predicted genre label
print('Predicted genre:', y_pred)


In [None]:
# Reuse the padded feature vector built by the previous cell instead of
# rebuilding it here with a second copy of the hard-coded padding width.
sample = X_new_padded.reshape(1, -1)
y_pred = svm.predict(sample)

# predict_proba only exists on an SVC created with probability=True; fail with
# an actionable message rather than an obscure attribute error.
if not getattr(svm, 'probability', False):
    raise RuntimeError(
        "svm was created without probability=True, so class probabilities are "
        "unavailable; re-create it as SVC(..., probability=True) and fit again."
    )

y_pred_prob = svm.predict_proba(sample)

# The probability columns follow svm.classes_ (only the genres the model was
# trained on), not the full genre_labels list, so pair them with classes_.
for genre, prob in zip(svm.classes_, y_pred_prob[0]):
    print(genre, "probability : ", prob)

In [None]:
 # Sanity check: adds up nine hand-copied values; the total is approximately 1.0.
 # NOTE(review): these look like the per-class probabilities printed by the
 # cell above - confirm, and consider summing that result directly so the
 # numbers cannot go stale.
 0.12645541+ 0.12097451 + 0.13628938 + 0.06627609 + 0.11576043 + 0.04846593 +  0.12659782 + 0.11477521 + 0.14440522