In [3]:

import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import pickle

# algorithms
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from sklearn.model_selection import GridSearchCV


I used cu121 here

In [4]:
use_gpu = torch.cuda.is_available()
print(use_gpu)

True


# Now I load all the values

In [81]:

genre_names = ['rock', 'pop', 'arabesk', 'turk_sanat', 'jazz', 'rap']

# Load the features.pkl file
with open('../data/features.pkl', 'rb') as file:
    all_features = pickle.load(file)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_features)

# Display the DataFrame (optional)
print(df.shape)

(637, 9)


In [82]:
column_names = list(df.columns)
print(column_names)
print(df['zcr'])

['stft_db', 'spectral_rolloff', 'zcr', 'chroma', 'mfccs', 'rms', 'spectral_centroid', 'file_name', 'label']
0      [[0.06005859375, 0.0927734375, 0.11376953125, ...
1      [[0.046875, 0.07421875, 0.10009765625, 0.10742...
2      [[0.03564453125, 0.060546875, 0.08349609375, 0...
3      [[0.076171875, 0.11865234375, 0.15478515625, 0...
4      [[0.02685546875, 0.0419921875, 0.06396484375, ...
                             ...                        
632    [[0.10693359375, 0.16162109375, 0.22216796875,...
633    [[0.0048828125, 0.00634765625, 0.00732421875, ...
634    [[0.025390625, 0.03369140625, 0.03564453125, 0...
635    [[0.01953125, 0.0234375, 0.03564453125, 0.0683...
636    [[0.0751953125, 0.099609375, 0.109375, 0.07958...
Name: zcr, Length: 637, dtype: object


# setting labels and x. I used only the 1D datas which are 4 lists: spectral_rolloff, zcr, rms, spectral_centroid. I combined them into a 1280x4 = 

In [83]:

def flatten_column(column):
    # Check the length of the first few elements to ensure they are 1280
    print(f"Length of first element in column: {len(column.iloc[0])}")
    return column.apply(lambda x: np.array(x[0]).reshape(-1))


# Flatten the lists in each feature column by averaging the values in the list
df['zcr'] = flatten_column(df['zcr'])
df['rms'] = flatten_column(df['rms'])
df['spectral_centroid'] = flatten_column(df['spectral_centroid'])

data = df[['spectral_rolloff', 'zcr', 'rms', 'spectral_centroid']]

# Flatten each row (concatenate the arrays in each row into a single feature vector)
# Function to flatten each row by concatenating all lists in the row
def flatten_row(row):
    # Concatenate all 4 lists into a single array
    return np.concatenate([np.array(row[col]).reshape(-1) for col in row.index])

# Apply the flattening function to each row
data = np.array(data.apply(lambda row: flatten_row(row), axis=1).tolist())

y = df['label'].values

# Output the shape of the flattened data
print(data.shape)

Length of first element in column: 1
Length of first element in column: 1
Length of first element in column: 1
(637, 5120)


In [84]:
print(data.shape)

(637, 5120)


# transforming and splitting data

In [85]:
# Initialize the StandardScaler
scaler = StandardScaler()

# Fit and transform the data using the scaler
data_scaled = scaler.fit_transform(data)
X_train, X_test, y_train, y_test = train_test_split(data_scaled, y, test_size=0.2)

# SVM train time

In [91]:
# Define the SVM classifier
svm_classifier = SVC(random_state=42)

# Define the parameter grid for GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'],
}

# Instantiate GridSearchCV
grid_search = GridSearchCV(estimator=svm_classifier, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Perform Grid Search Cross Validation
grid_search.fit(X_train, y_train)

# Prepare the data for storing results
accuracy_results = []

# Print accuracy for each parameter combination
for mean_score, params in zip(grid_search.cv_results_['mean_test_score'], grid_search.cv_results_['params']):
    print(f"Accuracy: {mean_score * 100:.2f}% for parameters: {params}")
    accuracy_results.append({
        'Parameters': str(params),  # Convert dictionary to string
        'Accuracy (%)': mean_score * 100  # Store accuracy for each combination
    })

# Get the best parameters from the grid search
best_params = grid_search.best_params_
print("\nBest Parameters:", best_params)

# Train the best model on the full training set
svm_classifier_best = grid_search.best_estimator_

# Predict with the best model on the test set
y_pred_best = svm_classifier_best.predict(X_test)

# Calculate accuracy of the best model
accuracy_best = accuracy_score(y_test, y_pred_best)
print(f"Accuracy on test set with best model: {accuracy_best * 100:.2f}%")

# Append the best model results at the end
accuracy_results.append({
    'Parameters': str(best_params),  # Best Parameters as a string
    'Accuracy (%)': accuracy_best * 100  # Accuracy for the best model
})

Accuracy: 53.05% for parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
Accuracy: 31.44% for parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Accuracy: 43.62% for parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy: 50.69% for parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Accuracy: 53.05% for parameters: {'C': 0.1, 'gamma': 'auto', 'kernel': 'linear'}
Accuracy: 31.44% for parameters: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Accuracy: 43.81% for parameters: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Accuracy: 50.88% for parameters: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Accuracy: 53.05% for parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
Accuracy: 43.23% for parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'poly'}
Accuracy: 64.63% for parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy: 52.44% for parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Accuracy: 53.05% for parameters: {'C': 1

In [92]:
# Convert to DataFrame
results_df = pd.DataFrame(accuracy_results)

# Save the results to CSV
results_df.to_csv('results/svm.csv', index=False)

print("GridSearchCV results saved to 'grid_search_best_results.csv'")

GridSearchCV results saved to 'grid_search_best_results.csv'
