# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Genre name -> integer class label.
# The label of each genre is its position in the tuple below, so the tuple
# order fixes the numeric encoding (blues=0 ... rock=9).
genre_label_mapping = {
    genre: label
    for label, genre in enumerate((
        'blues',
        'classical',
        'country',
        'disco',
        'hiphop',
        'jazz',
        'metal',
        'pop',
        'reggae',
        'rock',
    ))
}

# Step 1: Read the train.csv and test.csv files
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# Replace genre names in the "label" column with their numerical labels.
# Series.map() yields NaN for any value that is not a key of the mapping, so
# check for that explicitly instead of letting NaN labels reach the models.
mapped_labels = train_data['label'].map(genre_label_mapping)
unmapped_mask = mapped_labels.isna()
if unmapped_mask.any():
    unknown_labels = sorted(
        str(value) for value in train_data.loc[unmapped_mask, 'label'].unique()
    )
    raise ValueError(
        f"train.csv contains labels missing from genre_label_mapping: {unknown_labels}"
    )
train_data['label'] = mapped_labels

# Extract features and labels from the train_data DataFrame.
# "filename" is dropped along with the target; every remaining column is
# treated as a feature.
X = train_data.drop(["filename", "label"], axis=1)
y = train_data["label"]

# Hold out 20% of the training data for validation (fixed seed for a repeatable split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features (mean = 0, standard deviation = 1).
# The scaler is fitted on the training split only and then re-used for the
# validation and test data, so no statistics leak from held-out samples.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
# The test file carries an "id" column in place of "filename"/"label".
X_test = scaler.transform(test_data.drop(["id"], axis=1))

# Train Support Vector Machine (SVM) Classifier.
# probability=True enables predict_proba(); scikit-learn fits those
# probability estimates with an internal, randomly shuffled cross-validation,
# so random_state is pinned (same seed as the train/validation split) to make
# the blended scores reproducible from run to run.
svm_classifier = SVC(probability=True, random_state=42)
svm_classifier.fit(X_train, y_train)
# Per-class probabilities for the validation samples, one column per class.
y_pred_svm = svm_classifier.predict_proba(X_val)

# Train K-Nearest Neighbors (KNN) Classifier.
# With n_neighbors=5 the predicted probabilities are neighbour vote fractions.
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X_train, y_train)
y_pred_knn = knn_classifier.predict_proba(X_val)

# Combine the two models' class-probability estimates with a weighted average
# (SVM weight 0.7, KNN weight 0.3). Both classifiers are fitted on the same
# y_train, so their predict_proba columns refer to the same classes in the
# same order and can be blended element-wise.
weighted_average = 0.7 * y_pred_svm + 0.3 * y_pred_knn

# predict_proba() already returns a 2-D (n_samples, n_classes) array, so the
# blend needs no reshaping; only the sample count is recorded here.
num_val_samples = weighted_average.shape[0]

# Convert the continuous blend to class labels.
# argmax gives the *column index* of the most probable class; translate it
# through classes_ so the result is the actual label even if the labels seen
# during fit are not exactly 0..n_classes-1.
y_pred_combined = svm_classifier.classes_[np.argmax(weighted_average, axis=1)]

# Calculate accuracy on the validation set
accuracy = accuracy_score(y_val, y_pred_combined)
print("Blended Model Validation Set Accuracy:", accuracy)

# Make predictions on the test set using the same 0.7 / 0.3 SVM / KNN blend
# of class probabilities that is used for validation.
test_pred_svm = svm_classifier.predict_proba(X_test)
test_pred_knn = knn_classifier.predict_proba(X_test)
test_weighted_average = 0.7 * test_pred_svm + 0.3 * test_pred_knn

# predict_proba() already returns a 2-D (n_samples, n_classes) array, so the
# blend needs no reshaping; only the sample count is recorded here.
num_test_samples = test_weighted_average.shape[0]

# Convert the continuous blend to class labels.
# argmax gives the column index of the most probable class; map it through
# classes_ so the submitted value is the real numeric label rather than a
# positional index.
test_pred_combined = svm_classifier.classes_[np.argmax(test_weighted_average, axis=1)]

# Write the submission file: one row per test sample with its id and the
# predicted numeric label (labels are not converted back to genre names).
submission_df = pd.DataFrame({"id": test_data["id"], "label": test_pred_combined})
submission_df.to_csv("svm_knn_combined_submission.csv", index=False)

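# Recorded score for this SVM + KNN blend: 88.2 (presumably accuracy in percent; TODO confirm whether validation or leaderboard)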