In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
# Load the training dataset
train_data = np.load('/kaggle/input/d/andyzaur/tia-2024/train.npz')

x_train = train_data['x_train']
y_train = train_data['y_train']


In [3]:
# Normalize the features
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)

# Split the training data for validation
x_train_split, x_val, y_train_split, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

In [4]:
# Initialize and train the SVM model with RBF kernel
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
svm_model.fit(x_train_split, y_train_split)


In [5]:
# Validate the model
y_val_pred = svm_model.predict(x_val)
val_accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {val_accuracy:.4f}")

Validation Accuracy: 0.8942


In [6]:
# Print classification report and confusion matrix
print("\nClassification Report:")
print(classification_report(y_val, y_val_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_val_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.88      0.85      1121
           1       1.00      0.97      0.98      1120
           2       0.81      0.82      0.82      1119
           3       0.89      0.91      0.90      1110
           4       0.82      0.85      0.83      1128
           5       0.97      0.95      0.96      1110
           6       0.75      0.67      0.71      1119
           7       0.94      0.96      0.95      1128
           8       0.97      0.97      0.97      1132
           9       0.96      0.96      0.96      1113

    accuracy                           0.89     11200
   macro avg       0.89      0.89      0.89     11200
weighted avg       0.89      0.89      0.89     11200


Confusion Matrix:
[[ 985    0   15   28    1    1   84    0    7    0]
 [   3 1087    4   21    2    0    3    0    0    0]
 [  14    0  917   16  103    1   63    0    5    0]
 [  32    2    6 1010   29    0   28   

In [7]:
# Train the model on the full training set
svm_model.fit(x_train, y_train)

In [8]:
# Load the test dataset
test_data = np.load('/kaggle/input/d/andyzaur/tia-2024/test.npz')
x_test = test_data['x_test']

# Normalize the test features
x_test = scaler.transform(x_test)

# Predict on the test set
y_test_pred = svm_model.predict(x_test)

In [9]:
# Create a submission file
submission = pd.DataFrame({
    'Id': np.arange(len(y_test_pred)),
    'Label': y_test_pred
})
submission.to_csv('submission.csv', index=False)