In [1]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix

# Directory holding the pre-split feature/label arrays; defined once so the
# four loads below cannot drift apart.
DATA_DIR = '/kaggle/input/classifier-data'

# Load .npy files
X_train = np.load(f'{DATA_DIR}/X_train.npy')
y_train_raw = np.load(f'{DATA_DIR}/y_train.npy')
X_test = np.load(f'{DATA_DIR}/X_test.npy')
y_test_raw = np.load(f'{DATA_DIR}/y_test.npy')

# Convert y from multi-label to binary (1 if any class is active, else 0)
y_train = (y_train_raw.sum(axis=1) > 0).astype(int)
y_test = (y_test_raw.sum(axis=1) > 0).astype(int)

# Fail early with a readable message if features and labels are misaligned.
assert len(X_train) == len(y_train), "X_train / y_train row counts differ"
assert len(X_test) == len(y_test), "X_test / y_test row counts differ"

# Define a pipeline: scaling + SVM.
# Keeping the scaler inside the pipeline means it is re-fit on the training
# part of every CV fold, so validation folds never influence the scaling.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(class_weight='balanced'))
])

# Define parameter grid.
# `gamma` is only used by the rbf kernel (the linear kernel ignores it), so
# the grid is split per kernel. This gives 3 linear + 6 rbf = 9 candidates
# (45 fits with 5 folds) instead of 12 candidates (60 fits) where every
# linear setting was fitted twice with identical results.
param_grid = [
    {
        'svc__kernel': ['linear'],
        'svc__C': [0.1, 1, 10],
    },
    {
        'svc__kernel': ['rbf'],
        'svc__C': [0.1, 1, 10],
        'svc__gamma': ['scale', 'auto'],
    },
]

# Stratified K-Fold for balanced validation (seeded so folds are reproducible)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Grid Search with F1 score (F1 of the positive class, label 1)
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    cv=cv,
    scoring='f1',
    verbose=2,
    n_jobs=-1
)

# Fit the model
grid_search.fit(X_train, y_train)

# Show best parameters
print("Best Parameters:", grid_search.best_params_)

# Predict and evaluate on the held-out test split using the refit best model
y_pred = grid_search.predict(X_test)

print("\n Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best Parameters: {'svc__C': 10, 'svc__gamma': 'scale', 'svc__kernel': 'rbf'}

 Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.59      0.71        17
           1       0.68      0.94      0.79        16

    accuracy                           0.76        33
   macro avg       0.80      0.76      0.75        33
weighted avg       0.80      0.76      0.75        33

Confusion Matrix:
 [[10  7]
 [ 1 15]]


In [2]:
!pip install -q imbalanced-learn==0.11.0


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.6/235.6 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import numpy as np

# Read the training feature matrix from disk
data = np.load('/kaggle/input/classifier-data/X_train.npy')

# Report the array's dimensions and element type
for caption, detail in (("Shape:", data.shape), ("Data type:", data.dtype)):
    print(caption, detail)

# Show the array itself
print("Contents:\n", data)

Shape: (126, 360)
Data type: float64
Contents:
 [[1.         0.87244898 0.87244898 ... 0.44000003 0.09       0.47000003]
 [1.         0.89456869 0.8115016  ... 0.31693989 0.48087433 0.26229507]
 [1.         0.92546584 0.8757764  ... 0.72222227 1.         0.65656567]
 ...
 [1.         0.91319444 0.81597222 ... 0.5393939  1.         0.85454547]
 [1.         0.85046729 0.77570093 ... 0.99145305 0.84615386 0.98290592]
 [1.         0.88235294 0.82972136 ... 0.56050956 0.38853505 0.54777068]]


In [4]:
# Preview only the leading rows of the array loaded in the previous cell
leading_rows = data[:5]
print("First 5 entries:\n", leading_rows)

First 5 entries:
 [[1.         0.87244898 0.87244898 ... 0.44000003 0.09       0.47000003]
 [1.         0.89456869 0.8115016  ... 0.31693989 0.48087433 0.26229507]
 [1.         0.92546584 0.8757764  ... 0.72222227 1.         0.65656567]
 [1.         0.71627907 0.91627907 ... 0.64788735 0.64084512 0.28873241]
 [1.         0.89393939 0.81818182 ... 0.56291395 0.68211919 0.61589408]]


In [5]:
import numpy as np

# Read the raw training label matrix from disk
data = np.load('/kaggle/input/classifier-data/y_train.npy')

# Report the array's dimensions and element type
for caption, detail in (("Shape:", data.shape), ("Data type:", data.dtype)):
    print(caption, detail)

# Show the array itself
print("Contents:\n", data)

Shape: (126, 6)
Data type: int64
Contents:
 [[0 0 0 0 0 0]
 [0 0 1 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 1 1 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 1 1 0 0 0]
 [0 1 1 1 0 0]
 [0 0 0 0 0 0]
 [1 1 1 1 1 1]
 [1 0 0 1 0 0]
 [0 0 1 1 0 0]
 [0 0 0 0 0 0]
 [0 0 1 1 0 0]
 [0 0 0 1 1 0]
 [0 1 1 0 0 0]
 [1 1 1 1 0 0]
 [1 1 1 1 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 1 1 0 0]
 [0 1 1 1 0 0]
 [1 1 1 1 0 0]
 [0 1 1 0 0 0]
 [1 1 0 0 0 0]
 [0 0 0 0 0 0]
 [1 0 0 1 0 0]
 [1 1 1 1 1 0]
 [0 0 0 0 0 0]
 [0 1 0 0 0 0]
 [1 1 1 0 0 0]
 [0 1 1 0 0 0]
 [0 1 1 1 1 0]
 [0 1 1 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 1 1 0]
 [0 0 1 0 0 0]
 [1 1 1 0 0 0]
 [1 1 1 1 1 1]
 [0 1 1 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 1 1 1 0 0]
 [0 0 0 0 0 0]
 [0 1 1 0 0 0]
 [0 0 0 0 0 0]
 [1 1 1 1 1 1]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [1 1 1 1 1 1]
 [1 1 1 1 0 0]
 [0 0 1 1 0 0]
 [0 0 0 1 1 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 1 0 0 0]
 [1 1 1 0 0

In [6]:
import numpy as np

# Read the test feature matrix from disk
data = np.load('/kaggle/input/classifier-data/X_test.npy')

# Report the array's dimensions and element type
for caption, detail in (("Shape:", data.shape), ("Data type:", data.dtype)):
    print(caption, detail)

# Show the array itself
print("Contents:\n", data)

Shape: (33, 360)
Data type: float64
Contents:
 [[1.         0.84615385 1.         ... 0.5181818  0.39090908 0.69999999]
 [1.         0.99122807 0.82894737 ... 0.08231707 0.24085365 0.25304878]
 [1.         0.84393064 0.95953757 ... 0.23917997 0.23234625 0.2528474 ]
 ...
 [1.         1.         0.89595376 ... 0.27464789 0.41549295 0.45070425]
 [1.         0.79116466 0.93574297 ... 0.43378994 0.31963471 0.44292235]
 [1.         0.9408867  0.91133005 ... 0.34710744 0.36363637 0.49586779]]
