In [2]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix

# Load .npy files
X_train = np.load('/kaggle/input/classifier-data/X_train.npy')
y_train_raw = np.load('/kaggle/input/classifier-data/y_train.npy')
X_test = np.load('/kaggle/input/classifier-data/X_test.npy')
y_test_raw = np.load('/kaggle/input/classifier-data/y_test.npy')

# Convert y from multi-label to binary (1 if any class is active, else 0)
y_train = (y_train_raw.sum(axis=1) > 0).astype(int)
y_test = (y_test_raw.sum(axis=1) > 0).astype(int)

# Define a pipeline: scaling + SVM
class_weights = {0:1.286, 1: 0.82}
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(class_weight=class_weights))
])

# Define parameter grid
param_grid = {
    'svc__C': [0.1, 1, 10],
    'svc__kernel': ['linear', 'rbf'],
    'svc__gamma': ['scale', 'auto']
}

# Stratified K-Fold for balanced validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Grid Search with F1 score
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    cv=cv,
    scoring='f1',
    verbose=2,
    n_jobs=-1
)

# Fit the model
grid_search.fit(X_train, y_train)

# Show best parameters
print("Best Parameters:", grid_search.best_params_)

# Predict and evaluate
y_pred = grid_search.predict(X_test)

print("\n Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best Parameters: {'svc__C': 10, 'svc__gamma': 'scale', 'svc__kernel': 'rbf'}

 Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.59      0.71        17
           1       0.68      0.94      0.79        16

    accuracy                           0.76        33
   macro avg       0.80      0.76      0.75        33
weighted avg       0.80      0.76      0.75        33

Confusion Matrix:
 [[10  7]
 [ 1 15]]
[CV] END ...svc__C=0.1, svc__gamma=scale, svc__kernel=linear; total time=   0.0s
[CV] END ....svc__C=0.1, svc__gamma=auto, svc__kernel=linear; total time=   0.0s
[CV] END .......svc__C=0.1, svc__gamma=auto, svc__kernel=rbf; total time=   0.0s
[CV] END .....svc__C=1, svc__gamma=scale, svc__kernel=linear; total time=   0.0s
[CV] END .....svc__C=1, svc__gamma=scale, svc__kernel=linear; total time=   0.0s
[CV] END ........svc__C=1, svc__gamma=scale, svc__kernel=rbf; tota

In [1]:
import numpy as np

# Assuming y_train_raw is your (126, 6) matrix
y_train_raw = np.load('/kaggle/input/classifier-data/y_train.npy')  # Replace with your actual data if necessary

# Sum each row and count rows where the sum is 0
row_sums = y_train_raw.sum(axis=1)  # Sum along each row (axis=1)

# Count rows where sum is 0
count_zero_sum_rows = np.sum(row_sums == 0)

print("Number of rows with sum 0:", count_zero_sum_rows)


Number of rows with sum 0: 49
