In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras import layers, models

# Load datasets
def _load_npz_arrays(path):
    """Read every array stored in the ``.npz`` archive at ``path`` into a dict.

    ``np.load`` on an ``.npz`` file returns a lazy ``NpzFile`` that keeps the
    underlying file open until it is closed. Opening it in a ``with`` block
    and copying the arrays out releases the file handle immediately while
    still allowing ``result['key']`` access afterwards.

    Parameters
    ----------
    path : str
        Location of the ``.npz`` archive.

    Returns
    -------
    dict
        Mapping from each key listed in the archive to its loaded array.
    """
    # NOTE(security): allow_pickle=True lets NumPy unpickle object arrays,
    # which can execute arbitrary code. Only load archives you trust.
    with np.load(path, allow_pickle=True) as archive:
        return {key: archive[key] for key in archive.files}


train_feat = _load_npz_arrays("datasets/train/train_feature.npz")
train_feat_X = train_feat['features']
train_feat_Y = train_feat['label']

valid_feat = _load_npz_arrays("datasets/valid/valid_feature.npz")
valid_feat_X = valid_feat['features']
valid_feat_Y = valid_feat['label']

# Only the 'features' array is read from the test archive here.
test_feat = _load_npz_arrays("datasets/test/test_feature.npz")
test_feat_X = test_feat['features']

# Fractions of the training set used for each SVM fit in evaluate_svm.
percentages = [0.2, 0.4, 0.6, 0.8, 1.0]

# Function to train Soft SVM and compute accuracy
def evaluate_svm(train_feat_X_method, valid_feat_X_method, method_name,
                 train_labels=None, valid_labels=None, fractions=None):
    """Fit a linear-kernel SVC on growing prefixes of the training data.

    For every fraction, the first ``int(len(train) * fraction)`` training rows
    are standardised, a linear ``SVC`` is fitted on them, and accuracy is
    measured on the full validation set (scaled with the statistics of the
    training subset). One line per fraction is printed.

    Parameters
    ----------
    train_feat_X_method : array-like, 2-D
        Training feature matrix, one row per sample.
    valid_feat_X_method : array-like, 2-D
        Validation feature matrix with the same number of columns.
    method_name : str
        Label inserted into the printed progress lines.
    train_labels : array-like, optional
        Training labels aligned with ``train_feat_X_method``. Defaults to the
        module-level ``train_feat_Y``.
    valid_labels : array-like, optional
        Validation labels aligned with ``valid_feat_X_method``. Defaults to
        the module-level ``valid_feat_Y``.
    fractions : iterable of float, optional
        Fractions of the training set to evaluate. Defaults to the
        module-level ``percentages``.

    Returns
    -------
    list of float
        Validation accuracy for each fraction, in iteration order.

    Raises
    ------
    ValueError
        If a fraction selects zero training rows.
    """
    # Fall back to the notebook globals so existing three-argument calls
    # behave exactly as before.
    if train_labels is None:
        train_labels = train_feat_Y
    if valid_labels is None:
        valid_labels = valid_feat_Y
    if fractions is None:
        fractions = percentages

    accuracies = []
    for percentage in fractions:
        n_samples = int(len(train_feat_X_method) * percentage)
        if n_samples < 1:
            raise ValueError(
                f"Fraction {percentage} of {len(train_feat_X_method)} training rows "
                "selects no samples; cannot fit the SVM."
            )

        # Subset the training data. The slice takes the leading rows without
        # shuffling -- assumes the rows are already in random order; TODO confirm.
        x_train_subset = train_feat_X_method[:n_samples]
        y_train_subset = train_labels[:n_samples]

        # Fit the scaler on the training subset only, then reuse its
        # statistics for the validation set.
        scaler = StandardScaler()
        x_train_scaled = scaler.fit_transform(x_train_subset)
        x_valid_scaled = scaler.transform(valid_feat_X_method)

        # Train the Soft SVM model (linear kernel)
        svm_classifier = SVC(kernel='linear', random_state=42)
        svm_classifier.fit(x_train_scaled, y_train_subset)

        # Predict on the validation set
        y_pred = svm_classifier.predict(x_valid_scaled)

        # Calculate accuracy
        accuracy = accuracy_score(valid_labels, y_pred)
        accuracies.append(accuracy)
        # round() rather than int(): e.g. 0.57 * 100 is 56.999... and would
        # otherwise be reported as 56%.
        print(f"Accuracy ({method_name}) with {round(percentage * 100)}% training data: {accuracy:.2f}")

    return accuracies

# Concatenation of Embeddings: keep the sample axis and collapse every
# remaining axis into a single feature axis, for both splits at once.
train_feat_X_concat, valid_feat_X_concat = (
    split.reshape((split.shape[0], -1))
    for split in (train_feat_X, valid_feat_X)
)

# Report the flattened shapes so the resulting feature dimensionality is visible
print(f"Concatenated Train Features Shape: {train_feat_X_concat.shape}")
print(f"Concatenated Validation Features Shape: {valid_feat_X_concat.shape}")

# Build a very small neural network with only 1 neuron
def build_small_nn(input_dim=None):
    """Build an uncompiled Sequential model with one sigmoid output neuron.

    The input shape is declared with an explicit ``Input`` layer instead of
    passing ``input_shape`` to ``Dense``; Keras warns about the latter when
    used inside a ``Sequential`` model.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. Defaults to the column count of the
        module-level ``train_feat_X_concat``.

    Returns
    -------
    Sequential
        The model: ``Input(shape=(input_dim,))`` followed by
        ``Dense(1, activation='sigmoid')``.
    """
    if input_dim is None:
        input_dim = train_feat_X_concat.shape[1]

    model = models.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(1, activation='sigmoid'),  # Only 1 neuron
    ])
    return model

# Instantiate the single-neuron network and configure its training setup
small_nn_model = build_small_nn()
small_nn_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Show the layer table so the trainable-parameter count can be inspected
small_nn_model.summary()

# Run the SVM sweep on the flattened (concatenated) features
accuracies_concat = evaluate_svm(
    train_feat_X_method=train_feat_X_concat,
    valid_feat_X_method=valid_feat_X_concat,
    method_name="Concatenation",
)


Concatenated Train Features Shape: (7080, 9984)
Concatenated Validation Features Shape: (489, 9984)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Accuracy (Concatenation) with 20% training data: 0.94
Accuracy (Concatenation) with 40% training data: 0.97
Accuracy (Concatenation) with 60% training data: 0.97
Accuracy (Concatenation) with 80% training data: 0.98
Accuracy (Concatenation) with 100% training data: 0.98
