In [24]:
import numpy as np

def myloss(model, x, y):
    """Hinge-style margin loss of ``model`` on one labelled sample.

    Parameters
    ----------
    model : fitted estimator exposing ``decision_function`` and ``predict``.
    x : 1-D sequence of feature values for a single sample.
    y : label of the sample; it is multiplied with the signed score.

    Returns
    -------
    (loss, score)
        ``score`` is the predicted label times the absolute decision value,
        returned as whatever array ``decision_function`` yields for one row.
        ``loss`` is the integer ``0`` when ``score * y > 1`` and
        ``1 - score * y`` (same array type as ``score``) otherwise.
    """
    # scikit-learn estimators expect a 2-D (n_samples, n_features) input
    sample = np.reshape(np.array(x), (1, -1))

    # Magnitude of the decision value for this sample
    magnitude = np.abs(model.decision_function(sample))

    # Attach the sign of the predicted label (port of the MATLAB
    # expression ``label * abs(score)``)
    predicted_label = model.predict(sample)[0]
    score = predicted_label * magnitude

    # Zero loss once the sample is beyond the unit margin
    margin = score * y
    if margin > 1:
        return 0, score
    return 1 - margin, score


def testpredicter(model, X, Y):
    preds = model.predict(X)

    accuracy = np.mean(preds == Y)
    return accuracy

In [41]:
import pickle

# Read the four pickled arrays: feature matrices first, then label vectors.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# open pickles that come from a trusted source.
loaded = []
for path in ("X_train.pkl", "X_test.pkl", "y_train.pkl", "y_test.pkl"):
    with open(path, "rb") as f:
        loaded.append(pickle.load(f))
X_train, X_test, y_train, y_test = loaded


# Report the shape of every array exactly as it came off disk
for label, arr in (
    ("Original X_train shape:", X_train),
    ("Original y_train shape:", y_train),
    ("Original X_test shape:", X_test),
    ("Original y_test shape:", y_test),
):
    print(label, arr.shape)

Original X_train shape: (248036, 8)
Original y_train shape: (248036,)
Original X_test shape: (106302, 8)
Original y_test shape: (106302,)


In [47]:
import numpy as np
import matplotlib.pyplot as plt

# Pool the supplied train and test partitions into one data set
X_all = np.vstack((X_train, X_test))
y_all = np.concatenate((y_train, y_test))

# ---------------------------
# Balance the classes by undersampling
# ---------------------------
np.random.seed(42)

# Row positions of the +1 and the -1 labels
idx_pos = np.nonzero(y_all == 1)[0]
idx_neg = np.nonzero(y_all == -1)[0]

# Draw, without replacement, as many rows per class as the rarer class has
n_samples_per_class = min(len(idx_pos), len(idx_neg))
sel_pos = np.random.choice(idx_pos, size=n_samples_per_class, replace=False)
sel_neg = np.random.choice(idx_neg, size=n_samples_per_class, replace=False)

# Merge both selections and mix them so the classes are interleaved
selected_idx = np.concatenate((sel_pos, sel_neg))
np.random.shuffle(selected_idx)

X_bal, y_bal = X_all[selected_idx], y_all[selected_idx]

# ---------------------------
# Reshuffle and cut into new train/test partitions
# ---------------------------
np.random.seed(42)
perm = np.random.permutation(len(y_bal))
X_bal, y_bal = X_bal[perm], y_bal[perm]

# First 70% of the shuffled rows go to train, the remainder to test
split = int(0.7 * len(y_bal))
X_train_new, X_test_new = X_bal[:split], X_bal[split:]
y_train_new, y_test_new = y_bal[:split], y_bal[split:]

for label, arr in (
    ("New X_train shape:", X_train_new),
    ("New y_train shape:", y_train_new),
    ("New X_test shape:", X_test_new),
    ("New y_test shape:", y_test_new),
):
    print(label, arr.shape)

# Per-class counts in each new partition
print("Train Class -1:", np.count_nonzero(y_train_new == -1))
print("train Class 1 :", np.count_nonzero(y_train_new == 1))


print("Test Class -1:", np.count_nonzero(y_test_new == -1))
print("Test Class 1 :", np.count_nonzero(y_test_new == 1))

New X_train shape: (201993, 8)
New y_train shape: (201993,)
New X_test shape: (86569, 8)
New y_test shape: (86569,)
Train Class -1: 101144
train Class 1 : 100849
Test Class -1: 43137
Test Class 1 : 43432


In [57]:
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC,LinearSVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import time
from sklearn.linear_model import SGDClassifier
import numpy as np

# Seed the global NumPy generator; `index` is not read again in this cell
np.random.seed(42)
index = np.random.permutation(100)


# Row positions of each class inside the balanced training partition
idx_pos = np.nonzero(y_train_new == 1)[0]      # class 1
idx_neg = np.nonzero(y_train_new == -1)[0]     # class -1

# Re-seed, then draw 20 rows per class without replacement
np.random.seed(42)
sel_pos = np.random.choice(idx_pos, size=20, replace=False)
sel_neg = np.random.choice(idx_neg, size=20, replace=False)

# Positive picks first, negative picks second
selected_idx = np.concatenate((sel_pos, sel_neg))

# Small balanced subset used for the baseline model
X_train_small, y_train_small = X_train_new[selected_idx], y_train_new[selected_idx]

print(X_train_small.shape, y_train_small.shape)
print("Class -1:", np.count_nonzero(y_train_small == -1))
print("Class 1 :", np.count_nonzero(y_train_small == 1))



# Baseline: one RBF-kernel SVC fitted on the small subset, timed
t0 = time.time()

svm = SVC(kernel='rbf', C=1.0, gamma='scale', verbose=True)
svm.fit(X_train_small, y_train_small)

t1 = time.time()
print("Training time (seconds):", t1 - t0)

y_pred_test = svm.predict(X_test_new)
print("Test Accuracy:", accuracy_score(y_test_new, y_pred_test))



# mysvmbm expects the label in column 0 followed by the feature columns
Dtrain = np.column_stack((y_train_new, X_train_new))
Dtest = np.column_stack((y_test_new, X_test_new))

l = len(Dtrain)    # number of train samples
print(Dtrain.shape)
print(Dtest.shape)


# Arguments passed to mysvmbm below
N, T, n2, q = 20, 10, 20, 1.7

t0 = time.time()
gcell, alpha, ACCURACY1 = mysvmbm(Dtrain, n2, q, N, T, l, Dtest)
t1 = time.time()
print("Training time (seconds):", t1 - t0)

# One summary line per boosting round
for t in range(T):
    dt_acc, train_acc, test_acc = ACCURACY1[t]
    print(f"Round {t+1}: alpha={alpha[t]:.4f}, "
          f"Dt_acc={dt_acc:.4f}, "
          f"Train_acc={train_acc:.4f}, "
          f"Test_acc={test_acc:.4f}")






(40, 8) (40,)
Class -1: 20
Class 1 : 20
[LibSVM]Training time (seconds): 0.003002643585205078
Test Accuracy: 0.6891727985768578
(201993, 9)
(86569, 9)
30
(20, 9)
Training time (seconds): 0.23869800567626953
34
(20, 9)
Training time (seconds): 0.25919151306152344
28
(20, 9)
Training time (seconds): 0.24733734130859375
35
(20, 9)
Training time (seconds): 0.24525117874145508
26
(20, 9)
Training time (seconds): 0.20753860473632812
39
(20, 9)
Training time (seconds): 0.24534320831298828
34
(20, 9)
Training time (seconds): 0.24231266975402832
38
(20, 9)
Training time (seconds): 0.3673079013824463
29
(20, 9)
Training time (seconds): 0.6724686622619629
33
(20, 9)
Training time (seconds): 0.2358384132385254
46
(20, 9)
Training time (seconds): 0.2859642505645752
Training time (seconds): 3.286907196044922
Round 1: alpha=0.4595, Dt_acc=0.9000, Train_acc=0.7148, Test_acc=0.7149
Round 2: alpha=0.0994, Dt_acc=0.7000, Train_acc=0.5495, Test_acc=0.5526
Round 3: alpha=0.1754, Dt_acc=0.7000, Train_acc=0.

In [19]:
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


def mysvmbm(Dtrain, n2, q, N, T, l, Dtest):
    """Train ``T`` linear SVMs, each on ``N`` rows picked by an accept/reject chain.

    Every round draws candidate rows uniformly from the first ``l`` rows of
    ``Dtrain`` and accepts or rejects each one by comparing its ``myloss``
    value (under the previous round's model) with that of the most recently
    accepted row. The ``N`` accepted rows form the round's training set
    ``Dt``; a linear ``SVC`` is fitted on it and scored on ``Dt``, ``Dtrain``
    and ``Dtest``. A round is repeated until its ``alpha`` is positive.

    Parameters
    ----------
    Dtrain : 2-D ndarray
        Training data with the label in column 0 and the features in the
        remaining columns. The special-case test ``ystar * yi == 1`` only
        makes sense for labels in {-1, +1}.
    n2 : number
        Once the rejection counter exceeds ``n2`` the next candidate is
        accepted unconditionally.
    q : float
        Kept for interface compatibility.
        NOTE(review): in the code this was ported from, ``q`` only scaled a
        probability that was never consulted afterwards, so it has no effect
        on which rows are accepted. Confirm against the reference algorithm
        whether the forced step should accept with probability
        ``min(1, q * p)`` instead.
    N : int
        Number of rows accepted per round (size of each ``Dt``).
    T : int
        Number of rounds / models.
    l : int
        Number of leading rows of ``Dtrain`` that candidates are drawn from.
        Must be greater than 10 because the chain starts at position 10 of a
        random permutation.
    Dtest : 2-D ndarray
        Held-out data in the same column layout as ``Dtrain``.

    Returns
    -------
    gcell : list of length ``T`` holding the fitted ``SVC`` of every round.
    alpha : ndarray of shape ``(T,)`` with
        ``0.5 * log((1 - err) / err)``, ``err`` being one minus the accuracy
        on ``Dtrain``.
    ACCURACY1 : ndarray of shape ``(T, 3)``; columns are the accuracy on
        ``Dt``, on ``Dtrain`` and on ``Dtest``.

    Notes
    -----
    * All randomness comes from the global ``np.random`` state.
    * The feature width is taken from ``Dtrain.shape[1] - 1`` (the notebook
      data has 8 features, i.e. 9 columns).
    * Progress lines (candidate count, ``Dt`` shape, elapsed seconds) are
      printed once per attempted round.
    """
    import time  # local import: the defining cell does not import `time` itself

    LAMBDA = 0.1   # C of every linear SVC fitted here
    EPS = 1e-10    # keeps the error rate off 0 and 1 so alpha stays finite

    n_features = Dtrain.shape[1] - 1
    feat = slice(1, n_features + 1)   # feature columns; column 0 is the label

    t0 = time.time()

    # -------------------------------------
    #  INITIAL MODEL ON N RANDOM ROWS
    # -------------------------------------
    index = np.random.permutation(l)
    g0 = SVC(kernel='linear', C=LAMBDA)
    g0.fit(Dtrain[index[:N], feat], Dtrain[index[:N], 0])

    # -------------------------------------
    #  ALLOCATION
    # -------------------------------------
    gcell = [None] * T
    alpha = np.zeros(T)
    ACCURACY1 = np.zeros((T, 3))
    Dt = [np.zeros((N, n_features + 1)) for _ in range(T)]

    # -------------------------------------
    #  START OF THE CHAIN
    # -------------------------------------
    # xi[k], yi[k] hold the k-th accepted row; slot 0 is the starting row.
    index = np.random.permutation(l)
    xi = np.zeros((N + 1, n_features))
    yi = np.zeros(N + 1)
    xi[0, :] = Dtrain[index[10], feat]
    yi[0] = Dtrain[index[10], 0]

    t = 0
    while t < T:
        # Losses in round t are measured with the previous round's model.
        prev_model = g0 if t == 0 else gcell[t - 1]

        i = 0          # rows accepted so far in this attempt
        n1 = 0         # candidates seen since the last acceptance
        COUNTER = 0    # candidates drawn in this attempt

        while i < N:
            COUNTER += 1
            # A full permutation is drawn (rather than one random integer) so
            # that seeded runs consume the random stream exactly as before.
            index = np.random.permutation(l)
            xstar = Dtrain[index[0], feat]
            ystar = Dtrain[index[0], 0]

            L1, score1 = myloss(prev_model, xstar, ystar)
            L2, score2 = myloss(prev_model, xi[i], yi[i])
            L1 = np.asarray(L1).item()
            L2 = np.asarray(L2).item()

            # min(1, exp(-L1) / exp(-L2)) evaluated as exp(min(0, L2 - L1)):
            # same value, but no 0/0 when both exponentials underflow.
            p_accept = np.exp(min(0.0, L2 - L1))

            forced = n1 > n2
            if not forced and p_accept == 1 and (ystar * yi[i]) == 1:
                # Equal labels and no loss increase: decide on the signed
                # scores instead. Clamping at 1 does not change the decision
                # because the uniform draw below is always < 1.
                s1 = np.asarray(score1).item()
                s2 = np.asarray(score2).item()
                p_accept = np.exp(min(0.0, yi[i] * s2 - ystar * s1))

            # One uniform draw per candidate, also on the forced path, keeps
            # the random stream independent of which branch is taken.
            u = np.random.rand()
            if forced or u < p_accept:
                Dt[t][i, feat] = xstar
                Dt[t][i, 0] = ystar
                i += 1
                xi[i] = xstar
                yi[i] = ystar
                n1 = 0
            # Handling acceptance in a single place means a forced acceptance
            # that fills the last slot simply ends the loop instead of
            # indexing one element past the end of the buffers.
            n1 += 1

        print(COUNTER)

        # ----------------------------------------------------
        # TRAIN NEW SVM ON Dt
        # ----------------------------------------------------
        clf = SVC(kernel='linear', C=LAMBDA)
        print(Dt[t].shape)
        clf.fit(Dt[t][:, feat], Dt[t][:, 0])
        gcell[t] = clf

        # ----------------------------------------------------
        # EVALUATION
        # ----------------------------------------------------
        acc_train = testpredicter(clf, Dtrain[:, feat], Dtrain[:, 0])

        # Clip the error so a perfect (or fully wrong) model gives a large
        # finite alpha instead of +/-inf from a division by zero.
        et = np.clip(1 - acc_train, EPS, 1 - EPS)
        alpha[t] = 0.5 * np.log((1 - et) / et)

        # Restart the chain from the last candidate that was drawn
        xi = np.zeros((N + 1, n_features))
        yi = np.zeros(N + 1)
        xi[0, :] = xstar
        yi[0] = ystar

        t1 = time.time()
        print("Training time (seconds):", t1 - t0)

        ACCURACY1[t, 0] = testpredicter(clf, Dt[t][:, feat], Dt[t][:, 0])      # accuracy on Dt
        ACCURACY1[t, 1] = acc_train                                            # accuracy on Dtrain
        ACCURACY1[t, 2] = testpredicter(clf, Dtest[:, feat], Dtest[:, 0])      # accuracy on Dtest

        # Only models with positive alpha are kept; otherwise the same round
        # is sampled and fitted again.
        if alpha[t] > 0:
            t += 1
            t0 = time.time()

    return gcell, alpha, ACCURACY1
