## a4a Dataset

In [None]:
from Modele import Modele
from Optimiseur import Optimiseur
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from sklearn.datasets import load_svmlight_file

# Read the a4a training split from its svmlight/libsvm text file. The loader
# hands back a (features, labels) pair; the features are still sparse here and
# are densified in a later cell.
train_split = load_svmlight_file("a4a.txt")
X_train_, y_train = train_split

# Report the sizes so a mismatch with the test split is caught early.
print("X_train.shape:", X_train_.shape)
print("y_train.shape:", y_train.shape)

In [None]:
# Read the a4a test split.
# NOTE(review): the mixed-case ".Txt" extension is unusual -- confirm it matches
# the file on disk (it matters on case-sensitive filesystems).
test_split = load_svmlight_file("a4a_t.Txt")
X_test_, y_test = test_split

# Keep every column except the last one.
# NOTE(review): presumably the test file exposes one more feature index than
# the training file, and this trims it to the same width -- confirm.
n_test_cols = X_test_.shape[1]
X_test_ = X_test_[:, :n_test_cols - 1]

print("X_test.shape:", X_test_.shape)
print("y_test.shape:", y_test.shape)

In [None]:
# Turn the sparse feature matrices into dense NumPy arrays and force both to
# 122 columns.
# NOTE(review): 122 is assumed to already be the column count of both
# matrices, in which case the reshape leaves the layout unchanged -- confirm.
n_features = 122

X_train_dense, X_test_dense = (sparse.toarray() for sparse in (X_train_, X_test_))

X_train = X_train_dense.reshape(-1, n_features)
X_test = X_test_dense.reshape(-1, n_features)

## RBF parameter tuning

In [None]:
# Coarse sweep over the kernel parameter gamma, one decade per step from 1e-5
# to 1e5. For each gamma a fresh model is built, its coefficient vector is
# obtained from `alpha_opt`, and the training loss and the train/test
# accuracies are stored at the matching position of the lists below.
# NOTE(review): the meaning of Modele's first argument (0 here) is not visible
# in this file -- presumably a regularisation strength; confirm in Modele.
gammas = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4, 1e5]
alphas, train_loss = [], []
train_accuracy, test_accuracy = [], []

for gamma in gammas:
    modele = Modele(0, gamma)

    alpha = modele.alpha_opt(X_train, y_train, gamma)
    alphas.append(alpha)

    loss_on_train = modele.loss_function(X_train, y_train, alpha)
    train_loss.append(loss_on_train)

    # `accuracy` always receives the training inputs first, followed by the
    # inputs/labels that are being scored.
    acc_on_train = modele.accuracy(X_train, X_train, y_train, alpha)
    train_accuracy.append(acc_on_train)

    acc_on_test = modele.accuracy(X_train, X_test, y_test, alpha)
    test_accuracy.append(acc_on_test)

In [None]:
# Positions in the coarse grid whose accuracy ties for the maximum (there may
# be several). The lists are converted to arrays explicitly so the
# element-wise comparison does not rely on NumPy's reflected `==`.
best_test = np.flatnonzero(np.asarray(test_accuracy) == np.max(test_accuracy))
best_train = np.flatnonzero(np.asarray(train_accuracy) == np.max(train_accuracy))

print("best gamma range for train: ", [gammas[k] for k in best_train])
print("best gamma range for test: ", [gammas[k] for k in best_test])

In [None]:
# Test accuracy over the coarse gamma grid: markers plus a connecting line on
# a logarithmic x-axis. The figure is written to disk before it is shown.
fig, ax = plt.subplots()
ax.scatter(gammas, test_accuracy)
ax.plot(gammas, test_accuracy, label='Test accuracy')
ax.set_xscale('log')
ax.set_xlabel('Parameter Gamma (log scale)')
ax.set_ylabel('Test Accuracy')
ax.set_title('Test Accuracy for different parameter gamma')
fig.savefig('accuracy_rbf_a4a.pdf')
plt.show()

# Training loss over the coarse gamma grid, drawn on log-log axes.
fig, ax = plt.subplots()
ax.scatter(gammas, train_loss)
ax.plot(gammas, train_loss, label='Train loss')
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('Parameter Gamma (log scale)')
ax.set_ylabel('Train loss (log scale)')
ax.set_title('Train loss for different parameter gamma')
ax.legend()
plt.show()



In [None]:
# Fine sweep: 30 evenly spaced gamma values between 0.10 and 1. The same
# quantities as in the coarse sweep are recorded, in the `*_2` lists.
gammas_2 = np.linspace(0.10, 1, 30)
alphas_2, train_loss_2 = [], []
train_accuracy_2, test_accuracy_2 = [], []

for gamma in gammas_2:
    modele = Modele(0, gamma)

    alpha = modele.alpha_opt(X_train, y_train, gamma)
    alphas_2.append(alpha)

    loss_on_train = modele.loss_function(X_train, y_train, alpha)
    train_loss_2.append(loss_on_train)

    acc_on_train = modele.accuracy(X_train, X_train, y_train, alpha)
    train_accuracy_2.append(acc_on_train)

    acc_on_test = modele.accuracy(X_train, X_test, y_test, alpha)
    test_accuracy_2.append(acc_on_test)

In [None]:
# Positions in the fine grid whose accuracy ties for the maximum; the matching
# gamma values are printed for the train and the test split.
best_test = np.flatnonzero(np.asarray(test_accuracy_2) == np.max(test_accuracy_2))
best_train = np.flatnonzero(np.asarray(train_accuracy_2) == np.max(train_accuracy_2))

print("best gamma range for train: ", [gammas_2[k] for k in best_train])
print("best gamma range for test: ", [gammas_2[k] for k in best_test])


# Test accuracy over the fine (linearly spaced) gamma grid; saved to disk
# before being displayed.
fig, ax = plt.subplots()
ax.scatter(gammas_2, test_accuracy_2)
ax.plot(gammas_2, test_accuracy_2, label='Test accuracy')
ax.set_xlabel('Parameter Gamma')
ax.set_ylabel('Test Accuracy')
ax.set_title('Test Accuracy for different parameter gamma')
fig.savefig('accuracy_rbf_2_a4a.pdf')
plt.show()

# Training loss over the fine gamma grid. Both axes are linear here (the grid
# itself is linearly spaced and no log scale is set), so the axis labels must
# not claim a log scale.
plt.figure()
plt.scatter(gammas_2, train_loss_2)
plt.plot(gammas_2, train_loss_2, label='Train loss')
plt.xlabel('Parameter Gamma')
plt.ylabel('Train loss')
plt.title('Train loss for different parameter gamma')
plt.legend()
plt.show()



In [None]:
# Gamma retained for the rest of the notebook.
# NOTE(review): presumably picked by inspecting the fine sweep; the same value
# is hard-coded again where the model is rebuilt -- keep the two in sync.
chosen_gamma = 0.6
print("gamma chosen:", chosen_gamma)

## Figure: test accuracy as a function of the learning rate

In [None]:
# Rebuild the model with the retained gamma (0.6) and wrap it in the optimiser
# that runs the gradient-descent experiments.
modele = Modele(0, 0.6)
optimiseur = Optimiseur(modele)

# `constante_L` returns a pair of constants for the training inputs.
# NOTE(review): presumably the smoothness (L) and strong-convexity (mu)
# constants of the objective -- confirm in Modele.
problem_constants = modele.constante_L(X_train)
L, mu = problem_constants

In [None]:
# Gradient-descent experiment: for `nb_init` random starting points, run
# gradient descent with `n_lr` step sizes evenly spaced in [1/L, 1.99/L] and
# record, for every (step size, start) pair, the final losses, accuracies,
# a norm value and the length of the returned iterate list.
# NOTE(review): `target` / `criterion='norm'` presumably stop the run once a
# gradient norm drops below 0.05 -- confirm in Optimiseur.gradient_descent.
max_iters = 5000
target = 0.05
criterion = 'norm'

n_lr = 20
nb_init = 10

# Result tables: one row per learning rate, one column per initialisation.
train_loss_001_2k = np.zeros((n_lr, nb_init))
test_loss_001_2k = np.zeros((n_lr, nb_init))
norm_001_2k = np.zeros((n_lr, nb_init))
train_accuracy_001_2k = np.zeros((n_lr, nb_init))
test_accuracy_001_2k = np.zeros((n_lr, nb_init))
nb_ite_001_2k = np.zeros((n_lr, nb_init))

# The constants depend only on the model and X_train, so they (and the
# learning-rate grid derived from them) are computed once, outside the loop
# over initialisations.
L_001_2k, mu = modele.constante_L(X_train)
learning_rates = np.linspace(1 / L_001_2k, 1.99 / L_001_2k, n_lr)

for i in range(nb_init):
    # One random starting point per run, with one entry per training sample.
    init = np.random.randn(X_train.shape[0])

    alphas = []
    for j, lr in enumerate(learning_rates):
        # Every learning rate starts from its own copy of the same point.
        alpha, alpha_list = optimiseur.gradient_descent(
            X_train, y_train, init.copy(), lr, max_iters, target, criterion
        )
        alphas.append(alpha)
        nb_ite_001_2k[j, i] = len(alpha_list)

    # Evaluate all final iterates of this run at once; each returned sequence
    # fills column i of the corresponding table.
    train_loss, test_loss, norm, train_accuracy, test_accuracy = modele.compute_all(
        X_train, X_test, y_train, y_test, alphas
    )
    train_loss_001_2k[:, i] = train_loss
    test_loss_001_2k[:, i] = test_loss
    norm_001_2k[:, i] = norm
    train_accuracy_001_2k[:, i] = train_accuracy
    test_accuracy_001_2k[:, i] = test_accuracy

# Prints a fixed description string, not the shape of an actual array.
print("dimensions alphas: (n_lr,nb_init,X_train.shape[0])")

In [None]:
# Summarise the gradient-descent experiment across initialisations and plot
# the mean test accuracy (with a +/- one standard deviation band) against the
# step size expressed in units of 1/L.
learning_rates = np.linspace(1, 1.99, n_lr)
ts = learning_rates  # x-axis values: learning rate multiplied by L

print(nb_ite_001_2k)

# Aggregate over every initialisation that the experiment produces. A fixed
# column count smaller than `nb_init` would silently drop the remaining runs;
# lower this value explicitly to restrict the statistics to the first few.
n_runs_used = nb_init

test_loss_mean = np.mean(test_loss_001_2k[:, :n_runs_used], axis=1)
test_loss_std = np.std(test_loss_001_2k[:, :n_runs_used], axis=1)
train_loss_mean = np.mean(train_loss_001_2k[:, :n_runs_used], axis=1)
train_loss_std = np.std(train_loss_001_2k[:, :n_runs_used], axis=1)
norm_mean = np.mean(norm_001_2k[:, :n_runs_used], axis=1)
norm_std = np.std(norm_001_2k[:, :n_runs_used], axis=1)
train_accuracy_mean = np.mean(train_accuracy_001_2k[:, :n_runs_used], axis=1)
train_accuracy_std = np.std(train_accuracy_001_2k[:, :n_runs_used], axis=1)
test_accuracy_mean = np.mean(test_accuracy_001_2k[:, :n_runs_used], axis=1)
test_accuracy_std = np.std(test_accuracy_001_2k[:, :n_runs_used], axis=1)

plt.figure()
plt.plot(ts, test_accuracy_mean)
# Shaded band: one standard deviation on either side of the mean.
plt.fill_between(
    ts,
    test_accuracy_mean - test_accuracy_std,
    test_accuracy_mean + test_accuracy_std,
    alpha=0.2,
)
plt.xlabel('Learning Rate * L')
plt.ylabel('Test Accuracy')
plt.title('Test Accuracy vs Learning Rate')
# To save the figure, uncomment:
# plt.savefig('Figures/GD/test_accuracy_a4a_001.pdf')
plt.show()