In [2]:
from Modele import Modele
from Optimiseur import Optimiseur
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_svmlight_file

In [None]:
def CKN_MNIST(n,q):
    """Draw a random, binary-labelled subsample of CKN-MNIST and split it into train/test.

    Reads the arrays ``X`` and ``y`` from ``ckn_mnist.npz`` (current working
    directory), maps labels ``< 5`` to ``-1`` and all others to ``1``, draws
    ``n`` distinct samples with the global NumPy RNG and splits them.

    Parameters
    ----------
    n : int
        Number of samples to draw without replacement.
    q : float
        Fraction of the ``n`` drawn samples used for training, in ``[0, 1]``.
        The first ``int(n*q)`` drawn samples form the training set.

    Returns
    -------
    X_train, y_train, X_test, y_test : numpy.ndarray
        Features flattened to 2-D ``(n_samples, n_features)`` and 1-D labels
        in ``{-1, 1}``. Either split may be empty (``q == 0`` or ``q == 1``).

    Raises
    ------
    ValueError
        If ``q`` is outside ``[0, 1]``; ``np.random.choice`` also raises it
        when ``n`` exceeds the number of available samples.
    """
    # A fraction outside [0, 1] would make the slices below silently produce
    # a wrong split (e.g. a negative n_train slices from the end).
    if not 0 <= q <= 1:
        raise ValueError(f"q must be in [0, 1], got {q!r}")

    # np.load on a .npz returns an NpzFile holding an open file handle;
    # the context manager closes it once both arrays are read into memory.
    with np.load('ckn_mnist.npz') as data:
        images = data['X']
        labels = data['y']

    # Binarization of the labels
    labels = np.where(labels < 5, -1, 1)

    # Flatten every sample up front with an explicit feature count:
    # reshape(0, -1) on an empty split is ambiguous and raises in NumPy.
    n_features = int(np.prod(images.shape[1:]))
    flat_images = images.reshape(images.shape[0], n_features)

    indices = np.random.choice(images.shape[0], n, replace=False)

    # Separate the drawn samples into training and testing parts
    n_train = int(n*q)
    train_idx, test_idx = indices[:n_train], indices[n_train:]

    X_train = flat_images[train_idx]
    X_test = flat_images[test_idx]
    y_train = labels[train_idx].reshape(-1)
    y_test = labels[test_idx].reshape(-1)
    return X_train, y_train, X_test, y_test

In [None]:
def a4a():
    """Load the a4a train/test files and return them as dense arrays.

    Reads ``a4a.txt`` and ``a4a_t.Txt`` (svmlight format) from the working
    directory, drops the last feature column of the test matrix, densifies
    both matrices and reshapes them to 122 columns.

    Returns
    -------
    X_train, y_train, X_test, y_test
        Dense feature arrays with 122 columns and the label vectors as
        returned by ``load_svmlight_file``.
    """
    n_features = 122

    train_sparse, y_train = load_svmlight_file("a4a.txt")
    test_sparse, y_test = load_svmlight_file("a4a_t.Txt")

    # Presumably the test file holds one more feature column than the
    # training file; the extra last column is discarded -- TODO confirm.
    test_sparse = test_sparse[:, :-1]

    train_dense, test_dense = train_sparse.toarray(), test_sparse.toarray()

    # NOTE(review): when both matrices already have 122 columns this reshape
    # changes nothing; with any other width it would regroup values across rows.
    return (
        train_dense.reshape(-1, n_features),
        y_train,
        test_dense.reshape(-1, n_features),
        y_test,
    )

In [None]:
def create_modele(Lambda,gamma,X_train):
    """Build a model/optimizer pair and a random starting vector.

    Parameters
    ----------
    Lambda, gamma
        Forwarded unchanged to ``Modele(Lambda, gamma)``.
    X_train
        Training matrix; passed to ``modele.constante_L`` and used for the
        length of the starting vector.

    Returns
    -------
    (modele, optimiseur, L, mu, init)
        The model, the ``Optimiseur`` wrapping it, the two values returned by
        ``modele.constante_L(X_train)`` and a standard-normal vector with one
        entry per row of ``X_train``.
    """
    model = Modele(Lambda, gamma)
    optimizer = Optimiseur(model)

    # Presumably smoothness / strong-convexity constants -- TODO confirm in Modele.
    const_L, const_mu = model.constante_L(X_train)

    # One coefficient per training sample, drawn from the global NumPy RNG.
    n_samples = X_train.shape[0]
    start = np.random.randn(n_samples)

    return model, optimizer, const_L, const_mu, start

In [None]:
def gd_mean(optimiseur,modele,X_train,y_train,X_test,y_test,init,L,max_iters,criterion,target,name):
    """Run constant-step gradient descent for ten step sizes from 1.9/L to 1.99/L and save all curves.

    For each step size, ``optimiseur.gradient_descent`` is started from a
    fresh copy of ``init`` and ``modele.compute_all`` turns the returned
    iterates into five metric curves. Runs shorter than the longest one are
    padded by repeating their last value. No averaging is done here: each
    file holds one row per step size.

    Files written (``name`` is used as a plain string prefix):
    ``<name>train_loss.npy``, ``<name>test_loss.npy``, ``<name>norm.npy``,
    ``<name>train_accuracy.npy``, ``<name>test_accuracy.npy``.
    """
    def _extend_with_last(curve, length):
        # Repeat the final value of `curve` until it holds `length` entries.
        return np.append(curve, [curve[-1]] * (length - len(curve)))

    step_numerators = (1.9, 1.91, 1.92, 1.93, 1.94, 1.95, 1.96, 1.97, 1.98, 1.99)
    learning_rates = [numerator / L for numerator in step_numerators]

    file_suffixes = (
        "train_loss.npy",
        "test_loss.npy",
        "norm.npy",
        "train_accuracy.npy",
        "test_accuracy.npy",
    )

    # history[k] collects, run after run, the curve saved under file_suffixes[k].
    history = tuple([] for _ in file_suffixes)
    for lr in learning_rates:
        _, iterates = optimiseur.gradient_descent(
            X_train, y_train, init.copy(), lr, max_iters, target, criterion
        )
        tr_loss, te_loss, norms, tr_acc, te_acc = modele.compute_all(
            X_train, X_test, y_train, y_test, iterates
        )
        for bucket, curve in zip(history, (tr_loss, te_loss, norms, tr_acc, te_acc)):
            bucket.append(curve)

    # Length of the longest run, measured on the training-loss curves.
    longest = max(len(curve) for curve in history[0])

    # A run is padded (all five curves) when its training-loss curve is short.
    for run in range(len(history[0])):
        if len(history[0][run]) < longest:
            for bucket in history:
                bucket[run] = _extend_with_last(bucket[run], longest)

    for suffix, bucket in zip(file_suffixes, history):
        np.save(name + suffix, bucket)


In [None]:
def gd(optimiseur,modele,X_train,y_train,X_test,y_test,init,L,max_iters,criterion,target,name):
    """Run constant-step gradient descent with step sizes 1/L, 1.5/L and 1.9/L and save each run.

    Every run starts from a fresh copy of ``init``. All three runs are
    computed first; only then are the curves returned by
    ``modele.compute_all`` written to disk, one file per metric and step size:
    ``<name><metric>_1.npy`` (1/L), ``<name><metric>_15.npy`` (1.5/L) and
    ``<name><metric>_2.npy`` (1.9/L), with ``<metric>`` in ``train_loss``,
    ``test_loss``, ``norm``, ``train_accuracy``, ``test_accuracy``.
    """
    learning_rates = [1/L, 1.5/L, 1.9/L]
    # File tag of each step size, in the same order as learning_rates
    # (note that the 1.9/L run is stored under the "_2" tag).
    run_tags = ("_1.npy", "_15.npy", "_2.npy")
    metric_names = ("train_loss", "test_loss", "norm", "train_accuracy", "test_accuracy")

    # One 5-tuple of curves per step size.
    runs = []
    for lr in learning_rates:
        _, iterates = optimiseur.gradient_descent(
            X_train, y_train, init.copy(), lr, max_iters, target, criterion
        )
        tr_loss, te_loss, norms, tr_acc, te_acc = modele.compute_all(
            X_train, X_test, y_train, y_test, iterates
        )
        runs.append((tr_loss, te_loss, norms, tr_acc, te_acc))

    # Save the results into .npy files, step size by step size.
    for tag, curves in zip(run_tags, runs):
        for metric, curve in zip(metric_names, curves):
            np.save(name + metric + tag, np.array(curve))

In [None]:
def dynamic(optimiseur,modele,X_train,y_train,X_test,y_test,init,L,max_iters,criterion,target,name):
    """Run ``optimiseur.dynamic_stepsize`` from a copy of ``init`` and save the resulting curves.

    Writes ``<name>train_loss.npy``, ``<name>test_loss.npy``,
    ``<name>norm.npy``, ``<name>train_accuracy.npy``,
    ``<name>test_accuracy.npy`` (from ``modele.compute_all``) and
    ``<name>stepsizes.npy`` (third value returned by the optimizer).
    ``L`` is accepted for signature parity with the other runners but unused.
    """
    # The trailing True presumably asks the optimizer to also return the
    # step sizes it used -- TODO confirm in Optimiseur.
    _, iterates, stepsizes_dynamic = optimiseur.dynamic_stepsize(
        X_train, y_train, init.copy(), max_iters, target, criterion, True
    )
    tr_loss, te_loss, norms, tr_acc, te_acc = modele.compute_all(
        X_train, X_test, y_train, y_test, iterates
    )

    outputs = (
        ("train_loss.npy", tr_loss),
        ("test_loss.npy", te_loss),
        ("norm.npy", norms),
        ("train_accuracy.npy", tr_acc),
        ("test_accuracy.npy", te_acc),
        ("stepsizes.npy", stepsizes_dynamic),
    )
    for suffix, values in outputs:
        np.save(name + suffix, values)

In [None]:
def optimal(optimiseur,modele,X_train,y_train,X_test,y_test,init,L,max_iters,criterion,target,name):
    """Run ``optimiseur.optimal_stepsize`` from a copy of ``init`` and save the resulting curves.

    Writes ``<name>train_loss.npy``, ``<name>test_loss.npy``,
    ``<name>norm.npy``, ``<name>train_accuracy.npy``,
    ``<name>test_accuracy.npy`` (from ``modele.compute_all``) and
    ``<name>stepsizes.npy`` (third value returned by the optimizer).
    ``L`` is accepted for signature parity with the other runners but unused.
    """
    # The trailing True presumably asks the optimizer to also return the
    # step sizes it used -- TODO confirm in Optimiseur.
    _, iterates, used_steps = optimiseur.optimal_stepsize(
        X_train, y_train, init.copy(), max_iters, target, criterion, True
    )
    curves = modele.compute_all(X_train, X_test, y_train, y_test, iterates)
    tr_loss, te_loss, norms, tr_acc, te_acc = curves

    outputs = (
        ("train_loss.npy", tr_loss),
        ("test_loss.npy", te_loss),
        ("norm.npy", norms),
        ("train_accuracy.npy", tr_acc),
        ("test_accuracy.npy", te_acc),
        ("stepsizes.npy", used_steps),
    )
    for suffix, values in outputs:
        np.save(name + suffix, values)

In [None]:
def exact(optimiseur,modele,X_train,y_train,X_test,y_test,init,L,max_iters,criterion,target,name):
    """Run ``optimiseur.exact_stepsize`` from a copy of ``init`` and save the resulting curves.

    Writes ``<name>train_loss.npy``, ``<name>test_loss.npy``,
    ``<name>norm.npy``, ``<name>train_accuracy.npy``,
    ``<name>test_accuracy.npy`` (from ``modele.compute_all``) and
    ``<name>stepsizes.npy`` (third value returned by the optimizer).
    ``L`` is accepted for signature parity with the other runners but unused.
    """
    # The trailing True presumably asks the optimizer to also return the
    # step sizes it used -- TODO confirm in Optimiseur.
    _, iterates, used_steps = optimiseur.exact_stepsize(
        X_train, y_train, init.copy(), max_iters, target, criterion, True
    )
    curves = modele.compute_all(X_train, X_test, y_train, y_test, iterates)
    tr_loss, te_loss, norms, tr_acc, te_acc = curves

    outputs = (
        ("train_loss.npy", tr_loss),
        ("test_loss.npy", te_loss),
        ("norm.npy", norms),
        ("train_accuracy.npy", tr_acc),
        ("test_accuracy.npy", te_acc),
        ("stepsizes.npy", used_steps),
    )
    for suffix, values in outputs:
        np.save(name + suffix, values)

In [None]:
def periodic(optimiseur,modele,X_train,y_train,X_test,y_test,init,L,max_iters,criterion,target,name):
    """Run ``optimiseur.Periodic`` from a copy of ``init`` and save the resulting curves.

    Writes ``<name>train_loss.npy``, ``<name>test_loss.npy``,
    ``<name>norm.npy``, ``<name>train_accuracy.npy``,
    ``<name>test_accuracy.npy`` (from ``modele.compute_all``) and
    ``<name>stepsizes.npy`` (third value returned by the optimizer).
    ``L`` is accepted for signature parity with the other runners but unused.
    """
    # The trailing True presumably asks the optimizer to also return the
    # step sizes it used -- TODO confirm in Optimiseur.
    _, iterates, used_steps = optimiseur.Periodic(
        X_train, y_train, init.copy(), max_iters, target, criterion, True
    )
    curves = modele.compute_all(X_train, X_test, y_train, y_test, iterates)
    tr_loss, te_loss, norms, tr_acc, te_acc = curves

    outputs = (
        ("train_loss.npy", tr_loss),
        ("test_loss.npy", te_loss),
        ("norm.npy", norms),
        ("train_accuracy.npy", tr_acc),
        ("test_accuracy.npy", te_acc),
        ("stepsizes.npy", used_steps),
    )
    for suffix, values in outputs:
        np.save(name + suffix, values)

In [None]:
# Seed the global NumPy RNG so that the random CKN-MNIST subsample drawn by
# CKN_MNIST (np.random.choice) -- and the random initial vectors drawn by
# create_modele when the cells are run in order -- are identical on every
# Restart & Run All.
np.random.seed(0)

# Initialize the two datasets:
#   - CKN-MNIST: 10000 random samples, 80% of them used for training
#   - a4a: train/test files as shipped
X_train, y_train, X_test, y_test = CKN_MNIST(10000, 0.8)
X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a = a4a()

In [None]:
# CKN-MNIST: model built with Lambda=0 and gamma=30, plus its optimizer,
# the two constants from constante_L and a random initial vector.
modele, optimiseur, L, mu, init = create_modele(Lambda=0, gamma=30, X_train=X_train)

In [None]:
# a4a: model built with Lambda=0 and gamma=0.6, plus its optimizer,
# the two constants from constante_L and a random initial vector.
modele_a4a, optimiseur_a4a, L_a4a, mu_a4a, init_a4a = create_modele(
    Lambda=0, gamma=0.6, X_train=X_train_a4a
)

In [None]:
# Show the constant L of each dataset (CKN-MNIST first, then a4a), one per line.
print(L, L_a4a, sep="\n")

0.17393520948644733
0.10023564216320321
(np.float64(0.17393520948644733), np.float64(1.2777231223710166e-07))


In [None]:
# Parameters shared by every optimization run below (results tagged "001").
max_iters = 5_000   # passed to every optimizer call as max_iters
target = 1e-2       # passed as target; presumably the stopping threshold -- TODO confirm
criterion = 'norm'  # passed as criterion; presumably the quantity compared with target

In [None]:
# a4a: gradient descent with ten large constant step sizes (1.9/L ... 1.99/L),
# using the current max_iters / criterion / target; files prefixed "gd_mean_001_a4a".
gd_mean(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="gd_mean_001_a4a",
)

In [None]:
# a4a: gradient descent with constant step sizes 1/L, 1.5/L and 1.9/L,
# using the current max_iters / criterion / target; files prefixed "gd_001_a4a".
gd(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="gd_001_a4a",
)

In [None]:
# a4a: gradient descent with the dynamic step-size rule,
# using the current max_iters / criterion / target; files prefixed "dynamic_001_a4a".
dynamic(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="dynamic_001_a4a",
)

In [None]:
# a4a: gradient descent with the optimal step-size rule,
# using the current max_iters / criterion / target; files prefixed "optimal_001_a4a".
optimal(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="optimal_001_a4a",
)

In [None]:
# a4a: gradient descent with the exact step-size rule,
# using the current max_iters / criterion / target; files prefixed "exact_001_a4a".
exact(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="exact_001_a4a",
)

In [None]:
# a4a: gradient descent with the periodic step-size rule,
# using the current max_iters / criterion / target; files prefixed "periodic_001_a4a".
periodic(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="periodic_001_a4a",
)

In [None]:
# CKN-MNIST: gradient descent with ten large constant step sizes (1.9/L ... 1.99/L),
# using the current max_iters / criterion / target; files prefixed "gd_mean_001".
gd_mean(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="gd_mean_001",
)

In [None]:
# CKN-MNIST: gradient descent with constant step sizes 1/L, 1.5/L and 1.9/L,
# using the current max_iters / criterion / target; files prefixed "gd_001".
gd(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="gd_001",
)

In [None]:
# CKN-MNIST: gradient descent with the dynamic step-size rule,
# using the current max_iters / criterion / target; files prefixed "dynamic_001".
dynamic(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="dynamic_001",
)

In [None]:
# CKN-MNIST: gradient descent with the optimal step-size rule,
# using the current max_iters / criterion / target; files prefixed "optimal_001".
optimal(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="optimal_001",
)

In [None]:
# CKN-MNIST: gradient descent with the exact step-size rule,
# using the current max_iters / criterion / target; files prefixed "exact_001".
exact(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="exact_001",
)

In [None]:
# CKN-MNIST: gradient descent with the periodic step-size rule,
# using the current max_iters / criterion / target; files prefixed "periodic_001".
periodic(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="periodic_001",
)

In [None]:
# Second batch: same experiments with a smaller target (results tagged "0005").
# NOTE: this rebinds the global `target`; re-running the "001" cells after
# this one would store target=5e-3 results under the "001" file names.
max_iters = 5_000
target = 5e-3
criterion = 'norm'

In [57]:
# CKN-MNIST: ten large constant step sizes (1.9/L ... 1.99/L); files prefixed "gd_mean_0005".
gd_mean(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="gd_mean_0005",
)

In [56]:
# CKN-MNIST: constant step sizes 1/L, 1.5/L and 1.9/L; files prefixed "gd_0005".
gd(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="gd_0005",
)

In [55]:
# CKN-MNIST: dynamic step-size rule; files prefixed "dynamic_0005".
dynamic(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="dynamic_0005",
)

In [54]:
# CKN-MNIST: optimal step-size rule; files prefixed "optimal_0005".
optimal(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="optimal_0005",
)

In [53]:
# CKN-MNIST: exact step-size rule; files prefixed "exact_0005".
exact(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="exact_0005",
)

In [52]:
# CKN-MNIST: periodic step-size rule; files prefixed "periodic_0005".
periodic(
    optimiseur, modele,
    X_train, y_train, X_test, y_test,
    init=init, L=L,
    max_iters=max_iters, criterion=criterion, target=target,
    name="periodic_0005",
)

In [46]:
# a4a: ten large constant step sizes (1.9/L ... 1.99/L); files prefixed "gd_mean_0005_a4a".
gd_mean(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="gd_mean_0005_a4a",
)

In [47]:
# a4a: constant step sizes 1/L, 1.5/L and 1.9/L; files prefixed "gd_0005_a4a".
gd(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="gd_0005_a4a",
)

In [48]:
# a4a: dynamic step-size rule; files prefixed "dynamic_0005_a4a".
dynamic(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="dynamic_0005_a4a",
)

In [49]:
# a4a: optimal step-size rule; files prefixed "optimal_0005_a4a".
optimal(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="optimal_0005_a4a",
)

In [51]:
# a4a: exact step-size rule; files prefixed "exact_0005_a4a".
exact(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="exact_0005_a4a",
)

In [50]:
# a4a: periodic step-size rule; files prefixed "periodic_0005_a4a".
periodic(
    optimiseur_a4a, modele_a4a,
    X_train_a4a, y_train_a4a, X_test_a4a, y_test_a4a,
    init=init_a4a, L=L_a4a,
    max_iters=max_iters, criterion=criterion, target=target,
    name="periodic_0005_a4a",
)