In [334]:
from gradient_descent import sklearn_optimize
from data import sample_weights
import numpy as np
# Let's see how well we do on the test set
from theoretical import predict_erm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from data_model import CustomSpectra

def load_mnist(path, kind='train'):
    import os
    import gzip
    import numpy as np

    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [335]:
X_train, y_train = load_mnist('data/fashion', kind='train')
X_test, y_test = load_mnist('data/fashion', kind='t10k')

In [336]:
y_test

array([9, 2, 1, ..., 8, 1, 5], dtype=uint8)

In [337]:
# Let's use a subset of the data and learn to distinguish between two classes
# Let's pick t-shirt 0 from sneaker 7
X_train = X_train[np.logical_or(y_train == 0, y_train == 7)]
y_train = y_train[np.logical_or(y_train == 0, y_train == 7)]
X_test = X_test[np.logical_or(y_test == 0, y_test == 7)]
y_test = y_test[np.logical_or(y_test == 0, y_test == 7)]

# change the datatype to int64
X_train = X_train.astype(np.float64)
X_test = X_test.astype(np.float64)
y_test = y_test.astype(np.float64)
y_train = y_train.astype(np.float64)


# Compute the standard deviation and replace 0 with 1
std = X_train.std(axis=0)
std[std == 0] = 1
std_test = X_test.std(axis=0)
std_test[std_test == 0] = 1

# Subtract the mean and divide by the standard deviation
X_train = (X_train - X_train.mean(axis=0)) / std
X_test = (X_test - X_test.mean(axis=0)) / std_test

In [338]:
# Do we have the same number of examples for each class?
print(np.unique(y_train, return_counts=True))
print(np.unique(y_test, return_counts=True))
# Yes, we do!


(array([0., 7.]), array([6000, 6000]))
(array([0., 7.]), array([1000, 1000]))


In [339]:

# Make the labels binary
y_train[y_train == 0] = -1
y_train[y_train == 7] = 1
y_test[y_test == 0] = -1
y_test[y_test == 7] = 1

In [340]:
# Let's put the data into a json object and pickle it
import json
import pickle

data = {
    'X_train': X_train.tolist(),
    'y_train': y_train.tolist(),
    'X_test': X_test.tolist(),
    'y_test': y_test.tolist()
}

with open('data/fashion_mnist.json', 'w') as f:
    json.dump(data, f)

# Let's pickle the data
with open('data/fashion_mnist.pkl', 'wb') as f:
    pickle.dump(data, f)

In [341]:
# Let's read the pickle file
with open('data/fashion_mnist.pkl', 'rb') as f:
    data = pickle.load(f)

In [342]:
# Let's read the json file
with open('data/fashion_mnist.json', 'r') as f:
    data = json.load(f)

In [343]:
ntot = 12000
Omega = X_train.T @ X_train / ntot # student-student
rho = y_train.dot(y_train) / ntot
spec_Omega, U = np.linalg.eigh(Omega)
diagUtPhiPhitU = np.diag(1/ntot * U.T @ X_train.T @ y_train.reshape(ntot,1) @ 
                    y_train.reshape(1,ntot) @ X_train @ U)

In [344]:
# Let's add Omega, rho, spec_Omega and diagUtPhiPhitU to the data
data['Omega'] = Omega.tolist()
data['rho'] = rho.tolist()
data['spec_Omega'] = spec_Omega.tolist()
data['diagUtPhiPhitU'] = diagUtPhiPhitU.tolist()

# Let's pickle the data
with open('data/fashion_mnist.pkl', 'wb') as f:
    pickle.dump(data, f)

In [345]:
# get nunique for y_train
np.unique(y_train, return_counts=True)

(array([-1.,  1.]), array([6000, 6000]))

In [346]:
X_test.dtype

dtype('float64')

In [347]:
lam = 0.1

In [348]:
clf = LogisticRegression(random_state=0, solver='lbfgs',max_iter=1000,C=1/lam).fit(X_train, y_train)
w_lr = clf.coef_.flatten()

print("Norm of weights: %f" % np.linalg.norm(w_lr))
y_pred = predict_erm(X_test, w_lr)
print("Accuracy: %f" % accuracy_score(y_test, y_pred))

# print the regularization parameter
print(clf.C)


Norm of weights: 2.600423
Accuracy: 0.997000
10.0


In [349]:
w_gd = sklearn_optimize(sample_weights(784),X_train,y_train,lam,0)
print("Norm of the weights: %f" % np.linalg.norm(w_gd))

Norm of the weights: 2.752518


In [350]:
epsilon = 10
w_gd_adv = sklearn_optimize(sample_weights(784),X_train,y_train,lam,epsilon)
print("Norm of the weights: %f" % np.linalg.norm(w_gd_adv))

Norm of the weights: 28.939971


In [351]:


y_pred = predict_erm(X_test, w_gd)
print("Accuracy on test set with gradient descent: {}".format(accuracy_score(y_test,y_pred)))

y_pred = predict_erm(X_test, w_gd_adv)
print("Accuracy on test set with adversarial gradient descent: {}".format(accuracy_score(y_test,y_pred)))


Accuracy on test set with gradient descent: 0.9995
Accuracy on test set with adversarial gradient descent: 0.9995


In [352]:
# Now let's compute adversarial examples for the test set
# The best attack is given by -y*epsilon* w / ||w||_2
# Let's compute it
epsilon = 3
w_lr_norm = np.linalg.norm(w_lr)
# Let's compute the adversarial examples
X_test_adv = X_test - epsilon * y_test[:, np.newaxis] * w_lr / w_lr_norm


In [353]:
# Let's see how well we do on the test set
y_pred = predict_erm(X_test_adv, w_lr)
print("Accuracy on test set with adversarial examples: {}".format(accuracy_score(y_test,y_pred)))
# using gradient descent
y_pred = predict_erm(X_test_adv, w_gd)
print("Accuracy on test set with adversarial examples: {}".format(accuracy_score(y_test,y_pred)))
# using adversarial gradient descent
y_pred = predict_erm(X_test_adv, w_gd_adv)
print("Accuracy on test set with adversarial examples: {}".format(accuracy_score(y_test,y_pred)))

Accuracy on test set with adversarial examples: 0.9735
Accuracy on test set with adversarial examples: 0.994
Accuracy on test set with adversarial examples: 0.9935


# Theory

In [354]:
# teacher features
p = 12000
gamma = 12000/784



In [355]:
# Student-student covariance
print('Computing covariances')
Omega = X_train.T @ X_train / p # student-student

print('Computing rho')
rho = y_train.dot(y_train) / p

print('Diagonalising the student-student covariance')
spec_Omega, U = np.linalg.eigh(Omega)

print('Projecting teacher-student correlation on student-student basis')
diagUtPhiPhitU = np.diag(1/p * U.T @ X_train.T @ y_train.reshape(p,1) @ 
                         y_train.reshape(1,p) @ X_train @ U)

Computing covariances
Computing rho
Diagonalising the student-student covariance
Projecting teacher-student correlation on student-student basis


In [356]:
data_model = CustomSpectra(gamma = gamma,
                           rho = rho, 
                           spec_Omega = spec_Omega, 
                           diagonal_term = diagUtPhiPhitU)

# MNIST Scattering as in Loureiro

In [357]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torchvision.datasets as datasets
import torch
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [358]:
def preprocess_data(dataset):
    '''
    Preprocess MNIST data set: 
    center, divide by global variance and assign labels +1 for even, -1 for odd digits.
    
    Args:
        - pyTorch dataset
    
    Return:
        - tuple (X,y) where both X and y are numpy arrays.
    '''
    n_samples, _, _ = dataset.data.shape
    X = torch.clone(dataset.data).float()
    y = torch.clone(dataset.targets).view(n_samples,).float()
    
    data, labels = [], []
    # Extract digits and create labels
    for k,label in enumerate(y):
        if label in [0, 2, 4, 6, 8]:
            data.append(X[k].numpy())
            labels.append(1)
        elif label in [1, 3, 5, 7, 9]:
            data.append(X[k].numpy())
            labels.append(-1)
            
    data = np.array(data)
    data -= data.mean(axis=0)
    data /= data.std()

    return np.array(data), np.array(labels)

In [359]:
# Load MNIST 
mnist = datasets.MNIST(root='data', train=True, download=True, transform=None)

# Pre-process
C, y = preprocess_data(mnist)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 17327133.28it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 29119157.17it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 12369088.59it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 15941865.08it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [360]:
from kymatio.sklearn import Scattering2D

ntot, dx, dy = C.shape
S = Scattering2D(shape=(dx,dy), J=3, L=8)

In [361]:
# Data in feature spac
V = S(C).reshape(ntot, -1)

_, d = V.shape

print('Feature space dimension: {}'.format(d))

Feature space dimension: 1953


In [362]:
# teacher features
p = ntot
gamma = p/d

# l2 regularisation parameter
lamb = 0.01

In [363]:
# Student-student covariance
print('Computing covariances')
Omega = V.T @ V / ntot # student-student

Computing covariances


In [364]:
print('Computing rho')
rho = y.dot(y) / ntot

print('Diagonalising the student-student covariance')
spec_Omega, U = np.linalg.eigh(Omega)

print('Projecting teacher-student correlation on student-student basis')
diagUtPhiPhitU = np.diag(1/ntot * U.T @ V.T @ y.reshape(p,1) @ 
                         y.reshape(1,p) @ V @ U)

Computing rho
Diagonalising the student-student covariance
Projecting teacher-student correlation on student-student basis


In [None]:
# create a data json object
data = {
    'X_train': V.tolist(),
    'y_train': y.tolist(),
    'Omega': Omega.tolist(),
    'rho': rho.tolist(),
    'spec_Omega': spec_Omega.tolist(),
    'diagUtPhiPhitU': diagUtPhiPhitU.tolist()
}

# Let's pickle the data
with open('data/mnist.pkl', 'wb') as f:
    pickle.dump(data, f)