In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch

from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import RidgeClassifier
from sklearn.kernel_approximation import RBFSampler
from sklearn.preprocessing import LabelBinarizer

import time

import random_features
from multiple_regression_solver import MultipleRegressionSolver

In [2]:
# (train_data, train_labels), (test_data, test_labels) = FashionMNIST()

# Read the four exported arrays (images first, then targets) and cast each to uint8.
train_data, test_data, train_labels, test_labels = (
    np.load(npy_path).astype('uint8')
    for npy_path in (
        '../../datasets/export/fashion_mnist/numpy/train_data_fashion_mnist.npy',
        '../../datasets/export/fashion_mnist/numpy/test_data_fashion_mnist.npy',
        '../../datasets/export/fashion_mnist/numpy/train_targets_fashion_mnist.npy',
        '../../datasets/export/fashion_mnist/numpy/test_targets_fashion_mnist.npy',
    )
)

# The targets are stored one-hot; collapse them to integer class ids.
train_labels, test_labels = train_labels.argmax(axis=1), test_labels.argmax(axis=1)

# Number of values in a single image once it is flattened.
D = train_data[0].size

# Turn every image into one row of length D.
train_data, test_data = train_data.reshape(-1, D), test_data.reshape(-1, D)

## Project the data

In [3]:
def threshold_binarize(data, threshold):
    """Binarize ``data`` element-wise against ``threshold``.

    Args:
        data: numpy array or any array-like (e.g. nested lists) of numbers.
        threshold: Cut-off value; the comparison is strict, so entries equal
            to ``threshold`` map to 0.

    Returns:
        A ``uint8`` numpy array with the same shape as ``data`` holding 1
        where ``data > threshold`` and 0 elsewhere.
    """
    # np.asarray lets plain Python sequences through; a bare ``list > int``
    # comparison would raise TypeError.
    above = np.asarray(data) > threshold
    # Casting the boolean mask gives exactly the 1/0 values np.where produced.
    return np.asarray(above, dtype='uint8')

In [4]:
# Fashion-MNIST pixel intensities lie in [0, 255]; values above this cut-off become 1.
threshold = 10

# Binarize both splits with the same cut-off.
train_data_bin, test_data_bin = (
    threshold_binarize(split, threshold) for split in (train_data, test_data)
)

In [7]:
# `train_data_bin` and `test_data_bin` are produced by the binarization cell
# above, so they are not recomputed here with a second hard-coded threshold.
# The stacked matrix (train rows first, then test rows) is cast to float32:
# the original author observed that numpy freezes when the projection is fed
# an integer dtype (float32 or float64 both work; cause not investigated).
all_data = np.vstack([train_data_bin, test_data_bin]).astype('float32')

In [23]:
# Project the stacked (train + test) float32 matrix to 100000 output features,
# handing it to the projection routine in chunks of 5000 rows.
# NOTE(review): `synthetic_opu` is not imported in the import cell at the top
# of this notebook (only `random_features` is) -- on a fresh kernel this call
# would raise NameError; confirm the intended import.
# NOTE(review): projection='opu', framework='pytorch' and cuda=True presumably
# select a simulated-OPU projection computed with PyTorch on the GPU -- confirm
# against the synthetic_opu module.
data_proj = synthetic_opu.project_big_np_matrix(all_data, out_dim=100000, chunk_size=5000, projection='opu',
                          framework='pytorch', dtype=torch.FloatTensor, cuda=True)

Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Processing chunk of size: torch.Size([5000, 784])
Total time elapsed (seconds): 115.48934745788574
Time per chunk (seconds): 8.249239104134697


In [24]:
# The input matrix was stacked train-first, so the leading rows are taken as
# the training projections (assumes the projection keeps row order -- TODO confirm).
n_train = len(train_data_bin)
train_proj, test_proj = data_proj[:n_train], data_proj[n_train:]

## Regression on the features

In [7]:
# One-hot style targets where the "off" entries are -1 instead of 0.
label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
train_labels_bin = label_binarizer.fit_transform(train_labels)
# Only transform the test labels: re-fitting on them would rebuild the class
# list from the test split and could silently change the column layout if the
# set of classes present there differed from the training split.
test_labels_bin = label_binarizer.transform(test_labels)

In [29]:
# Build the regression solver from the projected training features and the
# {-1, +1}-encoded label matrix, with mini-batches of 128 and cuda=True.
# NOTE(review): MultipleRegressionSolver is project code not shown here -- its
# training objective and what `cuda=True` moves to the GPU should be confirmed there.
solver = MultipleRegressionSolver(train_proj, train_labels_bin, batch_size=128, cuda=True)

## Synthetic OPU

### 1K Features

In [15]:
# Adam over the solver's model parameters with learning rate 1e-3.
# NOTE(review): `solver` is built once from `train_proj`; the feature count
# actually used in this "1K" section depends on the `out_dim` the projection
# cell was last run with -- confirm before comparing sections.
optimizer = torch.optim.Adam(solver.model.parameters(), lr=1e-3)

In [16]:
# Train for 50 epochs; fit() prints one loss/accuracy line per epoch (see the
# output below). The return value is kept as `coefficients` -- presumably the
# learned regression weights; confirm in MultipleRegressionSolver.
coefficients = solver.fit(optimizer, epochs=50)

Epoch: 0 Loss: 0.1977643072605133 Accuracy: 82.2
Epoch: 1 Loss: 0.24057672917842865 Accuracy: 78.5
Epoch: 2 Loss: 0.1988341361284256 Accuracy: 81.1
Epoch: 3 Loss: 0.19358235597610474 Accuracy: 82.8
Epoch: 4 Loss: 0.17926564812660217 Accuracy: 85.3
Epoch: 5 Loss: 0.18776938319206238 Accuracy: 84.3
Epoch: 6 Loss: 0.18488942086696625 Accuracy: 83.4
Epoch: 7 Loss: 0.184470534324646 Accuracy: 83.7
Epoch: 8 Loss: 0.17958873510360718 Accuracy: 80.9
Epoch: 9 Loss: 0.16421706974506378 Accuracy: 85.0
Epoch: 10 Loss: 0.16249994933605194 Accuracy: 83.5
Epoch: 11 Loss: 0.16840437054634094 Accuracy: 83.9
Epoch: 12 Loss: 0.17301011085510254 Accuracy: 82.2
Epoch: 13 Loss: 0.17343157529830933 Accuracy: 81.9
Epoch: 14 Loss: 0.15697996318340302 Accuracy: 84.0
Epoch: 15 Loss: 0.14851225912570953 Accuracy: 84.6
Epoch: 16 Loss: 0.15340712666511536 Accuracy: 82.8
Epoch: 17 Loss: 0.14542700350284576 Accuracy: 83.5
Epoch: 18 Loss: 0.14289404451847076 Accuracy: 83.4
Epoch: 19 Loss: 0.14925725758075714 Accuracy:

In [17]:
# Score the trained solver on the held-out projections; the call prints the
# accuracy and also returns it (shown as the cell value below).
solver.classification_score(test_proj, test_labels_bin)

Accuracy: 85.21666666666667


85.21666666666667

### 10K Features

In [17]:
# Adam over the solver's model parameters with a smaller learning rate (1e-4).
# NOTE(review): no new solver is created in this section, so a top-to-bottom
# run would keep training the same model; the "10K" numbers presumably come
# from re-running the projection and solver cells with a different `out_dim` -- confirm.
optimizer = torch.optim.Adam(solver.model.parameters(), lr=1e-4)

In [18]:
# Train for 50 epochs with the optimizer defined just above; fit() prints one
# loss/accuracy line per epoch (see the output below).
coefficients = solver.fit(optimizer, epochs=50)

Epoch: 0 Loss: 19.49797248840332 Accuracy: 12.5
Epoch: 1 Loss: 0.9232721328735352 Accuracy: 53.125
Epoch: 2 Loss: 0.6023041605949402 Accuracy: 64.84375
Epoch: 3 Loss: 0.4681790769100189 Accuracy: 72.65625
Epoch: 4 Loss: 0.4207070469856262 Accuracy: 70.3125
Epoch: 5 Loss: 0.35498887300491333 Accuracy: 75.0
Epoch: 6 Loss: 0.35204654932022095 Accuracy: 85.9375
Epoch: 7 Loss: 0.3081686198711395 Accuracy: 83.59375
Epoch: 8 Loss: 0.33145350217819214 Accuracy: 78.90625
Epoch: 9 Loss: 0.36751434206962585 Accuracy: 76.5625
Epoch: 10 Loss: 0.23600712418556213 Accuracy: 86.71875
Epoch: 11 Loss: 0.2516993284225464 Accuracy: 83.59375
Epoch: 12 Loss: 0.2738135755062103 Accuracy: 80.46875
Epoch: 13 Loss: 0.21624425053596497 Accuracy: 88.28125
Epoch: 14 Loss: 0.22382351756095886 Accuracy: 84.375
Epoch: 15 Loss: 0.29099106788635254 Accuracy: 80.46875
Epoch: 16 Loss: 0.22866110503673553 Accuracy: 86.71875
Epoch: 17 Loss: 0.24393053352832794 Accuracy: 79.6875
Epoch: 18 Loss: 0.22331933677196503 Accuracy:

In [19]:
# Held-out accuracy for this run; printed by the call and returned as the cell value.
solver.classification_score(test_proj, test_labels_bin)

Accuracy: 88.74666666666667


88.74666666666667

### 100K Features

In [30]:
# Adam over the solver's model parameters with learning rate 1e-5.
# NOTE(review): the projection cell shown above uses out_dim=100000, which
# matches this section's title; the learning rate shrinks by 10x from one
# section to the next, presumably to keep training stable as the feature
# count grows -- confirm.
optimizer = torch.optim.Adam(solver.model.parameters(), lr=1e-5)

In [31]:
# Train for 50 epochs with the optimizer defined just above; fit() prints one
# loss/accuracy line per epoch (see the output below).
coefficients = solver.fit(optimizer, epochs=50)

Epoch: 0 Loss: 35.02031326293945 Accuracy: 21.09375
Epoch: 1 Loss: 1.1676433086395264 Accuracy: 54.6875
Epoch: 2 Loss: 0.6797206401824951 Accuracy: 66.40625
Epoch: 3 Loss: 0.5919178128242493 Accuracy: 66.40625
Epoch: 4 Loss: 0.5301603078842163 Accuracy: 67.96875
Epoch: 5 Loss: 0.4421910345554352 Accuracy: 79.6875
Epoch: 6 Loss: 0.4331490397453308 Accuracy: 68.75
Epoch: 7 Loss: 0.34248507022857666 Accuracy: 78.125
Epoch: 8 Loss: 0.33059290051460266 Accuracy: 74.21875
Epoch: 9 Loss: 0.37924784421920776 Accuracy: 74.21875
Epoch: 10 Loss: 0.2868381440639496 Accuracy: 81.25
Epoch: 11 Loss: 0.2873137593269348 Accuracy: 80.46875
Epoch: 12 Loss: 0.2708255350589752 Accuracy: 80.46875
Epoch: 13 Loss: 0.3619306683540344 Accuracy: 74.21875
Epoch: 14 Loss: 0.2695775032043457 Accuracy: 82.8125
Epoch: 15 Loss: 0.36703965067863464 Accuracy: 64.0625
Epoch: 16 Loss: 0.25032204389572144 Accuracy: 84.375
Epoch: 17 Loss: 0.22382381558418274 Accuracy: 84.375
Epoch: 18 Loss: 0.25698143243789673 Accuracy: 82.

In [32]:
# Held-out accuracy for this run; printed by the call and returned as the cell value.
solver.classification_score(test_proj, test_labels_bin)

Accuracy: 91.075


91.075

## Fitting the RBF kernel during regression

In [35]:
import torch.nn as nn

class RegressionModel(nn.Module):
    """Bias-free linear read-out, optionally preceded by a feature-map module.

    Args:
        input_dim: Width of the raw input. Used as the linear layer's input
            size only when no ``feature_layer`` is given.
        output_dim: Number of regression outputs.
        feature_layer: Optional module applied to the input before the linear
            layer. It must expose an ``output_features`` attribute giving the
            width of what it returns.
        zero_init: When True, the linear weights start at zero.
    """

    def __init__(self, input_dim, output_dim, feature_layer=None, zero_init=True):
        super(RegressionModel, self).__init__()

        self.feature_layer = feature_layer

        # The read-out consumes either the feature map's output or the raw input.
        if feature_layer is not None:
            d_in = feature_layer.output_features
        else:
            d_in = input_dim
        self.layer = nn.Linear(d_in, output_dim, bias=False)

        if zero_init:
            torch.nn.init.zeros_(self.layer.weight)

    def forward(self, input):
        """Apply the optional feature map, then the linear read-out."""
        # Without a feature layer the raw input goes straight to the linear
        # layer; previously this path hit an unassigned local variable.
        if self.feature_layer is not None:
            features = self.feature_layer(input)
        else:
            features = input
        return self.layer(features)

In [46]:
def train(model, train_loader, optimizer, epoch, cuda):
    """Run one pass over ``train_loader``, minimising the MSE loss.

    Args:
        model: Module being optimised; switched to training mode first.
        train_loader: Iterable of ``(data, target)`` batches.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the updates.
        epoch: Epoch number, used only in the progress message.
        cuda: When truthy, each batch is moved to the GPU before the forward pass.

    A progress line is printed for every 100th batch, starting with the first.
    """
    model.train()
    for step, (inputs, targets) in enumerate(train_loader):
        # Cast both tensors to 32-bit floats before any device transfer.
        inputs = inputs.type(torch.FloatTensor)
        targets = targets.type(torch.FloatTensor)
        if cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()

        optimizer.zero_grad()
        batch_loss = torch.nn.functional.mse_loss(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        # Only every 100th batch is reported.
        if step % 100 != 0:
            continue
        samples_seen = step * len(inputs)
        samples_total = len(train_loader.dataset)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, samples_total, percent_done, batch_loss.item()))

def test(model, test_loader, cuda):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.type(torch.FloatTensor), target.type(torch.FloatTensor)
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += torch.nn.functional.mse_loss(output, target).item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.argmax(dim=1, keepdim=True)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
from torch.utils.data import Dataset, DataLoader
    
class BasicDataset(Dataset):
    """Minimal map-style dataset that pairs ``X[i]`` with ``Y[i]``."""

    def __init__(self, X, Y):
        """
        Args:
            X: Indexable, sized collection of inputs (e.g. a numpy array).
            Y: Indexable, sized collection of targets with the same length as ``X``.

        Raises:
            ValueError: If ``X`` and ``Y`` have different lengths.
        """
        # Fail early: a length mismatch would otherwise surface as an
        # IndexError mid-epoch or silently drop the surplus targets.
        if len(X) != len(Y):
            raise ValueError(
                'X and Y must have the same length, got {} and {}'.format(len(X), len(Y)))
        self.X = X
        self.Y = Y

    def __len__(self):
        """Number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.Y[idx]

In [50]:
# Wrap the binarized pixels and the {-1, +1} label matrix for mini-batch training.
train_dataset = BasicDataset(train_data_bin, train_labels_bin)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=0)

# Accuracy does not depend on sample order, so the test loader is not shuffled;
# shuffling only added run-to-run noise.
test_dataset = BasicDataset(test_data_bin, test_labels_bin)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=0)

# Use the GPU when one is visible; otherwise stay on the CPU instead of
# crashing on an unconditional .cuda() call.
use_cuda = torch.cuda.is_available()

# RBF feature layer: 784 inputs -> 10000 features, with tunable_kernel=True.
rbf_layer = random_features.RBFModulePyTorch(784, 10000, tunable_kernel=True)
model = RegressionModel(784, 10, feature_layer=rbf_layer, zero_init=False)
if use_cuda:
    model = model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Evaluate on the test split after every training epoch.
for epoch in range(50):
    train(model, train_loader, optimizer, epoch, use_cuda)
    test(model, test_loader, use_cuda)


Test set: Average loss: 0.0011, Accuracy: 8194/10000 (82%)


Test set: Average loss: 0.0011, Accuracy: 8265/10000 (83%)


Test set: Average loss: 0.0013, Accuracy: 8286/10000 (83%)


Test set: Average loss: 0.0013, Accuracy: 8378/10000 (84%)


Test set: Average loss: 0.0014, Accuracy: 8433/10000 (84%)


Test set: Average loss: 0.0015, Accuracy: 8496/10000 (85%)


Test set: Average loss: 0.0011, Accuracy: 8614/10000 (86%)


Test set: Average loss: 0.0012, Accuracy: 8473/10000 (85%)


Test set: Average loss: 0.0013, Accuracy: 8302/10000 (83%)


Test set: Average loss: 0.0012, Accuracy: 8633/10000 (86%)


Test set: Average loss: 0.0011, Accuracy: 8651/10000 (87%)


Test set: Average loss: 0.0011, Accuracy: 8657/10000 (87%)


Test set: Average loss: 0.0012, Accuracy: 8605/10000 (86%)


Test set: Average loss: 0.0015, Accuracy: 8467/10000 (85%)


Test set: Average loss: 0.0011, Accuracy: 8629/10000 (86%)


Test set: Average loss: 0.0011, Accuracy: 8659/10000 (87%)


Test set: Average loss:

In [55]:
# Reference value: one over 784, the input dimension passed to the RBF layer above.
# NOTE(review): presumably a default-gamma heuristic (1 / n_features) to
# compare against the learned kernel parameter in the next cell -- confirm.
1./784

0.0012755102040816326

In [56]:
# Evaluates as 1 / mean(2 * exp(log_lengthscales)**2): `.mean()` binds to the
# parenthesised product before the division is applied.
# NOTE(review): presumably the RBF gamma = 1 / (2 * lengthscale^2) implied by
# the learned length-scales, for comparison with 1/784. This is the reciprocal
# of a mean, not the mean of per-dimension reciprocals -- confirm which is intended.
1./(2*torch.exp(model.feature_layer.log_lengthscales)**2).mean()

tensor(0.0020, device='cuda:0', grad_fn=<MulBackward0>)