In this notebook, we implement two algorithms inspired by Bregman distances presented in 'Logistic Regression, AdaBoost and Bregman Distances' (Schapire et al 2002) and 'Bregman Distance to L1 Regularized Logistic Regression' (Huang and Gupta, 2010). We compare them to two more well known algorithms - Logistic Regression (Newton's Method, No Regularization) and Lasso Regression. 

We run these 4 algorithms on a variety of datasets and note any patterns on how the nature of the datasets affect the accuracy and convergence rate of the algorithms. 

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt


# Dictionary of datasets, keys are names (string) of datasets, values are 4 tuples: (X_train, X_test, y_train, y_test)
datasets = dict()


# Import MNIST data

In [2]:
from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')



In [3]:
# Should be 70,000 images (28 by 28 for dimensionality of 784)  
print("Image Data Shape" , mnist.data.shape)
print("Label Data Shape", mnist.target.shape)

Image Data Shape (70000, 784)
Label Data Shape (70000,)


In [4]:
# We will focus on binary classification of images with label 0 or 1
mnist_relevant_indices = np.where(mnist.target <= 1.0)[0]

In [5]:
data = mnist.data[mnist_relevant_indices]
target = mnist.target[mnist_relevant_indices]

In [6]:
from sklearn.model_selection import train_test_split
datasets['MNIST'] = train_test_split(data, target, test_size=1/7.0, random_state=0)

# Import Fashion MNIST data

In [7]:
import fashion_mnist_reader
# This requires the data from https://github.com/zalandoresearch/fashion-mnist has been downloaded into data/fashion
X_train, y_train = fashion_mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = fashion_mnist_reader.load_mnist('data/fashion', kind='t10k')

In [8]:
#Filter for labels 0 and 1
fmnist_train_data = X_train[np.where(y_train <= 1.0)[0]]
fmnist_train_label = y_train[np.where(y_train <= 1.0)[0]]
fmnist_test_data = X_test[np.where(y_test <= 1.0)[0]]
fmnist_test_label = y_test[np.where(y_test <= 1.0)[0]]

datasets['FMNIST'] = (fmnist_train_data, fmnist_test_data, fmnist_train_label, fmnist_test_label)

# Import Ionosphere data

In [9]:
# Wait for sklearn 0.20.1 release for this to be fixed
#from sklearn.datasets import fetch_openml
#iono = fetch_openml(data_id=59)

# Logistic Regression (No Regularization)

In [10]:
from sklearn.linear_model import LogisticRegression

In [11]:
def LogitR(X_train, X_test, y_train, y_test):
    Logit_model = LogisticRegression(C = 1e6, solver = 'lbfgs', max_iter = 1000)
    Logit_model.fit(X_train, y_train)
    predictions = Logit_model.predict(X_test)
    accuracy = Logit_model.score(X_test, y_test)
    weights = np.concatenate([Logit_model.intercept_, Logit_model.coef_[0]])
    return weights, predictions, accuracy

In [12]:
mnist_test_accuracy = LogitR(*datasets['MNIST'])[2]

In [13]:
mnist_test_accuracy

0.9981060606060606

In [14]:
fmnist_test_accuracy = LogitR(*datasets['FMNIST'])[2]

In [15]:
fmnist_test_accuracy

0.983

# Lasso Regression (L1 Regularization)

In [16]:
from sklearn.linear_model import LassoCV

In [17]:
def LassoR(X_train, X_test, y_train, y_test):
    Lasso_model = LassoCV(cv=7)
    Lasso_model.fit(X_train, y_train)
    predictions = Lasso_model.predict(X_test)
    accuracy = Lasso_model.score(X_test, y_test)
    weights = np.concatenate([[Lasso_model.intercept_], Lasso_model.coef_])
    return weights, predictions, accuracy

In [18]:
LassoR(*datasets['MNIST'])[2]

0.9705900434492414

In [19]:
LassoR(*datasets['FMNIST'])[2]

0.8926456366448151

# Bregman Logistic Regression by Schapire et al.

In [20]:
def BregmanLogit(X_train, X_test, y_train, y_test):
    from scipy.special import expit as h # Logistic Sigmoid
    np.set_printoptions(edgeitems=12)
    
    # First preprocess the data to include a bias parameter and have targets as +1, -1.
    X_train = np.concatenate([np.ones((X_train.shape[0],1)), X_train], axis = 1)
    X_test = np.concatenate([np.ones((X_test.shape[0],1)), X_test], axis = 1)
    y_train = 2*y_train.astype(int) - 1
    y_test = 2*y_test.astype(int) - 1
    
    n_train_samples, x_dim = X_train.shape
    # Train weight vector (Parallel Algorithm, Section 5)
    
    w = np.zeros(x_dim)
    q = 1/2 * np.ones(n_train_samples)
    M = X_train * y_train[:, np.newaxis] # Makes M[i] = y[i] * x[i] so M[i][j] = y[i] x[i][j]
    
    #print("M", M.shape, M, np.count_nonzero(M))
    #print(np.sum(M<0))
    M_pos = np.multiply(M, M>0)
    M_neg = np.multiply(-M, M<0)

    iters = 3
    for t in range(1,iters):
        # Update q
        
        # Problem - q vanishes very rapidly
        
        print("q start:", q)
        if t==1: 
            q = 1/2 * np.ones(n_train_samples)
        if t>1: 
            q = np.divide(q, np.multiply(1-q, np.exp(M @ d)) + q)
        print("q end: ", q)
        # Update d
        W_pos = q @ M_pos + 1e-3
        W_neg = q @ M_neg + 1e-3
        #print("W_pos", W_pos.shape, np.count_nonzero(W_pos))
        #print(W_pos)
        #print("W_neg", W_neg.shape, np.count_nonzero(W_neg))
        #print(W_neg)

        d = 1/2 * np.log(np.divide(W_pos, W_neg))

        w += d
    
    # Make predictions on test and evaluate accuracy
    predictions = np.around(h(X_test @ w))
    accuracy = np.mean(y_test.T==predictions)
    #predictions = 0
    #accuracy = 0
    return w, predictions, accuracy

In [22]:
BregmanLogit(*datasets['FMNIST'])[2]

q start: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ... 0.5 0.5 0.5 0.5 0.5
 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
q end:  [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ... 0.5 0.5 0.5 0.5 0.5
 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
q start: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ... 0.5 0.5 0.5 0.5 0.5
 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
q end:  [0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 1.00000000e+000 0.00000000e+000 1.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000 ...
 6.10433169e-106 0.00000000e+000 0.00000000e+000 1.00000000e+000
 1.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000]


0.5