In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [7]:
class TISP:
    def __init__(self, features, lambda_, iterations = 100):
        
        # Initialize parameters 
        self.features = features
        self.iterations = iterations
        self.lambda_ = lambda_
        self.likelihoods = []
        
        self.feature_lambda = [] # Container for 
        
    def sigmoid(self, z):
        
        # Set up sigmoid 
        sig_z = (1/(1+np.exp(-z)))
        
        assert (sig_z.shape == z.shape)
        return sig_z
        
        
    def log_likelihood(self, Xw, y):
        ''' 
        Where: wX is the dot product of the regressor matrix + parameters
               y is the data transformed to -> {0, 1}
               y_ are the true labels of X in space -> {-1, +1}
               
               
        Loss function uses [-1, 1] labels
        '''
        
        likelihood = (1/Xw.shape[0])*sum(np.log(1 + np.exp(-y*Xw))) #+ \
                      # self.lambda_*sum(#P(x))        
        return likelihood

    
    
    def cap_lambda(self, current_lambda):
        non_zero_ind = np.where(self.weights!=0)
        non_zero_features = len(non_zero_ind[0])
        
        token = "L:" + str(current_lambda) + '/F:' + str(non_zero_features)
        
        return token
            
    def fit(self, X, y):
        '''
        Where X is the regressor matrix
              y are the labels {-1, +1}
              
        Algorithm uses [0, 1] labels
        '''
        num_obs = X.shape[0]
        num_features = X.shape[1]
        
        # Start w/ zero weights
        self.weights = np.zeros(num_features)
        
        # Transform y to be {0, 1}
        y_ = np.where(y == -1, 0, y)
        
        for j in range(len(self.lambda_)):
            for i in range(self.iterations):
            
                Xw = np.dot(X, self.weights)
                sig_Xw = self.sigmoid(Xw)

                # Try the other way later - just use the loss from logistic regression with penalty
                gradient = np.dot(X.T, y_ - sig_Xw)


                # Calculate weights
                self.weights += (1/num_obs)*gradient
                self.weights *= (np.absolute(self.weights) > self.lambda_[j])


                # Calculating log likelihood
                likelihood = self.log_likelihood(Xw,y)

                self.likelihoods.append(likelihood)
                
                # Capture lambda per feature
                self.feature_lambda.append(self.cap_lambda(j))
                
            
                        
            
    def predict_proba(self,X):
    
        if self.weights is None:
            raise Exception("Fit the model before prediction")
                     
        z = np.dot(X,self.weights)
        probabilities = self.sigmoid(z)
                
        return probabilities
    
    
    def predict(self, X, threshold=0.5):
        # Thresholding probability to predict binary values
        binary_predictions = np.array(list(map(lambda x: 1 if x>threshold else 0, self.predict_proba(X))))
        
        return binary_predictions
        
        
        
        

## Problem 1.a:

In [8]:
# Data input
X_train_a = pd.read_csv('data/Gisette/gisette_train.data', sep='\s+', header=None)
y_train_a = pd.read_csv('data/Gisette/gisette_train.labels', header=None)

X_test_a = pd.read_csv('data/Gisette/gisette_valid.data',  sep='\s+', header=None)
y_test_a = pd.read_csv('data/Gisette/gisette_valid.labels', header=None)

# Prepare data for matrix algebra
y_train_a = y_train_a.values.ravel()
y_test_a = y_test_a.values.ravel()

In [9]:
# Normalize the variables of the training set
scaler_a = StandardScaler()
scaler_a.fit(X_train_a)

StandardScaler()

In [10]:
# Transform both the train and test set
X_train_scaled_a = scaler_a.transform(X_train_a)
X_test_scaled_a = scaler_a.transform(X_test_a)

In [None]:
features = [10, 30, 100, 300, 1000]
lambdas = list(range(220))

model_a = TISP(features = features, lambda_ = lambdas)
model_a.fit(X_train_a, y_train_a)

  sig_z = (1/(1+np.exp(-z)))
  likelihood = (1/Xw.shape[0])*sum(np.log(1 + np.exp(-y*Xw))) #+ \


In [None]:
# Training Misclassification
preds_train_a = model_a.predict(X_train_scaled_a)
1 - accuracy_score(y_train_a, preds_train_a)

In [None]:
# Testing Misclassification
preds_test_a = model_a.predict(X_test_scaled_a)
1 - accuracy_score(y_test_a, preds_test_a)

In [None]:
model_a.feature_lambda