In [1]:
import pandas as pd
import os
import numpy as np

import Utils as ut
import CortesAlignmentFile as ca
import mySampler as ms
 
from sklearn.model_selection import StratifiedShuffleSplit, cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import normalize

  from collections import Sequence


## Dataset Configuration

In [2]:
d_toy = pd.read_csv(os.path.join('data', 'toyDataset.csv'))
outputs = pd.read_csv(os.path.join('data', 'toyLabel.csv'))

In [3]:
toy = d_toy.values
y = outputs.values.reshape(-1)

In [4]:
# COMPUTATIONAL COMPLEXITY: Reduce #samples
tr_idx, ts_idx = next(StratifiedShuffleSplit(n_splits=1, test_size=0.25).split(toy, y))

In [5]:
ds1 = np.hstack([toy[:,:5], toy[:,10:15]])
ds1_tr = ds1[tr_idx]
ds1_ts = ds1[ts_idx]

ds2 = np.hstack([toy[:,5:10], toy[:,15:20]])
ds2_tr = ds2[tr_idx]
ds2_ts = ds2[ts_idx]

ds3 = toy[:,20:]
ds3_tr = ds3[tr_idx]
ds3_ts = ds3[ts_idx]

y_ = y[tr_idx]
y_test = y[ts_idx]

toy_ = ds1_tr
toy_test = ds1_ts

## Utilities

In [6]:
def centering_normalizing(train, test, exclusion_list = None):
    
    if exclusion_list is not None:
        scale, train = ut.centering_normalizing(train, exclusion_list)
    else:
        scale, train = ut.centering_normalizing(train)

    new_Xts = test-scale

    if exclusion_list is not None:
        new_Xts[:, exclusion_list] = test[:, exclusion_list]

    return train, new_Xts, scale


def normalizing(train, test):
    return normalize(train), normalize(test)


def make_regression(X, y, sparsity):

    scores = np.zeros(len(sparsity))
    
    for idx, sp in enumerate(sparsity):
        model = LogisticRegression(penalty = 'l1', C = sp)
        scores[idx] = np.mean(cross_validate(model,  X, y, return_train_score=False, cv=3)['test_score'])
        
    return scores[np.argmax(scores)]


def learn(C_, C_test, y, y_test, sparsity, centering = False, norm = False):
    
    if centering == True:
        C_, C_test, _ = centering_normalizing(C_, C_test)#, [5,6,7,9,10,13,15,16,17,18,19])

    if norm == True:
         C_, C_test = normalizing(C_, C_test)

    best_alpha = make_regression(C_, y, sparsity)
    model = LogisticRegression(penalty = 'l1', C = best_alpha)
    model.fit(C_, y)
    y_pred = model.predict(C_test)
    coef = model.coef_
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    print("\tAccuracy: {}".format(accuracy))
    print("\tPrecision: {}".format(precision))
    print("\tRecall: {}".format(recall))
    print("Coef: {}".format(coef))
    print("BestLambda: {}".format(best_alpha))

In [7]:
sparsity = [0.01, 0.03, 0.05, 0.07, 0.1, 0.12]

## Basic approach

### Dementia

In [8]:
learn(toy_, toy_test, y_, y_test, sparsity)

	Accuracy: 0.5733333333333334
	Precision: 0.5609756097560976
	Recall: 0.6216216216216216
Coef: [[ 0.10425312  0.          0.          0.08854682  0.         -0.13742921
   0.35897197  0.12050082 -0.17710167  0.06637022]]
BestLambda: 0.6444444444444444


### Cardio

In [9]:
learn(toy_, toy_test, y_, y_test, sparsity)

	Accuracy: 0.5733333333333334
	Precision: 0.5609756097560976
	Recall: 0.6216216216216216
Coef: [[ 0.10425681  0.          0.          0.08854634  0.         -0.13744531
   0.35897317  0.12049715 -0.17710262  0.06637184]]
BestLambda: 0.6444444444444444


## Normalized data

### Dementia

In [10]:
learn(toy_, toy_test, y_, y_test, sparsity, norm = True)

	Accuracy: 0.6533333333333333
	Precision: 0.6486486486486487
	Recall: 0.6486486486486487
Coef: [[ 0.          0.          0.          0.          0.         -0.16897547
   1.14854933  0.         -0.44948396  0.        ]]
BestLambda: 0.6177777777777779


### Cardio

In [11]:
learn(toy_, toy_test, y_, y_test, sparsity, norm = True)

	Accuracy: 0.6533333333333333
	Precision: 0.6486486486486487
	Recall: 0.6486486486486487
Coef: [[ 0.          0.          0.          0.          0.         -0.16899248
   1.14854889  0.         -0.44948506  0.        ]]
BestLambda: 0.6177777777777779


## Origin Data Centering

### Dementia

In [12]:
learn(toy_, toy_test, y_, y_test, sparsity, centering = True)

	Accuracy: 0.6266666666666667
	Precision: 0.6097560975609756
	Recall: 0.6756756756756757
Coef: [[ 0.          0.          0.          0.06380438  0.         -0.17913075
   1.12221765  0.01351086 -0.46848729  0.01435427]]
BestLambda: 0.648888888888889


### Cardio

In [13]:
learn(toy_, toy_test, y_, y_test, sparsity, centering = True)

	Accuracy: 0.6266666666666667
	Precision: 0.6097560975609756
	Recall: 0.6756756756756757
Coef: [[ 0.          0.          0.          0.06382848  0.         -0.17913185
   1.12221843  0.01351239 -0.46847033  0.01435962]]
BestLambda: 0.648888888888889


## Origin Data  Centering and Normalization

### Dementia

In [14]:
learn(toy_, toy_test, y_, y_test, sparsity, centering = True, norm = True)

	Accuracy: 0.6266666666666667
	Precision: 0.6097560975609756
	Recall: 0.6756756756756757
Coef: [[ 0.          0.          0.          0.06382798  0.         -0.17914437
   1.12221571  0.01349412 -0.46847407  0.01436504]]
BestLambda: 0.648888888888889


### Cardio

In [15]:
learn(toy_, toy_test, y_, y_test, sparsity, centering = True, norm = True)

	Accuracy: 0.6266666666666667
	Precision: 0.6097560975609756
	Recall: 0.6756756756756757
Coef: [[ 0.          0.          0.          0.06382531  0.         -0.17913564
   1.12221852  0.01351011 -0.46847731  0.01436689]]
BestLambda: 0.648888888888889
