This notebook describes an implementation of:

- The Logistic Regression with Hashing Trick to manage high cardinality categorical features

- The loss improvement method describe in the Chapelle's paper (Link below): Estimate the loss improvement if you add a feature in the model without retraining the model

https://people.csail.mit.edu/romer/papers/TISTRespPredAds.pdf

In [None]:
# Install for hashing
!pip install mmh3

In [27]:
from typing import List, Any
import zipfile
import glob
import tqdm
import sys

import pandas as pd
import numpy as np

import mmh3

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import train_test_split

In [4]:
# Load the dataset
drive_path = 'drive/MyDrive/HeartDisease/heart_2020_cleaned.csv.zip'
local_path_dir = './heart_data/'

In [5]:
def extract_zip_drive_to_local(drive_path: str, local_path: str):
    '''
    Extract Zip dataset to local
    '''
    with zipfile.ZipFile(drive_path, 'r') as zip_ref:
        zip_ref.extractall(local_path)

extract_zip_drive_to_local(drive_path, local_path_dir)

In [6]:
# Load the dataset
local_path_dataset = glob.glob(local_path_dir + '*.csv')[0]
heart_df = pd.read_csv(local_path_dataset)
heart_df

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.60,Yes,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,No,20.34,No,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,No,26.58,Yes,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,No,24.21,No,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,No,23.71,No,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319790,Yes,27.41,Yes,No,No,7.0,0.0,Yes,Male,60-64,Hispanic,Yes,No,Fair,6.0,Yes,No,No
319791,No,29.84,Yes,No,No,0.0,0.0,No,Male,35-39,Hispanic,No,Yes,Very good,5.0,Yes,No,No
319792,No,24.24,No,No,No,0.0,0.0,No,Female,45-49,Hispanic,No,Yes,Good,6.0,No,No,No
319793,No,32.81,No,No,No,0.0,0.0,No,Female,25-29,Hispanic,No,No,Good,12.0,No,No,No


In [7]:
# Features
quantitative_features = [
    'BMI'
]
categorical_features = [
    'Smoking',
    'AlcoholDrinking',
    'Stroke',
    'PhysicalHealth',
    'MentalHealth',
    'DiffWalking',
    'Sex',
    'AgeCategory',
    'Race',
    'Diabetic',
    'PhysicalActivity',
    'GenHealth',
    'SleepTime',
    'Asthma',
    'KidneyDisease',
    'SkinCancer'
]

In [8]:
# Apply Transformations to labels
label_transformer = lambda x: 1 if x == 'Yes' else 0
heart_df['label'] = heart_df['HeartDisease'].apply(label_transformer)

In [9]:
# Split the dataset between train and validation
y = heart_df['label'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(heart_df, y, test_size=0.2, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((255836, 19), (63959, 19), (255836,), (63959,))

In [10]:
X_train

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer,label
95877,No,23.33,Yes,No,No,0.0,0.0,No,Male,75-79,White,No,Yes,Very good,7.0,No,No,No,0
228939,Yes,27.46,Yes,No,Yes,30.0,0.0,No,Male,55-59,White,No,Yes,Good,6.0,No,No,No,1
260256,No,32.69,No,No,No,2.0,2.0,No,Male,50-54,Hispanic,No,No,Very good,8.0,No,No,No,0
84785,No,31.32,No,No,No,0.0,0.0,No,Female,25-29,White,No,Yes,Excellent,8.0,No,No,No,0
83845,Yes,24.63,Yes,No,No,2.0,10.0,No,Male,80 or older,White,Yes,Yes,Good,7.0,No,No,No,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119879,No,29.65,No,No,No,0.0,0.0,No,Male,60-64,White,No,No,Good,8.0,No,No,No,0
259178,No,42.60,Yes,No,No,0.0,5.0,No,Male,35-39,White,No,Yes,Good,6.0,No,No,No,0
131932,No,31.19,Yes,No,No,12.0,6.0,No,Male,65-69,White,"No, borderline diabetes",No,Very good,8.0,No,No,No,0
146867,No,22.24,No,No,No,7.0,5.0,No,Female,18-24,White,No,Yes,Excellent,8.0,No,No,No,0


In [11]:
def apply_hash(
    col_name: str,
    value: str,
    table_size: int
) -> int:
    '''
    Apply hash function to the modality of a specific feature
    Map the category into the input vector
    '''
    inpt = col_name + '_' + value
    return mmh3.hash(inpt) % table_size

Apply the hashing

In [12]:
# Hashing Trick
class Hasher:
    def __init__(self, hashing_size: int):
        self.hashing_size = hashing_size

    def hash(
        self,
        columns: List[str],
        input: List[Any]
    ) -> np.array:
        '''
        Hash a single sample from the data
        To hash the sample we need the columns
        '''
        d = len(input)
        res = torch.zeros(self.hashing_size)
        for i in range(d):
            idx = apply_hash(columns[i], str(input[i]), self.hashing_size)
            res[idx] = 1
        return res

In [13]:
# TEST cell

# Build the hasher
hasher = Hasher(hashing_size=2461)

# Get only the categorical features in the input
heart_df_cat = X_train[categorical_features]

# Get features columns (usefull for the hashing)
columns = heart_df_cat.columns.to_list()

for i in range(50):
    res = hasher.hash(columns, heart_df_cat.iloc[i].to_list())
    print(res.sum())
print()
len(columns)

tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(15.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)
tensor(16.)



16

In [14]:
# Create torch dataloader
class HealthDataset(Dataset):
    def __init__(
        self,
        df: pd.core.frame.DataFrame,
        labels: np.array,
        col_features: List[str],
        hasher: Hasher,
    ):
        self.df = df[col_features]
        self.labels = labels
        self.columns = col_features
        self.hasher = hasher

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx].to_list()
        label = self.labels[idx]
        return self.hasher.hash(self.columns, row), float(label)

In [15]:
# TEST cell
train_dataset = HealthDataset(X_train, y_train, categorical_features, hasher)
train_dataset[0]

(tensor([0., 0., 0.,  ..., 0., 0., 0.]), 0.0)

In [16]:
# Unsigmoid func
unsigmoid_fn = lambda x: torch.log(x / (1 - x))

# TEST Sanity 
unsigmoid_fn(torch.sigmoid(torch.tensor([3])))

tensor([3.0000])

In [17]:
# Create logistic regression model
class LogisticRegression(nn.Module):
    def __init__(self, input_dim: int):
        super(LogisticRegression, self).__init__()
        self.linear_layer = nn.Linear(input_dim, 1)

    def forward(self, x: np.array):
        return torch.sigmoid(self.linear_layer(x))

In [18]:
# TEST cell

input_dim = 200
batch_size = 6
rand_input = torch.randn((batch_size, input_dim))
lr_model = LogisticRegression(input_dim)

output = lr_model(rand_input)
output

tensor([[0.4645],
        [0.4472],
        [0.4818],
        [0.3360],
        [0.2389],
        [0.2205]], grad_fn=<SigmoidBackward0>)

In [79]:
def train(
    dataloader: torch.utils.data.dataloader.DataLoader,
    model: LogisticRegression,
    loss_fn: torch.nn.modules.loss.BCELoss,
    optimizer: torch.optim.Adam,
) -> float:
    model.train()

    losses = []

    for idx, batch in enumerate(tqdm.tqdm(dataloader, desc='training...', file=sys.stdout, position=0, leave=True)):
        input_samples, labels = batch
        optimizer.zero_grad()

        # Make the prediction
        preds = model(input_samples).squeeze().double()

        # Compute the loss
        loss = loss_fn(preds, labels)

        # Gradient clip
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Backprop
        loss.backward()
        optimizer.step()

        losses.append(loss)

        if idx % 501 == 0:
            print(" Training Loss: {}".format(torch.tensor(losses).mean()))

    return torch.tensor(losses).mean()

def evaluate(
    dataloader: torch.utils.data.dataloader.DataLoader,
    model: LogisticRegression,
    loss_fn: torch.nn.modules.loss.BCELoss,
) -> float:
    model.eval()

    losses = []

    with torch.no_grad():
        for idx, batch in enumerate(tqdm.tqdm(dataloader, desc='evaluation...', file=sys.stdout, position=0, leave=True)):
            input_samples, labels = batch
            preds = model(input_samples).squeeze().double()

            # Compute the loss
            loss = loss_fn(preds, labels)
            losses.append(loss)

    return torch.tensor(losses).mean()

def training_epochs(
    model: LogisticRegression,
    loss_fn: torch.nn.modules.loss.BCELoss,
    optimizer: torch.optim.Adam,
    n_epochs: int,
    train_dataloader: torch.utils.data.dataloader.DataLoader,
    test_dataloader: torch.utils.data.dataloader.DataLoader
):
    best_loss = float('inf')
    for epoch in range(n_epochs):
        print(f'Epoch: {epoch+1}')
        train_loss = train(train_dataloader, model, loss_fn, optimizer)
        test_loss = evaluate(test_dataloader, model, loss_fn)

        print()
        print("Training Loss  : {}".format(train_loss))
        print("Evaluation Loss: {}".format(test_loss))
        print()
        print('----------')


Study:

- Train a first model with all the categorical features except one (Diabetic for instance)

- Retrain the model but with all the categorical features

- Compute the loss improvement (impact of the new feature)

- Compute the loss improvement with the method described in the paper

- Comapare both loss improvement (should be same or similar)

- Do the same (all the steps) but this time with regularization

1. Train a model with categorical features except one

In [89]:
# Do not take into account age category
categorical_features_without_age = [
    'Smoking',
    'AlcoholDrinking',
    'Stroke',
    'PhysicalHealth',
    'MentalHealth',
    'DiffWalking',
    'Sex',
    # 'AgeCategory',
    'Race',
    'Diabetic',
    'PhysicalActivity',
    'GenHealth',
    'SleepTime',
    'Asthma',
    'KidneyDisease',
    'SkinCancer'
]

In [80]:
batch_size = 32
hashing_space_size = 2461

# Create the Hasher
hasher = Hasher(hashing_size=hashing_space_size)

# Create the model
lr_model = LogisticRegression(hashing_space_size)

# Create the datasets
train_dataset = HealthDataset(X_train, y_train, categorical_features_without_age, hasher)
test_dataset = HealthDataset(X_test, y_test, categorical_features_without_age, hasher)

# Create the dataloaders
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True
)

test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size
)

# Create the loss
loss_fn = nn.BCELoss()

# Create the optimizer
lr = 0.001
optimizer = torch.optim.Adam(lr_model.parameters(), lr=lr)

In [81]:
n_epochs = 3
training_epochs(lr_model, loss_fn, optimizer, n_epochs, train_dataloader, test_dataloader)

Epoch: 1
training...:   0%|          | 0/7995 [00:00<?, ?it/s] Training Loss: 0.6833636969039508
training...:   6%|▌         | 492/7995 [00:08<01:20, 93.53it/s] Training Loss: 0.3299027479989785
training...:  13%|█▎        | 1002/7995 [00:13<01:12, 96.03it/s] Training Loss: 0.2960950030647426
training...:  19%|█▉        | 1500/7995 [00:18<01:08, 95.13it/s] Training Loss: 0.2817512017885029
training...:  25%|██▌       | 2000/7995 [00:23<01:00, 98.49it/s] Training Loss: 0.27309005875258413
training...:  31%|███       | 2495/7995 [00:28<00:56, 97.53it/s] Training Loss: 0.26448166071577583
training...:  37%|███▋      | 2996/7995 [00:34<00:51, 96.90it/s]  Training Loss: 0.2611358483060761
training...:  44%|████▍     | 3502/7995 [00:39<00:47, 94.68it/s] Training Loss: 0.2581855607327359
training...:  50%|█████     | 4001/7995 [00:44<00:39, 100.40it/s] Training Loss: 0.25555674561861536
training...:  56%|█████▋    | 4503/7995 [00:49<00:34, 99.82it/s]  Training Loss: 0.2547021473543587
trainin

2. Re-train the model with all the features

And compute the loss improvement

In [82]:
# Features
categorical_features = [
    'Smoking',
    'AlcoholDrinking',
    'Stroke',
    'PhysicalHealth',
    'MentalHealth',
    'DiffWalking',
    'Sex',
    'AgeCategory',
    'Race',
    'Diabetic',
    'PhysicalActivity',
    'GenHealth',
    'SleepTime',
    'Asthma',
    'KidneyDisease',
    'SkinCancer'
]

In [83]:
batch_size = 32
hashing_space_size = 2461

# Create the Hasher
hasher = Hasher(hashing_size=hashing_space_size)

# Create the model with all features ('af')
lr_model_af = LogisticRegression(hashing_space_size)

# Create the datasets
train_dataset = HealthDataset(X_train, y_train, categorical_features, hasher)
test_dataset = HealthDataset(X_test, y_test, categorical_features, hasher)

# Create the dataloaders
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True
)

test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size
)

# Create the loss
loss_fn = nn.BCELoss()

# Create the optimizer
lr = 0.001
optimizer = torch.optim.Adam(lr_model_af.parameters(), lr=lr)

In [84]:
n_epochs = 3
training_epochs(lr_model_af, loss_fn, optimizer, n_epochs, train_dataloader, test_dataloader)

Epoch: 1
training...:   0%|          | 0/7995 [00:00<?, ?it/s] Training Loss: 0.683092419982565
training...:   6%|▋         | 501/7995 [00:06<01:19, 94.59it/s] Training Loss: 0.30998312434651265
training...:  12%|█▏        | 997/7995 [00:12<01:13, 95.46it/s] Training Loss: 0.28463421940062744
training...:  19%|█▊        | 1496/7995 [00:18<01:10, 92.17it/s] Training Loss: 0.27093243550760965
training...:  25%|██▍       | 1995/7995 [00:23<01:03, 94.16it/s] Training Loss: 0.26252640475099204
training...:  31%|███▏      | 2505/7995 [00:28<00:57, 95.44it/s] Training Loss: 0.2559077650023678
training...:  38%|███▊      | 3004/7995 [00:33<00:51, 95.99it/s] Training Loss: 0.2522454715398478
training...:  44%|████▍     | 3503/7995 [00:39<00:46, 96.76it/s] Training Loss: 0.24897258693549673
training...:  50%|█████     | 4000/7995 [00:44<00:41, 95.39it/s] Training Loss: 0.24720931588322817
training...:  56%|█████▋    | 4505/7995 [00:49<00:35, 97.80it/s] Training Loss: 0.24594686936706817
training

In [86]:
eval_loss_without_age = 0.24735161529341648
eval_loss_allfeatures = 0.2324779820886973

loss_improvement = 100 * (eval_loss_allfeatures - eval_loss_without_age) / eval_loss_without_age
print("Loss improvement: {}%".format(round(loss_improvement, 3)))

Loss improvement: -6.013%


Implementation of the loss improvement method described in the paper

We should find something close to -6%

IMPORTANT: In this part we must use the model: `lr_model`

In [90]:
batch_size = 32
hashing_space_size = 2461

# Create the Hasher
hasher = Hasher(hashing_size=hashing_space_size)

# Create the datasets
train_dataset = HealthDataset(X_train, y_train, categorical_features_without_age, hasher)
test_dataset = HealthDataset(X_test, y_test, categorical_features_without_age, hasher)

# Create the dataloaders
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size
)

test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size
)

# Create the loss
loss_fn = nn.BCELoss()

In [91]:
# Make prediction with the model in which we don't have all the features
initial_train_loss = evaluate(train_dataloader, lr_model, loss_fn)
initial_test_loss = evaluate(test_dataloader, lr_model, loss_fn)

evaluation...: 100%|██████████| 7995/7995 [01:25<00:00, 93.00it/s] 
evaluation...: 100%|██████████| 1999/1999 [00:19<00:00, 100.96it/s]


In [208]:
# Make prediction with the model with all the features

# Create the datasets
train_dataset = HealthDataset(X_train, y_train, categorical_features, hasher)
test_dataset = HealthDataset(X_test, y_test, categorical_features, hasher)

# Create the dataloaders
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size
)

test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size
)

allfeatures_train_loss = evaluate(train_dataloader, lr_model_af, loss_fn)
allfeatures_test_loss = evaluate(test_dataloader, lr_model_af, loss_fn)

evaluation...: 100%|██████████| 7995/7995 [01:17<00:00, 102.63it/s]
evaluation...: 100%|██████████| 1999/1999 [00:25<00:00, 79.30it/s]


In [144]:
def get_predictions(
    model: LogisticRegression,
    health_dataset: HealthDataset,
) -> torch.tensor:
    model.eval()

    preds = []

    with torch.no_grad():
        for idx, batch in enumerate(tqdm.tqdm(health_dataset, desc='make predictions...', file=sys.stdout, position=0, leave=True)):
            input_sample, _ = batch
            pred = model(input_sample.unsqueeze(dim=0)).squeeze().double()
            preds.append(pred.item())

    return torch.tensor(preds)

In [119]:
# TEST cell
# Sanity

l = []
for idx, elm in enumerate(test_dataset):
    if idx == 10:
        break
    l.append(int(elm[1]))


print(X_test.iloc[:10]['label'].tolist(), l)

l = []
for idx, elm in enumerate(train_dataset):
    if idx == 10:
        break
    l.append(int(elm[1]))


print(X_train.iloc[:10]['label'].tolist(), l)

[0, 0, 0, 0, 0, 1, 0, 0, 0, 0] [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0] [0, 1, 0, 0, 1, 0, 0, 0, 0, 0]


In [128]:
# Get the prediction
train_probas = get_predictions(lr_model, train_dataset)
test_probas = get_predictions(lr_model, test_dataset)

make predictions...: 100%|██████████| 255836/255836 [01:41<00:00, 2511.94it/s]
make predictions...: 100%|██████████| 63959/63959 [00:24<00:00, 2595.81it/s]


In [136]:
# Get the prediction unsigmoid
train_preds = unsigmoid_fn(train_probas)
test_preds = unsigmoid_fn(test_probas)

In [138]:
# Put the prediction in the X_train and X_test

# TRAIN
X_train['pred_proba'] = train_probas
X_train['pred_unsigmoid'] = train_preds

# TEST
X_test['pred_proba'] = test_probas
X_test['pred_unsigmoid'] = test_preds

In [142]:
# Map the labels to -1 and 1
label_mapping = lambda x: -1 if x == 0 else 1

X_train['label'] = X_train['label'].apply(label_mapping)
X_test['label'] = X_test['label'].apply(label_mapping)

In [164]:
# Compute each weight for each modality of the feature

def compute_weights(df: pd.DataFrame, feature_column: str):
    '''
    Compute the weight for all the modalities
    This implementation might not be the most efficient
    '''
    category_modalities = df[feature_column].unique()
    weights = []

    for modality in category_modalities:
        # Compute L' and L''
        filtered_df = df[df[feature_column] == modality]
        L_prime = (filtered_df['pred_proba'] - (filtered_df['label'] + 1) / 2.0).sum()
        L_prime_prime = (filtered_df['pred_proba'] * (1 - filtered_df['pred_proba'])).sum()
        w = - L_prime / L_prime_prime
        weights.append(w)

        print("Modality: {} -> weight: {}".format(modality, w))

    return torch.tensor(weights), np.array(category_modalities)

In [165]:
weights, category_modalities = compute_weights(X_train, 'AgeCategory')

Modality: 75-79 -> weight: 0.4836946052833845
Modality: 55-59 -> weight: -0.24018742041374255
Modality: 50-54 -> weight: -0.36881469618391494
Modality: 25-29 -> weight: -0.8777887738226783
Modality: 80 or older -> weight: 0.6979074249508809
Modality: 30-34 -> weight: -0.8320965726960327
Modality: 70-74 -> weight: 0.31965294429995306
Modality: 40-44 -> weight: -0.7004012757232445
Modality: 35-39 -> weight: -0.8334942087090931
Modality: 18-24 -> weight: -0.9036539242982088
Modality: 45-49 -> weight: -0.5863626574607549
Modality: 60-64 -> weight: -0.07687872416171881
Modality: 65-69 -> weight: 0.10349676540055677


In [174]:
# Add the weight column in train df and test df

def add_weight_column(modality: str, category_modalities: np.array, weights: torch.tensor):
    idx = np.argwhere(category_modalities == modality).item()
    return weights[idx].item()

X_train['weight'] = X_train['AgeCategory'].apply(add_weight_column, args=(category_modalities, weights))
X_test['weight'] = X_test['AgeCategory'].apply(add_weight_column, args=(category_modalities, weights))

In [199]:
# Re-compute the loss with the formula
def compute_loss_with_new_weights(
    df: pd.DataFrame,
) -> float:
    preds_unsigmoid = torch.tensor(df['pred_unsigmoid'].values).double()
    labels = torch.tensor(df['label'].values).double()
    weights_by_mod = torch.tensor(df['weight'].values).double()
    return torch.log(1 + torch.exp(-labels * (preds_unsigmoid + weights_by_mod))).sum() / len(df)

In [200]:
new_loss_train = compute_loss_with_new_weights(X_train)
new_loss_test = compute_loss_with_new_weights(X_test)
new_loss_train, new_loss_test

(tensor(0.2302, dtype=torch.float64), tensor(0.2375, dtype=torch.float64))

In [209]:
allfeatures_train_loss, allfeatures_test_loss

(tensor(0.2254, dtype=torch.float64), tensor(0.2325, dtype=torch.float64))

In [212]:
# SUMMARY
print("TRAIN")
print("Initial Loss:           {}".format(initial_train_loss))
print("Loss with new feature:  {}".format(allfeatures_train_loss))
print("New Loss with Chapelle: {}".format(new_loss_train))
print()
print("TEST")
print("Initial Loss:           {}".format(initial_test_loss))
print("Loss with new feature:  {}".format(allfeatures_test_loss))
print("New Loss with Chapelle: {}".format(new_loss_test))

TRAIN
Initial Loss:           0.23981577780208171
Loss with new feature:  0.22541595560126987
New Loss with Chapelle: 0.2302211276672206

TEST
Initial Loss:           0.24735161529341648
Loss with new feature:  0.2324779820886973
New Loss with Chapelle: 0.23752499006846295


In [214]:
loss_improvement_with_training = 100 * (allfeatures_test_loss - initial_test_loss) / initial_test_loss
loss_improvement_with_chapelle = 100 * (new_loss_test - initial_test_loss) / initial_test_loss

print("Loss improvement with training: {}%".format(loss_improvement_with_training))
print("Loss improvement with chapelle: {}%".format(loss_improvement_with_chapelle))

Loss improvement with training: -6.013153860780573%
Loss improvement with chapelle: -3.972735416866744%
