# Example Notebook

In this notebook we demonstrate CompFS on the Syn1 experiment from the paper. The same workflow can be applied to custom data, provided the features and labels are supplied as NumPy arrays.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from functools import reduce

import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

# Select the compute device: the first CUDA GPU when one is visible to
# PyTorch, otherwise the CPU. The choice is printed for the reader.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


# Import required files from CompFS Repo

In [2]:
from model.metrics import accuracy, mse, gsim, tpr_fdr
from model.thresholding_functions import make_lambda_threshold, make_std_threshold, make_top_k_threshold
from model.compfs import CompFS
from model.base_model import TorchModel
from datasets.datasets import NumpyDataset

  warn(f"Failed to load image Python extension: {e}")


# Example

Here we demonstrate CompFS on Syn1. The two cells below can be edited to run CompFS on your own data.

In [3]:
# These can be changed to run your own data.

# Synthetic data: every feature is drawn from a standard normal, and a sample
# is labelled positive when feature 0 OR feature 1 exceeds LABEL_THRESHOLD.
# Only features 0 and 1 therefore carry any signal.
SEED = 0  # fixed so the generated data is identical on every Restart & Run All
N_TRAIN, N_VAL, N_FEATURES = 20000, 200, 500
LABEL_THRESHOLD = 0.55

# A local generator keeps the data reproducible without touching NumPy's
# global random state.
rng = np.random.default_rng(SEED)

X_train = rng.normal(size=(N_TRAIN, N_FEATURES))
# Column-wise comparison replaces a per-row Python loop; the result is the
# same boolean array of shape (N_TRAIN,).
y_train = (X_train[:, 0] > LABEL_THRESHOLD) | (X_train[:, 1] > LABEL_THRESHOLD)
X_val = rng.normal(size=(N_VAL, N_FEATURES))
y_val = (X_val[:, 0] > LABEL_THRESHOLD) | (X_val[:, 1] > LABEL_THRESHOLD)

is_classification = True

# One array of feature indices per true group: feature 0 and feature 1 each
# form their own group, matching the OR structure of the labels above.
ground_truth_groups = [np.array([0]), np.array([1])]

In [4]:
# This config should be changed to use your own data, and find specific
# hyperparameters for the problem.
#
# Top-level keys: the model class, the keyword settings under 'model_config',
# and the torch device selected earlier in the notebook.

compfs_config = {
    'model': CompFS,
    'model_config': {
        # Training settings.
        'lr': 0.003,
        'lr_decay': 0.99,
        'batchsize': 50,
        'num_epochs': 35,
        # Loss and the metric reported on the validation set.
        'loss_func': nn.CrossEntropyLoss(),
        'val_metric': accuracy,
        # Dimensions. NOTE(review): in_dim presumably has to equal the number
        # of feature columns (500 in the example data) and out_dim the number
        # of classes -- confirm against the CompFS implementation.
        'in_dim': 500,
        'h_dim': 20,
        'out_dim': 2,
        # CompFS-specific settings; their exact meaning is defined by the
        # CompFS implementation and the paper, not by this notebook.
        'nlearners': 5,
        'threshold_func': make_lambda_threshold(0.7),
        'temp': 0.1,
        'beta_s': 4.5,
        'beta_s_decay': 0.99,
        'beta_d': 1.2,
        'beta_d_decay': 0.99,
    },
    'device': device,
}

# Train a CompFS Model and see the Groups

In [5]:
# Wrap the train and validation arrays as datasets (train first, then
# validation), build the model from the config, and fit it.
train_data, val_data = (
    NumpyDataset(features, labels, classification=is_classification)
    for features, labels in ((X_train, y_train), (X_val, y_val))
)

model = TorchModel(compfs_config)
model.train(train_data, val_data)



Training for 35 Epochs:

Epoch: 1, Average Loss: 22.872, Val Metric: 91.5, nfeatures: [11, 10, 10, 10, 14], Overlap: 3
Epoch: 2, Average Loss: 9.827, Val Metric: 92.0, nfeatures: [2, 1, 0, 1, 1], Overlap: 2
Epoch: 3, Average Loss: 5.728, Val Metric: 91.5, nfeatures: [1, 1, 0, 1, 1], Overlap: 2
Epoch: 4, Average Loss: 4.283, Val Metric: 95.0, nfeatures: [1, 1, 0, 1, 1], Overlap: 2
Epoch: 5, Average Loss: 3.656, Val Metric: 96.0, nfeatures: [1, 1, 0, 1, 1], Overlap: 2
Epoch: 6, Average Loss: 3.306, Val Metric: 96.0, nfeatures: [1, 1, 0, 1, 1], Overlap: 2
Epoch: 7, Average Loss: 3.109, Val Metric: 97.5, nfeatures: [1, 1, 0, 1, 1], Overlap: 2
Epoch: 8, Average Loss: 2.976, Val Metric: 96.5, nfeatures: [1, 1, 0, 1, 1], Overlap: 2
Epoch: 9, Average Loss: 2.883, Val Metric: 98.5, nfeatures: [1, 1, 0, 1, 1], Overlap: 2
Epoch: 10, Average Loss: 2.828, Val Metric: 98.5, nfeatures: [1, 1, 0, 1, 1], Overlap: 2
Epoch: 11, Average Loss: 2.773, Val Metric: 98.0, nfeatures: [1, 1, 0, 1, 1], Overlap:

In [6]:
# Fetch the learnt groups once, so the metrics and the printed groups below
# are all computed from the same object instead of three separate calls.
learnt_groups = model.get_groups()

# Get group similarity and group structure.
tpr, fdr = tpr_fdr(ground_truth_groups, learnt_groups)
group_sim, ntrue, npredicted = gsim(ground_truth_groups, learnt_groups)

print('\n\nGroup Structure:')
print('Group Similarity: {:.3f}, True Positive Rate: {:.3f}%, False Discovery Rate: {:.3f}%'.format(group_sim, tpr, fdr))
print('Number of True Groups: {}, Number of Predicted Groups: {}'.format(ntrue, npredicted))

# Give selected features, one line per learnt group (numbered from 1).
print('\n\nSelected Features:')
for group_number, group_features in enumerate(learnt_groups, start=1):
    print('Group: {}, Features: {}'.format(group_number, group_features))



Group Structure:
Group Similarity: 1.000, True Positive Rate: 100.000%, False Discovery Rate: 0.000%
Number of True Groups: 2, Number of Predicted Groups: 2


Selected Features:
Group: 1, Features: [1]
Group: 2, Features: [0]


We see that the model recovers both relevant features. It usually places features 0 and 1 in separate groups, but occasionally it puts them together in a single group.