In [1]:
from protosc import filter_model
from protosc.simulation import create_correlated_data
from protosc.benchmark import execute
from protosc.wrapper import Wrapper
import numpy as np
import pandas as pd

In [2]:
np.random.seed(1928374)
X, y, truth = create_correlated_data()

In [3]:
output = execute(X, y, fold_seed = 213874)

100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [01:00<00:00,  7.58s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:32<00:00,  4.05s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:37<00:00,  4.73s/it]


In [4]:
output

{'Accuracy': [0.7065283479292099, 0.8330663884327678, 0.8275379720853859],
 'Recurring features': [[769,
   788,
   682,
   175,
   176,
   693,
   451,
   591,
   81,
   468,
   344,
   473,
   862,
   863,
   485,
   486,
   635,
   497,
   251,
   766],
  [],
  [591, 81, 175, 788, 344, 862, 473, 485, 486, 497]],
 'Feature frequencies': [{769: 8,
   788: 8,
   682: 8,
   175: 8,
   176: 8,
   693: 8,
   451: 8,
   591: 8,
   81: 8,
   468: 8,
   344: 8,
   473: 8,
   862: 8,
   863: 8,
   485: 8,
   486: 8,
   635: 8,
   497: 8,
   251: 8,
   766: 8},
  {0: 1,
   521: 1,
   12: 4,
   13: 1,
   528: 1,
   18: 1,
   27: 1,
   38: 1,
   563: 2,
   567: 1,
   572: 1,
   70: 1,
   587: 1,
   591: 6,
   80: 1,
   81: 6,
   79: 1,
   597: 4,
   607: 1,
   106: 1,
   107: 1,
   116: 1,
   629: 1,
   631: 1,
   119: 1,
   635: 3,
   124: 1,
   151: 1,
   663: 1,
   682: 3,
   684: 4,
   175: 6,
   693: 3,
   187: 1,
   702: 1,
   193: 2,
   203: 2,
   215: 1,
   216: 1,
   732: 1,
   224: 1,


In [14]:
av_accuracy = np.mean([i[1] for i in output_filter])
av_accuracy

0.7065283479292099

In [31]:
def __examine_filter(output_filter, n_fold):
    # Calculate average accuracy score
    accuracies = [a[1] for a in output_filter]
    av_accuracy = np.mean(accuracies)

    # Determine feature frequencies
    all_features = [f for feat in output_filter for f in feat[0]]
    fq_features = {}
    rec_features = []
    for x in set(all_features):
        fq_features[x] = all_features.count(x)
        if all_features.count(x) == n_fold:
            rec_features.append(x)

    # Add all findings to one dicitonary
    results = {'Accuracy': av_accuracy, 'Recurring features': rec_features, 'Feature frequencies': fq_features}
    return results

In [41]:
def __examine_wrapper(output_wrapper, n_fold):
    # Calculate average accuracy score
    av_accuracy = np.mean(output_wrapper['accuracy'])

    # Determine feature frequencies
    all_features = [f for feat in output_wrapper['features'] for f in feat]
    fq_features = {}
    for x in set(all_features):
        fq_features[x] = all_features.count(x)

    # Find recurring features
    rec_features = output_wrapper['recurring']

    # Add all findings to one dicitonary
    results = {'Accuracy': av_accuracy, 'Recurring features': rec_features,
               'Feature frequencies': fq_features}
    return results

In [47]:
filter_model = __examine_filter(output_filter, n_fold=8)
wrapper_fast = __examine_wrapper(output_wrapper, n_fold=8)
dicts = [filter_model, wrapper_fast]
overview = {k: [d[k] for d in dicts] for k in dicts[0]}
overview

{'Accuracy': [0.7065283479292099, 0.8330663884327678],
 'Recurring features': [[769,
   788,
   682,
   175,
   176,
   693,
   451,
   591,
   81,
   468,
   344,
   473,
   862,
   863,
   485,
   486,
   635,
   497,
   251,
   766],
  []],
 'Feature frequencies': [{769: 8,
   788: 8,
   682: 8,
   175: 8,
   176: 8,
   693: 8,
   451: 8,
   591: 8,
   81: 8,
   468: 8,
   344: 8,
   473: 8,
   862: 8,
   863: 8,
   485: 8,
   486: 8,
   635: 8,
   497: 8,
   251: 8,
   766: 8},
  {0: 1,
   521: 1,
   12: 4,
   13: 1,
   528: 1,
   18: 1,
   27: 1,
   38: 1,
   563: 2,
   567: 1,
   572: 1,
   70: 1,
   587: 1,
   591: 6,
   80: 1,
   81: 6,
   79: 1,
   597: 4,
   607: 1,
   106: 1,
   107: 1,
   116: 1,
   629: 1,
   631: 1,
   119: 1,
   635: 3,
   124: 1,
   151: 1,
   663: 1,
   682: 3,
   684: 4,
   175: 6,
   693: 3,
   187: 1,
   702: 1,
   193: 2,
   203: 2,
   215: 1,
   216: 1,
   732: 1,
   224: 1,
   225: 1,
   249: 1,
   253: 1,
   767: 1,
   769: 3,
   781: 1,
   270:

## Filter model

In [5]:
%%time
np.random.seed(9835)
feature_accuracy = filter_model(X, y, fold_seed=213874)

Wall time: 36.2 s


In [8]:
# Print outcome in dataframe
df_filter = pd.DataFrame(feature_accuracy)
df_filter = df_filter.rename(columns={0: 'features', 1:'accuracy'})
df_filter

Unnamed: 0,features,accuracy
0,"[175, 486, 81, 344, 591, 635, 682, 769, 693, 8...",0.551724
1,"[485, 862, 788, 473, 497, 175, 81, 486, 591, 3...",0.732143
2,"[485, 862, 497, 473, 788, 635, 682, 769, 693, ...",0.724138
3,"[862, 485, 497, 788, 473, 486, 175, 81, 344, 5...",0.714286
4,"[485, 862, 497, 788, 473, 486, 81, 175, 344, 5...",0.740741
5,"[485, 862, 788, 497, 473, 81, 486, 344, 175, 5...",0.633333
6,"[485, 862, 788, 473, 497, 175, 81, 344, 486, 5...",0.775862
7,"[485, 788, 862, 497, 473, 175, 81, 486, 591, 3...",0.78


## Wrapper
### Slow method

In [12]:
# Slow method (add immediately = False)
slow = Wrapper(X, y, fold_seed=213874)
out_slow = slow.wrapper(n_jobs=-1)

100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:28<00:00,  3.51s/it]


In [13]:
# Print outcome in dataframe
df_wr_slow = pd.DataFrame([value for key, value in out_slow.items() if key == 'features' or key == 'accuracy']).T
df_wr_slow = df_wr_slow.rename(columns={0: 'Features', 1: 'Accuracy'})
df_wr_slow

Unnamed: 0,Features,Accuracy
0,"[862, 485, 497, 788, 473, 597, 684, 12, 984, 9...",0.793103
1,"[485, 862, 788, 473, 497, 175, 81, 486, 591, 3...",0.910714
2,"[175, 486, 81, 344, 591, 597, 684, 12, 984, 95...",0.896552
3,"[862, 485, 497, 788, 473, 486, 175, 81, 344, 5...",0.821429
4,"[635, 769, 682, 693, 863, 302, 124, 732, 899, ...",0.796296
5,"[485, 862, 788, 497, 473, 81, 486, 344, 175, 5...",0.833333
6,"[424, 381, 563, 465, 203, 324, 38, 587, 521, 4...",0.793103
7,"[635, 769, 682, 693, 863, 175, 81, 486, 591, 3...",0.82


### Fast method

In [None]:
# Fast method (add immediately = True)
fast = Wrapper(X, y, add_im=True, fold_seed=213874)
out_fast = fast.wrapper(n_jobs=-1)

In [None]:
# Print outcome in dataframe
df_wr_fast = pd.DataFrame([value for key, value in out_fast.items() if key == 'features' or key == 'accuracy']).T
df_wr_fast = df_wr_fast.rename(columns={0: 'Features', 1: 'Accuracy'})
df_wr_fast

In [23]:
# outcome
slow.__matching(out_slow)

AttributeError: 'Wrapper' object has no attribute '__matching'

In [35]:
slow.__matching()

AttributeError: 'Wrapper' object has no attribute '__matching'