In [None]:
# Step 1: load "<code>data_pool_LELS_no_bad_tiffs.tsv</code>"

In [1]:
import pandas as pd

In [2]:
# Column names for the data-pool TSV, supplied by hand because the file is
# read with header=None.
col_header_names = [
    'patient_id', 'dummy_id', 't', 'eye', 'side',
    'path', 'baseline_scale', 'casecontrol', 'severity', 'simple',
]

In [3]:
# Load the tab-separated image pool; the file carries no header row, so the
# column names come from col_header_names.
data_pool = pd.read_csv(
    'data_pool_LELS_no_bad_tiffs.tsv',
    names=col_header_names,
    header=None,
    sep='\t'
)

In [4]:
data_pool

Unnamed: 0,patient_id,dummy_id,t,eye,side,path,baseline_scale,casecontrol,severity,simple
0,1890,51003,4,LE,LS,tiff/AMD_Category_2_Other/51003-04-F2-LE-LS.tiff,2,Other,2,1
1,1190,51023,4,LE,LS,tiff/AMD_Category_1_Other/51023-04-F2-LE-LS.tiff,1,Other,2,1
2,2147,51026,22,LE,LS,tiff/AMD_Category_1_Control/51026-22-F2-LE-LS....,1,Control,1,0
3,2963,51030,12,LE,LS,tiff/AMD_Category_2_Other/51030-12-F2-LE-LS.tiff,2,Other,1,1
4,5612,51034,12,LE,LS,tiff/AMD_Category_1_Other/51034-12-F2-LE-LS.tiff,1,Other,1,0
5,2352,51036,20,LE,LS,tiff/AMD_Category_2_Other/51036-20-F2-LE-LS.tiff,2,Other,1,0
6,4823,51045,12,LE,LS,tiff/AMD_Category_2_Other/51045-12-F2-LE-LS.tiff,2,Other,4,1
7,1352,51046,24,LE,LS,tiff/AMD_Category_2_Other/51046-24-F2-LE-LS.tiff,2,Other,1,0
8,4086,51056,18,LE,LS,tiff/AMD_Category_1_Other/51056-18-F2-LE-LS.tiff,1,Other,1,0
9,1070,51060,14,LE,LS,tiff/AMD_Category_1_Control/51060-14-F2-LE-LS....,1,Control,1,0


In [5]:
len(data_pool)

33529

# Step 2: Filter out images with advanced AMD (severity scores 10, 11, 12)

In [6]:
# Keep only rows whose severity score is 9 or lower.
data_pool = data_pool.loc[ data_pool['severity'] <= 9 ]

In [7]:
len(data_pool)

28994

## Known problem with dummy_id 52831

In [8]:
data_pool = data_pool[ data_pool.dummy_id != 52831 ]

In [9]:
len(data_pool)

28990

# Step 3: Only use rows for which sigs have been calculated

In [10]:
# One image path per line, no header row; loaded as a single 'path' column so
# it can be joined against data_pool.
sigs_list = pd.read_csv(
    'completed-sigs-2015-07-22.txt',
    names=['path'],
    header=None
)

In [11]:
sigs_list.head()

Unnamed: 0,path
0,tiff/AMD_Category_4b_Control/55698-04-F2-LE-LS...
1,tiff/AMD_Category_4b_Control/55698-04-F2-LE-RS...
2,tiff/AMD_Category_4b_Control/55698-04-F2-RE-LS...
3,tiff/AMD_Category_4b_Control/55698-04-F2-RE-RS...
4,tiff/AMD_Category_4b_Control/55698-QUA-F2-LE-L...


In [12]:
len( sigs_list)

27412

In [13]:
sigs_list_set = set(sigs_list['path'])

In [14]:
len(sigs_list_set)

27412

In [15]:
data_pool_path_set = set( data_pool['path'])

In [16]:
len(data_pool_path_set)

28990

In [17]:
intersection = sigs_list_set & data_pool_path_set

In [18]:
len(intersection)

5434

In [19]:
# Inner join on 'path': keeps only the data_pool rows whose image path also
# appears in sigs_list.
data_pool_w_sigs = data_pool.merge( sigs_list, how='inner', on='path' )

In [20]:
len(data_pool_w_sigs)

5434

In [21]:
data_pool_w_sigs

Unnamed: 0,patient_id,dummy_id,t,eye,side,path,baseline_scale,casecontrol,severity,simple
0,1890,51003,4,LE,LS,tiff/AMD_Category_2_Other/51003-04-F2-LE-LS.tiff,2,Other,2,1
1,1190,51023,4,LE,LS,tiff/AMD_Category_1_Other/51023-04-F2-LE-LS.tiff,1,Other,2,1
2,2147,51026,22,LE,LS,tiff/AMD_Category_1_Control/51026-22-F2-LE-LS....,1,Control,1,0
3,2963,51030,12,LE,LS,tiff/AMD_Category_2_Other/51030-12-F2-LE-LS.tiff,2,Other,1,1
4,5612,51034,12,LE,LS,tiff/AMD_Category_1_Other/51034-12-F2-LE-LS.tiff,1,Other,1,0
5,2352,51036,20,LE,LS,tiff/AMD_Category_2_Other/51036-20-F2-LE-LS.tiff,2,Other,1,0
6,4823,51045,12,LE,LS,tiff/AMD_Category_2_Other/51045-12-F2-LE-LS.tiff,2,Other,4,1
7,1352,51046,24,LE,LS,tiff/AMD_Category_2_Other/51046-24-F2-LE-LS.tiff,2,Other,1,0
8,4086,51056,18,LE,LS,tiff/AMD_Category_1_Other/51056-18-F2-LE-LS.tiff,1,Other,1,0
9,1070,51060,14,LE,LS,tiff/AMD_Category_1_Control/51060-14-F2-LE-LS....,1,Control,1,0


## How many unique patients here?

In [22]:
len( data_pool_w_sigs['patient_id'].unique() )

931

# Step 4: Group by patient_id; take the image for which severity is highest

In [23]:
grouped = data_pool_w_sigs.groupby('patient_id')

In [24]:
group_iter = grouped.__iter__()

In [25]:
# Pull the first (patient_id, rows) pair off the group iterator for inspection.
retval = next( group_iter )

In [26]:
len(retval)

2

In [27]:
retval[0]

'1001'

In [28]:
retval[1]

Unnamed: 0,patient_id,dummy_id,t,eye,side,path,baseline_scale,casecontrol,severity,simple
87,1001,51685,4,LE,LS,tiff/AMD_Category_3a_Other/51685-04-F2-LE-LS.tiff,3a,Other,3,1
434,1001,51685,6,LE,LS,tiff/AMD_Category_3a_Other/51685-06-F2-LE-LS.tiff,3a,Other,3,1
781,1001,51685,8,LE,LS,tiff/AMD_Category_3a_Other/51685-08-F2-LE-LS.tiff,3a,Other,2,1
1114,1001,51685,10,LE,LS,tiff/AMD_Category_3a_Other/51685-10-F2-LE-LS.tiff,3a,Other,4,1
1453,1001,51685,14,LE,LS,tiff/AMD_Category_3a_Other/51685-14-F2-LE-LS.tiff,3a,Other,6,2
1798,1001,51685,16,LE,LS,tiff/AMD_Category_3a_Other/51685-16-F2-LE-LS.tiff,3a,Other,8,4
2136,1001,51685,20,LE,LS,tiff/AMD_Category_3a_Other/51685-20-F2-LE-LS.tiff,3a,Other,6,3
2468,1001,51685,0,LE,LS,tiff/AMD_Category_3a_Other/51685-QUA-F2-LE-LS....,3a,Other,1,1


In [29]:
max_severity = retval[1]['severity'].max()

In [30]:
max_severity

8.0

In [31]:
retval[1].loc[ retval[1]['severity'] == max_severity ]

Unnamed: 0,patient_id,dummy_id,t,eye,side,path,baseline_scale,casecontrol,severity,simple
1798,1001,51685,16,LE,LS,tiff/AMD_Category_3a_Other/51685-16-F2-LE-LS.tiff,3a,Other,8,4


In [32]:
groups = grouped.__iter__()

In [33]:
# For every patient keep the row(s) tied for that patient's highest severity.
# The GroupBy object is iterated directly instead of through a previously
# created single-use iterator, so this cell gives the same result every time
# it is re-run.
master_LELS = pd.concat(
    [ rows.loc[ rows['severity'] == rows['severity'].max() ] for _pid, rows in grouped ]
)

In [34]:
len(master_LELS)

2265

In [35]:
# Collapse ties: keep the first row seen for each patient_id.
master_LELS = master_LELS.drop_duplicates( subset='patient_id' )

In [36]:
len(master_LELS)

931

In [37]:
master_LELS

Unnamed: 0,patient_id,dummy_id,t,eye,side,path,baseline_scale,casecontrol,severity,simple
1798,1001,51685,16,LE,LS,tiff/AMD_Category_3a_Other/51685-16-F2-LE-LS.tiff,3a,Other,8,4
2948,1008,57209,7,LE,LS,tiff/AMD_Category_4b_Other/57209-07-F2-LE-LS.tiff,4b,Other,8,2
4532,1011,52594,8,LE,LS,tiff/AMD_Category_4a_Case/52594-08-F2-LE-LS.tiff,4a,Case,9,4
2880,1016,53521,6,LE,LS,tiff/AMD_Category_4a_Case/53521-06-F2-LE-LS.tiff,4a,Case,8,4
3206,1017,52761,0,LE,LS,tiff/AMD_Category_3b_Case/52761-QUA-F2-LE-LS.tiff,3b,Case,9,4
1498,1031,52387,10,LE,LS,tiff/AMD_Category_1_Other/52387-10-F2-LE-LS.tiff,1,Other,2,1
4243,1040,53738,0,LE,LS,tiff/AMD_Category_3a_Case/53738-QUA-F2-LE-LS.tiff,3a,Case,8,2
3604,1045,55487,6,LE,LS,tiff/AMD_Category_4b_Case/55487-06-F2-LE-LS.tiff,4b,Case,8,4
2457,1046,51624,8,LE,LS,tiff/AMD_Category_2_Other/51624-08-F2-LE-LS.tiff,2,Other,2,1
2217,1049,53483,14,LE,LS,tiff/AMD_Category_3a_Case/53483-14-F2-LE-LS.tiff,3a,Case,9,4


# What do our classes look like?

In [38]:
vc = master_LELS['severity'].value_counts( sort=False )

In [39]:
vc

1    118
2    105
3     53
4     84
5     57
6     78
7    116
8    199
9    121
dtype: int64

# We have an abundance of severity 8; try to steer some of those patients into classes 3 and 5 if they have prior images with those scores

In [61]:
groups = grouped.__iter__()

In [62]:
# Take the next (patient_id, rows) pair from group_iter for a spot check.
# Note this advances the group_iter iterator, not `groups`.
label, group = next( group_iter )

In [63]:
group

Unnamed: 0,patient_id,dummy_id,t,eye,side,path,baseline_scale,casecontrol,severity,simple
2948,1008,57209,7,LE,LS,tiff/AMD_Category_4b_Other/57209-07-F2-LE-LS.tiff,4b,Other,8,2
3288,1008,57209,0,LE,LS,tiff/AMD_Category_4b_Other/57209-QUA-F2-LE-LS....,4b,Other,4,1


In [71]:
group['severity']

2948    8
3288    4
Name: severity, dtype: float64

In [65]:
import numpy as np

In [66]:
np.any( group['severity'] == 8 )

True

In [67]:
np.any( group['severity'] == 5 )

False

In [72]:
# Choose, per patient, which severity level's rows to keep.
#
# Default: the patient's highest severity. Exception: when that highest score
# is 8 (the over-represented class), prefer the patient's severity-3 rows if
# any exist, otherwise their severity-5 rows, otherwise fall back to 8.
#
# The GroupBy object is iterated directly so the cell does not depend on a
# half-consumed iterator left over from an earlier cell and can be re-run
# safely. Several rows may share the chosen severity; duplicates per patient
# are dropped in a later cell.
picked = []
for label, patient_rows in grouped:
    severity = patient_rows['severity']
    target = severity.max()
    if target == 8.0:
        # Order matters: 3 is tried before 5.
        for alternative in ( 3.0, 5.0 ):
            if ( severity == alternative ).any():
                target = alternative
                break
    picked.append( patient_rows.loc[ severity == target ] )

In [73]:
len(picked)

931

In [74]:
master_LELS = pd.concat( picked )

In [76]:
# One row per patient: keep the first row encountered for each patient_id.
master_LELS = master_LELS.drop_duplicates( subset='patient_id' )

In [77]:
master_LELS['severity'].value_counts( sort=False )

1    118
2    105
3     75
4     84
5    121
6     78
7    116
8    113
9    121
dtype: int64

# Create file of files

In [78]:
# Order the rows by severity, then by patient_id, modifying master_LELS in place.
# NOTE(review): DataFrame.sort is not available in newer pandas releases
# (sort_values is its replacement) -- confirm the pandas version before re-running.
# NOTE(review): patient_id values display as strings elsewhere in this notebook
# (e.g. '1001'), so the secondary ordering is presumably lexicographic -- verify.
master_LELS.sort( ['severity', 'patient_id'], inplace=True )

In [79]:
master_LELS

Unnamed: 0,patient_id,dummy_id,t,eye,side,path,baseline_scale,casecontrol,severity,simple
77,1060,51626,10,LE,LS,tiff/AMD_Category_1_Control/51626-10-F2-LE-LS....,1,Control,1,0
9,1070,51060,14,LE,LS,tiff/AMD_Category_1_Control/51060-14-F2-LE-LS....,1,Control,1,0
71,1093,51593,8,LE,LS,tiff/AMD_Category_1_Control/51593-08-F2-LE-LS....,1,Control,1,0
45,1188,51362,24,LE,LS,tiff/AMD_Category_1_Other/51362-24-F2-LE-LS.tiff,1,Other,1,0
4794,1243,51454,4,LE,LS,tiff/AMD_Category_2_Control/51454-04-F2-LE-LS....,2,Control,1,0
4437,1303,51379,4,LE,LS,tiff/AMD_Category_1_Control/51379-04-F2-LE-LS....,1,Control,1,0
14,1392,51096,10,LE,LS,tiff/AMD_Category_1_Control/51096-10-F2-LE-LS....,1,Control,1,0
4101,1444,51377,0,LE,LS,tiff/AMD_Category_1_Control/51377-QUA-F2-LE-LS...,1,Control,1,0
5172,1448,51713,4,LE,LS,tiff/AMD_Category_2_Other/51713-04-F2-LE-LS.tiff,2,Other,1,1
2806,1482,51686,0,LE,LS,tiff/AMD_Category_1_Control/51686-QUA-F2-LE-LS...,1,Control,1,0


## reindex:

In [80]:
master_LELS.index = range(len(master_LELS))

In [81]:
master_LELS

Unnamed: 0,patient_id,dummy_id,t,eye,side,path,baseline_scale,casecontrol,severity,simple
0,1060,51626,10,LE,LS,tiff/AMD_Category_1_Control/51626-10-F2-LE-LS....,1,Control,1,0
1,1070,51060,14,LE,LS,tiff/AMD_Category_1_Control/51060-14-F2-LE-LS....,1,Control,1,0
2,1093,51593,8,LE,LS,tiff/AMD_Category_1_Control/51593-08-F2-LE-LS....,1,Control,1,0
3,1188,51362,24,LE,LS,tiff/AMD_Category_1_Other/51362-24-F2-LE-LS.tiff,1,Other,1,0
4,1243,51454,4,LE,LS,tiff/AMD_Category_2_Control/51454-04-F2-LE-LS....,2,Control,1,0
5,1303,51379,4,LE,LS,tiff/AMD_Category_1_Control/51379-04-F2-LE-LS....,1,Control,1,0
6,1392,51096,10,LE,LS,tiff/AMD_Category_1_Control/51096-10-F2-LE-LS....,1,Control,1,0
7,1444,51377,0,LE,LS,tiff/AMD_Category_1_Control/51377-QUA-F2-LE-LS...,1,Control,1,0
8,1448,51713,4,LE,LS,tiff/AMD_Category_2_Other/51713-04-F2-LE-LS.tiff,2,Other,1,1
9,1482,51686,0,LE,LS,tiff/AMD_Category_1_Control/51686-QUA-F2-LE-LS...,1,Control,1,0


In [82]:
# Absolute directory prefix that is prepended to each relative 'path' value
# when the file-of-files entries are built.
# NOTE(review): this is a machine-specific mount point; adjust it when running
# on another machine.
pre = '/Volumes/LGDATA/areds/44770/AREDS/AREDS_2014/'

In [83]:
master_iter = master_LELS.iterrows()

In [84]:
master_iter.next()

(0, patient_id                                                     1060
 dummy_id                                                      51626
 t                                                                10
 eye                                                              LE
 side                                                             LS
 path              tiff/AMD_Category_1_Control/51626-10-F2-LE-LS....
 baseline_scale                                                    1
 casecontrol                                                 Control
 severity                                                          1
 simple                                                            0
 Name: 0, dtype: object)

In [85]:
master_iter = master_LELS.iterrows()

In [86]:
# Build (absolute image path, severity) pairs, one per selected image.
# A fresh iterrows() iterator is created here rather than reusing a previously
# built one: a reused iterator may already be partly or fully consumed, which
# would silently drop rows or produce an empty list on re-run.
untiled_fof_list = [
    ( pre + row['path'], row['severity'] ) for _idx, row in master_LELS.iterrows()
]

# Add tile info

We want the middle 4 tiles of the 6x6 tile grid, i.e., tile indices 14, 15, 20, and 21 (rows 2-3 and columns 2-3, counting from zero).

In [88]:
# Template for one file-of-files line: name, ground truth and image path are
# tab-separated, followed by a brace-wrapped option list. The doubled braces
# are literal braces in the formatted output.
info5D = (
    '{name}\t{gt}\t{path}\t'
    '{{long=True;tile_num_cols=6;tile_num_rows=6;'
    'tile_col_index={col};tile_row_index={row};sample_sequence_id={ssid}}}'
)

In [89]:
info5D.format( name="foo", gt=42, path='/var/oosh', col=78, row=56, ssid=999 )

'foo\t42\t/var/oosh\t{long=True;tile_num_cols=6;tile_num_rows=6;tile_col_index=78;tile_row_index=56;sample_sequence_id=999}'

In [90]:
from os.path import basename

In [91]:
# Expand every image into four file-of-files lines, one per inner tile.
# Tiles are visited column-major -- (col 2,row 2), (col 2,row 3), (col 3,row 2),
# (col 3,row 3) -- and numbered 0..3 in that order as the sample sequence id.
inner_tiles = [ ( tile_col, tile_row ) for tile_col in [2, 3] for tile_row in [2, 3] ]

fof_list = []
for img_path, ground_truth in untiled_fof_list:
    # The sample name is whatever precedes the first '-' in the file name.
    sample_name, _remainder = basename( img_path ).split( '-', 1 )
    for seq_id, ( tile_col, tile_row ) in enumerate( inner_tiles ):
        fof_list.append( info5D.format(
            name=sample_name, gt=ground_truth, path=img_path,
            col=tile_col, row=tile_row, ssid=seq_id ) )

In [92]:
# Show the first five generated lines as a sanity check.
for entry in fof_list[:5]:
    print( entry )

51626	1.0	/Volumes/LGDATA/areds/44770/AREDS/AREDS_2014/tiff/AMD_Category_1_Control/51626-10-F2-LE-LS.tiff	{long=True;tile_num_cols=6;tile_num_rows=6;tile_col_index=2;tile_row_index=2;sample_sequence_id=0}
51626	1.0	/Volumes/LGDATA/areds/44770/AREDS/AREDS_2014/tiff/AMD_Category_1_Control/51626-10-F2-LE-LS.tiff	{long=True;tile_num_cols=6;tile_num_rows=6;tile_col_index=2;tile_row_index=3;sample_sequence_id=1}
51626	1.0	/Volumes/LGDATA/areds/44770/AREDS/AREDS_2014/tiff/AMD_Category_1_Control/51626-10-F2-LE-LS.tiff	{long=True;tile_num_cols=6;tile_num_rows=6;tile_col_index=3;tile_row_index=2;sample_sequence_id=2}
51626	1.0	/Volumes/LGDATA/areds/44770/AREDS/AREDS_2014/tiff/AMD_Category_1_Control/51626-10-F2-LE-LS.tiff	{long=True;tile_num_cols=6;tile_num_rows=6;tile_col_index=3;tile_row_index=3;sample_sequence_id=3}
51060	1.0	/Volumes/LGDATA/areds/44770/AREDS/AREDS_2014/tiff/AMD_Category_1_Control/51060-14-F2-LE-LS.tiff	{long=True;tile_num_cols=6;tile_num_rows=6;tile_col_index=2;tile_row_index

In [93]:
# Write the file of files to disk, one entry per line.
with open( 'master_LELS_inner4_sev1-9_balanced-ish.fof.tsv', 'w') as out_file:
    out_file.writelines( entry + '\n' for entry in fof_list )

# Baseline WND-CHARM 4-tile 9-class Severity score WITHOUT LDA

In [1]:
import wndcharm

In [2]:
print wndcharm.__version__

0.9.2


Using a code base in which I squashed some bugs; I haven't merged it to master yet.

In [3]:
from wndcharm.FeatureSpace import FeatureSpace

In [None]:
# Build the feature space directly from the file of files written above.
# This cell has no execution count in the saved notebook; the cells that follow
# load a pre-computed .fit file instead.
# NOTE(review): the step that produced 'master_LELS_inner4_sev1-9_balanced-ish.fit'
# is not shown in this notebook -- confirm how that file was generated.
fs = FeatureSpace.NewFromFileOfFiles( 'master_LELS_inner4_sev1-9_balanced-ish.fof.tsv', num_samples_per_group=4, tile_num_rows=None, tile_num_cols=None)

In [4]:
fs = FeatureSpace.NewFromFitFile( 'master_LELS_inner4_sev1-9_balanced-ish.fit', tile_num_rows=2, tile_num_cols=2)

LOADED FEATURE SPACE FROM WND-CHARM .fit FILE master_LELS_inner4_sev1-9_balanced-ish.fit: <FeatureSpace "master_LELS_inner4_sev1-9_balanced-ish.fit" n_features=2919 n_total_samples=3724 n_samples_per_group=4 n_classes=9 samples_per_class=("1.0": 472, "2.0": 420, "3.0": 300, "4.0": 336, "5.0": 484, "6.0": 312, "7.0": 464, "8.0": 452, "9.0": 484)>


In [5]:
fs.num_samples_per_group

4

In [6]:
from wndcharm.FeatureSpacePredictionExperiment import FeatureSpaceClassificationExperiment

In [7]:
# Settings for the 9-class shuffle-split experiment without LDA.
kwargs = {
    'feature_space': fs,
    'train_size': 40,
    'test_size': 3,
    'n_iter': 150,
    'lda': False,  # True
    'quiet': True,
    'random_state': 42,
    'progress': True,
}

In [8]:
exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( **kwargs )

iter	split class acc.
0	0.185185185185
1	0.0740740740741
2	0.0740740740741
3	0.148148148148
4	0.259259259259
5	0.185185185185
6	0.111111111111
7	0.185185185185
8	0.148148148148
9	0.185185185185
10	0.148148148148
11	0.185185185185
12	0.185185185185
13	0.185185185185
14	0.185185185185
15	0.259259259259
16	0.148148148148
17	0.111111111111
18	0.296296296296
19	0.222222222222
20	0.0740740740741
21	0.111111111111
22	0.222222222222
23	0.0740740740741
24	0.0740740740741
25	0.111111111111
26	0.185185185185
27	0.0740740740741
28	0.111111111111
29	0.111111111111
30	0.037037037037
31	0.111111111111
32	0.148148148148
33	0.111111111111
34	0.222222222222
35	0.185185185185
36	0.148148148148
37	0.296296296296
38	0.0740740740741
39	0.222222222222
40	0.148148148148
41	0.259259259259
42	0.333333333333
43	0.222222222222
44	0.259259259259
45	0.148148148148
46	0.148148148148
47	0.185185185185
48	0.259259259259
49	0.037037037037
50	0.259259259259
51	0.222222222222
52	0.148148148148
53	0.259259259259
54	0.2222

In [9]:
exp.Print()

FeatureSpaceClassificationExperiment "master_LELS_inner4_sev1-9_balanced-ish.fit (sorted)" (150 iterations)
694/4050 correct = 17.14 +/- 1.16% w/ 95% conf. (normal approx. interval)
Standard Error: 2.1594
Pearson Corellation Coefficient (r): 0.5770
Coefficient of Determination (r^2): 0.3329
Spearman Coefficient: 0.5711


Confusion Matrix:
   	1.0	2.0	3.0	4.0	5.0	6.0	7.0	8.0	9.0	|	total	acc.
   	---	---	---	---	---	---	---	---	---	|	-----	----
1.0	109	142	56	58	36	20	7	14	8	|	450	24.22%
2.0	130	93	57	62	35	21	16	28	8	|	450	20.67%
3.0	100	108	50	72	36	29	20	15	20	|	450	11.11%
4.0	92	112	67	38	55	22	18	17	29	|	450	8.44%
5.0	60	91	54	56	32	28	34	49	46	|	450	7.11%
6.0	56	65	39	34	48	50	43	70	45	|	450	11.11%
7.0	33	51	25	34	42	33	68	110	54	|	450	15.11%
8.0	28	42	35	24	48	34	61	99	79	|	450	22.00%
9.0	20	20	17	12	24	19	63	120	155	|	450	34.44%


Similarity Matrix:
   	1.0	2.0	3.0	4.0	5.0	6.0	7.0	8.0	9.0
   	---	---	---	---	---	---	---	---	---
1.0	1.00	1.03	0.87	0.88	0.73	0.59	0.50	0.45	0.36	
2.

In [10]:
exp.PerSampleStatistics( output_filepath='master_LELS_inner4_sev1-9_balanced-ish_i40_j3_n150_WITHsplit.results.tsv' )
exp.PerSampleStatistics( print_indiv=False, output_filepath='master_LELS_inner4_sev1-9_balanced-ish_i40_j3_n150_.results.tsv' )

Saving output of function "PerSampleStatistics()" to file "master_LELS_inner4_sev1-9_balanced-ish_i40_j3_n150_WITHsplit.results.tsv", mode "w"
Saving output of function "PerSampleStatistics()" to file "master_LELS_inner4_sev1-9_balanced-ish_i40_j3_n150_.results.tsv", mode "w"


<FeatureSpaceClassificationExperiment #0 "master_LELS_inner4_sev1-9_balanced-ish.fit (sorted)" n_splits=150 n_calls=4050 n_corr=694 acc=17.14% std_err=2.16%>

# 9-class Severity score WITH LDA

In [1]:
from wndcharm.FeatureSpace import FeatureSpace

Note: the feature space is reloaded anew each time to be safe, in case it was modified by the previous run.

In [11]:
fs = FeatureSpace.NewFromFitFile( 'master_LELS_inner4_sev1-9_balanced-ish.fit', tile_num_rows=2, tile_num_cols=2)

LOADED FEATURE SPACE FROM WND-CHARM .fit FILE master_LELS_inner4_sev1-9_balanced-ish.fit: <FeatureSpace "master_LELS_inner4_sev1-9_balanced-ish.fit" n_features=2919 n_total_samples=3724 n_samples_per_group=4 n_classes=9 samples_per_class=("1.0": 472, "2.0": 420, "3.0": 300, "4.0": 336, "5.0": 484, "6.0": 312, "7.0": 464, "8.0": 452, "9.0": 484)>


In [3]:
from wndcharm.FeatureSpacePredictionExperiment import FeatureSpaceClassificationExperiment

In [12]:
# Settings for the 9-class shuffle-split experiment with LDA enabled.
kwargs = {
    'feature_space': fs,
    'train_size': 40,
    'test_size': 3,
    'n_iter': 150,
    'lda': True,
    'quiet': True,
    'random_state': 42,
    'progress': True,
}

In [13]:
exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( **kwargs )

iter	split class acc.
0	0.185185185185
1	0.259259259259
2	0.185185185185
3	0.0740740740741
4	0.185185185185
5	0.0740740740741
6	0.222222222222
7	0.259259259259
8	0.148148148148
9	0.185185185185
10	0.222222222222
11	0.259259259259
12	0.222222222222
13	0.185185185185
14	0.148148148148
15	0.37037037037
16	0.185185185185
17	0.185185185185
18	0.222222222222
19	0.222222222222
20	0.185185185185
21	0.259259259259
22	0.148148148148
23	0.111111111111
24	0.222222222222
25	0.148148148148
26	0.259259259259
27	0.37037037037
28	0.259259259259
29	0.148148148148
30	0.259259259259
31	0.111111111111
32	0.148148148148
33	0.333333333333
34	0.222222222222
35	0.222222222222
36	0.185185185185
37	0.296296296296
38	0.259259259259
39	0.296296296296
40	0.185185185185
41	0.296296296296
42	0.37037037037
43	0.333333333333
44	0.148148148148
45	0.037037037037
46	0.222222222222
47	0.259259259259
48	0.148148148148
49	0.333333333333
50	0.222222222222
51	0.185185185185
52	0.185185185185
53	0.148148148148
54	0.185185185185



In [14]:
exp.Print()

Displaying feature weight statistics for all 8 features
FeatureSpaceClassificationExperiment "master_LELS_inner4_sev1-9_balanced-ish.fit (sorted)" (150 iterations)
814/4050 correct = 20.10 +/- 1.23% w/ 95% conf. (normal approx. interval)
Standard Error: 2.0577
Pearson Corellation Coefficient (r): 0.6087
Coefficient of Determination (r^2): 0.3705
Spearman Coefficient: 0.6088


Confusion Matrix:
   	1.0	2.0	3.0	4.0	5.0	6.0	7.0	8.0	9.0	|	total	acc.
   	---	---	---	---	---	---	---	---	---	|	-----	----
1.0	136	94	69	73	30	18	16	10	4	|	450	30.22%
2.0	96	101	61	73	41	26	16	21	15	|	450	22.44%
3.0	84	67	74	66	53	40	26	22	18	|	450	16.44%
4.0	63	75	70	74	73	39	20	24	12	|	450	16.44%
5.0	47	70	40	58	57	53	33	52	40	|	450	12.67%
6.0	26	45	37	34	41	70	90	58	49	|	450	15.56%
7.0	18	31	38	40	43	65	75	79	61	|	450	16.67%
8.0	15	23	32	38	46	55	63	97	81	|	450	21.56%
9.0	14	14	23	19	36	50	57	107	130	|	450	28.89%


Similarity Matrix:
   	1.0	2.0	3.0	4.0	5.0	6.0	7.0	8.0	9.0
   	---	---	---	---	---	---	---	---	-

In [17]:
exp.PerSampleStatistics( output_filepath='master_LELS_inner4_sev1-9_balanced-ish_i40_j3_n150_LDA_WITHsplit.results.tsv' )
exp.PerSampleStatistics( print_indiv=False, output_filepath='master_LELS_inner4_sev1-9_balanced-ish_i40_j3_n150_LDA.results.tsv' )

Saving output of function "PerSampleStatistics()" to file "master_LELS_inner4_sev1-9_balanced-ish_i40_j3_n150_LDA_WITHsplit.results.tsv", mode "w"
Saving output of function "PerSampleStatistics()" to file "master_LELS_inner4_sev1-9_balanced-ish_i40_j3_n150_LDA.results.tsv", mode "w"


<FeatureSpaceClassificationExperiment #1 "master_LELS_inner4_sev1-9_balanced-ish.fit (sorted)" n_splits=150 n_calls=4050 n_corr=814 acc=20.10% std_err=2.06%>

# 12-class Severity score cross-validation WITHOUT LDA

In [18]:
fs = FeatureSpace.NewFromFitFile( 'master_LELS_inner4tiles.fit', tile_num_rows=2, tile_num_cols=2)

LOADED FEATURE SPACE FROM WND-CHARM .fit FILE master_LELS_inner4tiles.fit: <FeatureSpace "master_LELS_inner4tiles.fit" n_features=2919 n_total_samples=4108 n_samples_per_group=4 n_classes=12 samples_per_class=("1.0": 468, "2.0": 416, "3.0": 208, "4.0": 304, "5.0": 204, "6.0": 216, "7.0": 296, "8.0": 560, "9.0": 220, "10.0": 264, "11.0": 780, "12.0": 172)>


In [19]:
# Settings for the 12-class shuffle-split experiment without LDA.
kwargs = {
    'feature_space': fs,
    'train_size': 40,
    'test_size': 3,
    'n_iter': 150,
    'lda': False,  # True
    'quiet': True,
    'random_state': 42,
    'progress': True,
}

In [20]:
exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( **kwargs )

iter	split class acc.
0	0.111111111111
1	0.222222222222
2	0.166666666667
3	0.0555555555556
4	0.138888888889
5	0.222222222222
6	0.138888888889
7	0.0555555555556
8	0.222222222222
9	0.138888888889
10	0.166666666667
11	0.111111111111
12	0.111111111111
13	0.166666666667
14	0.166666666667
15	0.166666666667
16	0.111111111111
17	0.166666666667
18	0.111111111111
19	0.138888888889
20	0.138888888889
21	0.0833333333333
22	0.0833333333333
23	0.166666666667
24	0.194444444444
25	0.277777777778
26	0.166666666667
27	0.194444444444
28	0.138888888889
29	0.111111111111
30	0.194444444444
31	0.0833333333333
32	0.194444444444
33	0.305555555556
34	0.166666666667
35	0.0833333333333
36	0.138888888889
37	0.194444444444
38	0.222222222222
39	0.25
40	0.138888888889
41	0.0277777777778
42	0.222222222222
43	0.166666666667
44	0.166666666667
45	0.277777777778
46	0.138888888889
47	0.138888888889
48	0.0833333333333
49	0.0833333333333
50	0.138888888889
51	0.0833333333333
52	0.166666666667
53	0.25
54	0.0555555555556
55	0.05

In [21]:
exp.Print()

FeatureSpaceClassificationExperiment "master_LELS_inner4tiles.fit (sorted)" (150 iterations)
846/5400 correct = 15.67 +/- 0.97% w/ 95% conf. (normal approx. interval)
Standard Error: 2.7014
Pearson Corellation Coefficient (r): 0.6465
Coefficient of Determination (r^2): 0.4180
Spearman Coefficient: 0.6536


Confusion Matrix:
    	1.0	2.0	3.0	4.0	5.0	6.0	7.0	8.0	9.0	10.0	11.0	12.0	|	total	acc.
    	---	---	---	---	---	---	---	---	---	----	----	----	|	-----	----
1.0	117	99	59	70	40	31	11	12	5	1	2	3	|	450	26.00%
2.0	119	81	52	65	49	21	20	15	4	5	5	14	|	450	18.00%
3.0	119	90	46	72	36	36	9	13	7	3	16	3	|	450	10.22%
4.0	113	83	71	34	59	16	21	17	15	6	9	6	|	450	7.56%
5.0	72	66	58	66	41	22	23	49	33	9	4	7	|	450	9.11%
6.0	67	57	32	27	57	60	35	43	22	29	13	8	|	450	13.33%
7.0	26	52	29	38	29	41	55	62	16	31	36	35	|	450	12.22%
8.0	35	40	24	31	58	29	35	63	62	29	15	29	|	450	14.00%
9.0	15	20	42	12	13	8	15	72	80	60	47	66	|	450	17.78%
10.0	13	14	3	11	8	20	21	39	78	77	29	137	|	450	17.11%
11.0	27	30	21	10	16	28	

In [22]:
exp.PerSampleStatistics( output_filepath='master_LELS_inner4_i40_j3_n150_WITHsplit.results.tsv' )
exp.PerSampleStatistics( print_indiv=False, output_filepath='master_LELS_inner4_i40_j3_n150.results.tsv' )

Saving output of function "PerSampleStatistics()" to file "master_LELS_inner4_i40_j3_n150_WITHsplit.results.tsv", mode "w"
Saving output of function "PerSampleStatistics()" to file "master_LELS_inner4_i40_j3_n150.results.tsv", mode "w"


<FeatureSpaceClassificationExperiment #2 "master_LELS_inner4tiles.fit (sorted)" n_splits=150 n_calls=5400 n_corr=846 acc=15.67% std_err=2.70%>

# 12-class Severity score cross-validation WITH LDA

In [23]:
fs = FeatureSpace.NewFromFitFile( 'master_LELS_inner4tiles.fit', tile_num_rows=2, tile_num_cols=2)

LOADED FEATURE SPACE FROM WND-CHARM .fit FILE master_LELS_inner4tiles.fit: <FeatureSpace "master_LELS_inner4tiles.fit" n_features=2919 n_total_samples=4108 n_samples_per_group=4 n_classes=12 samples_per_class=("1.0": 468, "2.0": 416, "3.0": 208, "4.0": 304, "5.0": 204, "6.0": 216, "7.0": 296, "8.0": 560, "9.0": 220, "10.0": 264, "11.0": 780, "12.0": 172)>


In [24]:
# Settings for the 12-class shuffle-split experiment with LDA enabled.
kwargs = {
    'feature_space': fs,
    'train_size': 40,
    'test_size': 3,
    'n_iter': 150,
    'lda': True,
    'quiet': True,
    'random_state': 42,
    'progress': True,
}

In [25]:
exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( **kwargs )

iter	split class acc.
0	0.166666666667
1	0.222222222222
2	0.194444444444
3	0.166666666667
4	0.138888888889
5	0.277777777778
6	0.25
7	0.194444444444
8	0.166666666667
9	0.166666666667
10	0.138888888889
11	0.0833333333333
12	0.166666666667
13	0.222222222222
14	0.194444444444
15	0.25
16	0.0555555555556
17	0.25
18	0.25
19	0.25
20	0.194444444444
21	0.194444444444
22	0.138888888889
23	0.0555555555556
24	0.194444444444
25	0.222222222222
26	0.166666666667
27	0.277777777778
28	0.138888888889
29	0.333333333333
30	0.194444444444
31	0.222222222222
32	0.194444444444
33	0.194444444444
34	0.194444444444
35	0.222222222222
36	0.111111111111
37	0.111111111111
38	0.138888888889
39	0.194444444444
40	0.25
41	0.166666666667
42	0.222222222222
43	0.388888888889
44	0.194444444444
45	0.166666666667
46	0.25
47	0.305555555556
48	0.166666666667
49	0.166666666667
50	0.194444444444
51	0.0833333333333
52	0.0833333333333
53	0.277777777778
54	0.222222222222
55	0.138888888889
56	0.111111111111
57	0.111111111111
58	0.25
5

In [26]:
exp.Print()

Displaying feature weight statistics for all 11 features
FeatureSpaceClassificationExperiment "master_LELS_inner4tiles.fit (sorted)" (150 iterations)
967/5400 correct = 17.91 +/- 1.02% w/ 95% conf. (normal approx. interval)
Standard Error: 2.6056
Pearson Corellation Coefficient (r): 0.6897
Coefficient of Determination (r^2): 0.4757
Spearman Coefficient: 0.6948


Confusion Matrix:
    	1.0	2.0	3.0	4.0	5.0	6.0	7.0	8.0	9.0	10.0	11.0	12.0	|	total	acc.
    	---	---	---	---	---	---	---	---	---	----	----	----	|	-----	----
1.0	119	89	82	55	35	17	9	17	7	7	7	6	|	450	26.44%
2.0	93	84	65	71	57	20	17	17	6	2	9	9	|	450	18.67%
3.0	94	71	69	69	46	52	11	17	9	2	5	5	|	450	15.33%
4.0	65	59	95	56	51	33	19	31	18	1	17	5	|	450	12.44%
5.0	41	66	56	62	62	26	43	28	25	14	21	6	|	450	13.78%
6.0	41	33	59	52	36	41	60	43	41	18	20	6	|	450	9.11%
7.0	19	25	24	32	26	64	81	59	33	34	38	15	|	450	18.00%
8.0	23	36	25	46	31	41	43	63	52	42	23	25	|	450	14.00%
9.0	19	12	26	13	19	32	44	57	90	64	32	42	|	450	20.00%
10.0	10	7	8	13	17	1

# Case-control analysis

In [2]:
import wndcharm

In [3]:
wndcharm.__version__

'0.9.3'

In [4]:
from wndcharm.FeatureSpace import FeatureSpace

In [32]:
fs = FeatureSpace.NewFromFitFile( 'master_LELS_inner4tiles.fit', tile_num_rows=2, tile_num_cols=2)

LOADED FEATURE SPACE FROM WND-CHARM .fit FILE master_LELS_inner4tiles.fit: <FeatureSpace "master_LELS_inner4tiles.fit" n_features=2919 n_total_samples=4108 n_samples_per_group=4 n_classes=12 samples_per_class=("1.0": 468, "2.0": 416, "3.0": 208, "4.0": 304, "5.0": 204, "6.0": 216, "7.0": 296, "8.0": 560, "9.0": 220, "10.0": 264, "11.0": 780, "12.0": 172)>


In [11]:
def convert_to_case_control( lab ):
    """Collapse a 1-12 severity label into one of three coarse class names.

    Parameters
    ----------
    lab : str or number
        Severity label such as ``'8.0'``. It is parsed with
        ``int( float( lab ) )``, so any fractional part is truncated.

    Returns
    -------
    str
        ``'control'`` for severities 1-3, ``'between'`` for 4-6 and
        ``'case'`` for 7-12.

    Raises
    ------
    ValueError
        If the parsed severity lies outside 1-12. Errors raised by
        ``float()``/``int()`` for unparseable input propagate unchanged.
    """
    val = int( float( lab ) )
    # Validate first: without this check the three ranges below cover every
    # integer, so a bad label (e.g. 0) would be silently reported as 'control'.
    if val < 1 or val > 12:
        raise ValueError( 'severity label outside 1-12: {0!r}'.format( lab ) )
    if val <= 3:
        return 'control'
    if val <= 6:
        return 'between'
    return 'case'

In [33]:
labels = [ convert_to_case_control( lab ) for lab in fs._contiguous_ground_truth_labels ]

In [34]:
fs._contiguous_ground_truth_labels = labels

In [35]:
fs.SortSamplesByGroundTruth( rebuild_views=True, inplace=True )

<FeatureSpace "master_LELS_inner4tiles.fit (sorted)" n_features=2919 n_total_samples=4108 n_samples_per_group=4 n_classes=3 samples_per_class=("between": 724, "case": 2292, "control": 1092)>

In [16]:
fs.ToFitFile( 'master_LELS_inner4tiles_3Class_case_control_between.fit')

In [22]:
# Settings for the 3-class (control / between / case) shuffle-split experiment
# without LDA.
kwargs = {
    'feature_space': fs,
    'train_size': 120,
    'test_size': 20,
    'n_iter': 200,
    'lda': False,  # True
    'quiet': True,
    'random_state': 42,
    'progress': True,
}

In [19]:
from wndcharm.FeatureSpacePredictionExperiment import FeatureSpaceClassificationExperiment

In [23]:
# Run the shuffle-split classification experiment with the settings above.
# The per-split accuracies listed below are presumably emitted because
# progress=True.
exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( **kwargs )

iter	split class acc.
0	0.433333333333
1	0.583333333333
2	0.566666666667
3	0.55
4	0.55
5	0.566666666667
6	0.533333333333
7	0.55
8	0.5
9	0.516666666667
10	0.516666666667
11	0.383333333333
12	0.516666666667
13	0.45
14	0.4
15	0.533333333333
16	0.483333333333
17	0.516666666667
18	0.516666666667
19	0.483333333333
20	0.55
21	0.616666666667
22	0.5
23	0.533333333333
24	0.633333333333
25	0.6
26	0.566666666667
27	0.533333333333
28	0.533333333333
29	0.433333333333
30	0.5
31	0.5
32	0.466666666667
33	0.566666666667
34	0.433333333333
35	0.466666666667
36	0.483333333333
37	0.533333333333
38	0.483333333333
39	0.433333333333
40	0.55
41	0.55
42	0.516666666667
43	0.516666666667
44	0.516666666667
45	0.45
46	0.516666666667
47	0.45
48	0.583333333333
49	0.483333333333
50	0.533333333333
51	0.516666666667
52	0.516666666667
53	0.633333333333
54	0.483333333333
55	0.416666666667
56	0.5
57	0.466666666667
58	0.516666666667
59	0.6
60	0.583333333333
61	0.616666666667
62	0.483333333333
63	0.583333333333
64	0.55
65	0.5

In [24]:
# Print the aggregate report for the three-class run: overall accuracy, the
# confusion, similarity and class-probability matrices, and feature weights.
exp.Print()

FeatureSpaceClassificationExperiment "master_LELS_inner4tiles.fit (sorted) (sorted)" (200 iterations)
6317/12000 correct = 52.64 +/- 0.89% w/ 95% conf. (normal approx. interval)


Confusion Matrix:
       	between	case	control	|	total	acc.
       	-------	----	-------	|	-----	----
between	1074	709	2217	|	4000	26.85%
case	654	2530	816	|	4000	63.25%
control	1044	243	2713	|	4000	67.83%


Similarity Matrix:
       	between	case	control
       	-------	----	-------
between	1.00	0.74	1.12	
case	0.61	1.00	0.53	
control	0.83	0.45	1.00	


Average Class Probability Matrix:
       	between	case	control
       	-------	----	-------
between	0.3500	0.2583	0.3917	
case	0.2844	0.4684	0.2471	
control	0.3640	0.1985	0.4375	


Feature Weight Analysis (top 20 features):
Rank	mean	count	StdDev	Min	Max	Name
----	----	-----	------	---	---	----
1	0.280	200	0.053	0.145	0.437	Fractal Features () [19]
2	0.274	200	0.053	0.141	0.428	Fractal Features () [18]
3	0.267	200	0.052	0.135	0.417	Fractal Features () [17]
4	0

In [27]:
# Drop the intermediate 'between' class to leave a two-class case-vs-control
# problem. inplace=True mutates fs itself, so every cell run after this one
# sees the two-class feature space.
fs.RemoveClass('between', inplace=True)

REMOVED CLASS between, RESULTANT FEATURE SPACE: <FeatureSpace "master_LELS_inner4tiles.fit (sorted) (subset) (sorted)" n_features=2919 n_total_samples=3384 n_samples_per_group=4 n_classes=2 samples_per_class=("case": 2292, "control": 1092)>


<FeatureSpace "master_LELS_inner4tiles.fit (sorted) (subset) (sorted)" n_features=2919 n_total_samples=3384 n_samples_per_group=4 n_classes=2 samples_per_class=("case": 2292, "control": 1092)>

In [28]:
# Display fs to confirm only the 'case' and 'control' classes remain.
fs

<FeatureSpace "master_LELS_inner4tiles.fit (sorted) (subset) (sorted)" n_features=2919 n_total_samples=3384 n_samples_per_group=4 n_classes=2 samples_per_class=("case": 2292, "control": 1092)>

In [29]:
# Same shuffle-split settings as the three-class run, now applied to the
# two-class (case vs. control) feature space held in fs.
kwargs = {
    'feature_space': fs,
    'train_size': 120,
    'test_size': 20,
    'n_iter': 200,
    'lda': False,  # True
    'quiet': True,
    'random_state': 42,  # fixed seed for the shuffle splits
    'progress': True,
}

In [30]:
# Run the shuffle-split experiment on the two-class feature space (raw
# features, lda=False).
exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( **kwargs )

iter	split class acc.
0	0.75
1	0.9
2	0.675
3	0.7
4	0.85
5	0.75
6	0.65
7	0.825
8	0.825
9	0.85
10	0.9
11	0.775
12	0.775
13	0.825
14	0.675
15	0.7
16	0.775
17	0.75
18	0.7
19	0.775
20	0.675
21	0.925
22	0.8
23	0.825
24	0.65
25	0.825
26	0.825
27	0.875
28	0.875
29	0.775
30	0.75
31	0.775
32	0.75
33	0.825
34	0.725
35	0.75
36	0.825
37	0.775
38	0.8
39	0.8
40	0.8
41	0.95
42	0.775
43	0.775
44	0.85
45	0.75
46	0.925
47	0.85
48	0.9
49	0.85
50	0.875
51	0.8
52	0.8
53	0.7
54	0.725
55	0.725
56	0.925
57	0.825
58	0.75
59	0.85
60	0.775
61	0.7
62	0.85
63	0.8
64	0.8
65	0.775
66	0.75
67	0.775
68	0.775
69	0.875
70	0.75
71	0.8
72	0.875
73	0.9
74	0.825
75	0.825
76	0.75
77	0.875
78	0.8
79	0.75
80	0.675
81	0.8
82	0.95
83	0.725
84	0.925
85	0.75
86	0.8
87	0.725
88	0.75
89	0.775
90	0.775
91	0.85
92	0.8
93	0.775
94	0.875
95	0.8
96	0.7
97	0.85
98	0.775
99	0.85
100	0.725
101	0.625
102	0.725
103	0.775
104	0.8
105	0.8
106	0.875
107	0.625
108	0.775
109	0.8
110	0.8
111	0.825
112	0.725
113	0.85
114	0.85
115	0.875
116	0.65
117	0

In [31]:
# Print the aggregate report for the two-class case-vs-control run.
exp.Print()

FeatureSpaceClassificationExperiment "master_LELS_inner4tiles.fit (sorted) (subset) (sorted) (sorted)" (200 iterations)
6337/8000 correct = 79.21 +/- 0.89% w/ 95% conf. (normal approx. interval)


Confusion Matrix:
       	case	control	|	total	acc.
       	----	-------	|	-----	----
case	2722	1278	|	4000	68.05%
control	385	3615	|	4000	90.38%


Similarity Matrix:
       	case	control
       	----	-------
case	1.00	0.60	
control	0.46	1.00	


Average Class Probability Matrix:
       	case	control
       	----	-------
case	0.6261	0.3739	
control	0.3140	0.6860	


Feature Weight Analysis (top 20 features):
Rank	mean	count	StdDev	Min	Max	Name
----	----	-----	------	---	---	----
1	0.448	200	0.104	0.229	0.836	Comb Moments (Fourier ()) [3]
2	0.429	200	0.078	0.263	0.687	Fractal Features () [19]
3	0.424	200	0.078	0.260	0.680	Fractal Features () [18]
4	0.418	200	0.100	0.213	0.774	Comb Moments (Fourier ()) [4]
5	0.417	200	0.078	0.256	0.671	Fractal Features () [17]
6	0.407	200	0.078	0.249	0.658	Fracta

In [36]:
# Shuffle-split settings with LDA switched on; everything else matches the
# earlier runs.
# NOTE(review): the output recorded below is a THREE-class result, so fs was
# reloaded and relabelled (cells In [32]-[35]) before this cell was run. On a
# top-to-bottom run fs has already lost its 'between' class through the
# in-place RemoveClass earlier in the notebook; reload fs first to reproduce
# the three-class LDA numbers.
kwargs = {
    'feature_space': fs,
    'train_size': 120,
    'test_size': 20,
    'n_iter': 200,
    'lda': True,
    'quiet': True,
    'random_state': 42,  # fixed seed for the shuffle splits
    'progress': True,
}

In [37]:
# Run the shuffle-split experiment with lda=True. The recorded output comes
# from a three-class feature space (see the report printed by the next cell).
exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( **kwargs )

iter	split class acc.
0	0.55
1	0.583333333333
2	0.5
3	0.566666666667
4	0.566666666667
5	0.516666666667
6	0.566666666667
7	0.7
8	0.533333333333
9	0.616666666667
10	0.5
11	0.433333333333
12	0.45
13	0.566666666667
14	0.466666666667
15	0.6
16	0.533333333333
17	0.45
18	0.616666666667
19	0.516666666667
20	0.4
21	0.55
22	0.433333333333
23	0.5
24	0.6
25	0.433333333333
26	0.483333333333
27	0.7
28	0.5
29	0.5
30	0.5
31	0.516666666667
32	0.55
33	0.616666666667
34	0.6
35	0.5
36	0.566666666667
37	0.483333333333
38	0.433333333333
39	0.55
40	0.616666666667
41	0.633333333333
42	0.433333333333
43	0.466666666667
44	0.616666666667
45	0.55
46	0.466666666667
47	0.616666666667
48	0.516666666667
49	0.5
50	0.583333333333
51	0.666666666667
52	0.55
53	0.55
54	0.6
55	0.5
56	0.583333333333
57	0.483333333333
58	0.466666666667
59	0.616666666667
60	0.55
61	0.566666666667
62	0.433333333333
63	0.583333333333
64	0.533333333333
65	0.433333333333
66	0.55
67	0.533333333333
68	0.516666666667
69	0.5
70	0.533333333333
71	0.36



In [38]:
# Print the aggregate report for the LDA run; the feature-weight table then
# lists LDA components rather than the original image features.
exp.Print()

Displaying feature weight statistics for all 2 features
FeatureSpaceClassificationExperiment "master_LELS_inner4tiles.fit (sorted) (sorted)" (200 iterations)
6493/12000 correct = 54.11 +/- 0.89% w/ 95% conf. (normal approx. interval)


Confusion Matrix:
       	between	case	control	|	total	acc.
       	-------	----	-------	|	-----	----
between	1565	932	1503	|	4000	39.12%
case	914	2594	492	|	4000	64.85%
control	1326	340	2334	|	4000	58.35%


Similarity Matrix:
       	between	case	control
       	-------	----	-------
between	1.00	0.67	0.94	
case	0.50	1.00	0.30	
control	0.69	0.26	1.00	


Average Class Probability Matrix:
       	between	case	control
       	-------	----	-------
between	0.3840	0.2566	0.3594	
case	0.2767	0.5575	0.1659	
control	0.3529	0.1336	0.5135	


Feature Weight Analysis (top 2 features):
Rank	mean	count	StdDev	Min	Max	Name
----	----	-----	------	---	---	----
1	1.000	200	0.000	1.000	1.000	LDAcomponent2
2	1.000	200	0.000	1.000	1.000	LDAcomponent1


In [39]:
# Drop the 'between' class again ahead of the two-class LDA run.
# NOTE(review): this call only has a 'between' class to remove because fs was
# reloaded out of order (cells In [32]-[35]). On a top-to-bottom run the class
# is already gone by this point - confirm how RemoveClass handles that case.
fs.RemoveClass('between', inplace=True)

REMOVED CLASS between, RESULTANT FEATURE SPACE: <FeatureSpace "master_LELS_inner4tiles.fit (sorted) (subset) (sorted)" n_features=2919 n_total_samples=3384 n_samples_per_group=4 n_classes=2 samples_per_class=("case": 2292, "control": 1092)>


<FeatureSpace "master_LELS_inner4tiles.fit (sorted) (subset) (sorted)" n_features=2919 n_total_samples=3384 n_samples_per_group=4 n_classes=2 samples_per_class=("case": 2292, "control": 1092)>

In [40]:
# LDA shuffle-split settings for the two-class (case vs. control) feature
# space.
kwargs = {
    'feature_space': fs,
    'train_size': 120,
    'test_size': 20,
    'n_iter': 200,
    'lda': True,
    'quiet': True,
    'random_state': 42,  # fixed seed for the shuffle splits
    'progress': True,
}

In [41]:
# Run the shuffle-split experiment on the two-class feature space with
# lda=True.
exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( **kwargs )

iter	split class acc.
0	0.8
1	0.75
2	0.85
3	0.675
4	0.775
5	0.8
6	0.725
7	0.8
8	0.775
9	0.95
10	0.825
11	0.775
12	0.8
13	0.875
14	0.7
15	0.775
16	0.775
17	0.725
18	0.75
19	0.825
20	0.65
21	0.825
22	0.775
23	0.825
24	0.8
25	0.75
26	0.875
27	0.825
28	0.75
29	0.8
30	0.775
31	0.775
32	0.825
33	0.6
34	0.75
35	0.725
36	0.8
37	0.8
38	0.8
39	0.875
40	0.75
41	0.85
42	0.725
43	0.8
44	0.875
45	0.775
46	0.825
47	0.725
48	0.725
49	0.825
50	0.85
51	0.8
52	0.825
53	0.775
54	0.775
55	0.7
56	0.8
57	0.775
58	0.85
59	0.9
60	0.8
61	0.775
62	0.9
63	0.875
64	0.875
65	0.75
66	0.725
67	0.85
68	0.825
69	0.85
70	0.825
71	0.8
72	0.75
73	0.875
74	0.825
75	0.775
76	0.7
77	0.875
78	0.8
79	0.75
80	0.825
81	0.775
82	0.9
83	0.725
84	0.9
85	0.775
86	0.725
87	0.8
88	0.725
89	0.85
90	0.7
91	0.875
92	0.775
93	0.775
94	0.775
95	0.875
96	0.75
97	0.825
98	0.825
99	0.825
100	0.775
101	0.65
102	0.65
103	0.725
104	0.725
105	0.725
106	0.775
107	0.75
108	0.85
109	0.825
110	0.775
111	0.825
112	0.825
113	0.825
114	0.875
115	0.875
1

In [42]:
# Print the aggregate report for the two-class LDA run; with two classes the
# feature-weight table holds a single LDA component.
exp.Print()

Displaying feature weight statistics for all 1 features
FeatureSpaceClassificationExperiment "master_LELS_inner4tiles.fit (sorted) (subset) (sorted) (sorted)" (200 iterations)
6347/8000 correct = 79.34 +/- 0.89% w/ 95% conf. (normal approx. interval)


Confusion Matrix:
       	case	control	|	total	acc.
       	----	-------	|	-----	----
case	3061	939	|	4000	76.53%
control	714	3286	|	4000	82.15%


Similarity Matrix:
       	case	control
       	----	-------
case	1.00	0.43	
control	0.34	1.00	


Average Class Probability Matrix:
       	case	control
       	----	-------
case	0.7005	0.2995	
control	0.2529	0.7471	


Feature Weight Analysis (top 1 features):
Rank	mean	count	StdDev	Min	Max	Name
----	----	-----	------	---	---	----
1	1.000	200	0.000	1.000	1.000	LDAcomponent1
