## Notebook for the statstable in Supp.7

## Imports and definitions

In [2]:
%env DJ_SUPPORT_FILEPATH_MANAGEMENT TRUE
%load_ext autoreload
%autoreload 2
%matplotlib inline
import sys, os
import numpy as np
from rgc_natstim_model.analyses.context_change_detection import bootstrap_ci, cohens_d, perform_permutation_test
from rgc_natstim_model.constants.plot_settings import cmap_colors as rgc_colors
from rgc_natstim_model.utils.data_handling import unPickle,makePickle

env: DJ_SUPPORT_FILEPATH_MANAGEMENT=TRUE


In [None]:
base_folder = '/gpfs01/euler/data/SharedFiles/projects/Hoefling2024/'
roc_path = base_folder+'data/simulation/roc/'
respGen_path = base_folder+'data/simulation/response_generation/'
stimGen_path = base_folder+'data/simulation/stimulus_generation/'

## Load auc_per_type value calculated in the ROC analysis

use speed = 4 to demonstrate 3 statistic tested used

In [47]:
speed = 4

In [None]:
auc_by_type_values = unPickle(roc_path+'auc_per_type_s{}.pkl'.format(speed))

In [50]:
types = np.arange(1, 33)
print(types)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32]


inspect what the dictionary is like

In [49]:
auc_by_type_values[2] # per element represent one cell's AUC value for the selected speed

array([0.61777217, 0.52824245, 0.5433285 , 0.47192383, 0.49260933,
       0.60625622, 0.62769542, 0.5177227 , 0.55481974, 0.5538293 ,
       0.48666616, 0.52800603])

### calculate the AUC_per_type across cell

In [51]:
auc_by_type = dict.fromkeys(types)
for t in types:
    auc_by_type[t] = auc_by_type_values[t].mean()
    print("type {}: n_cells = {}, auc = {:.2f}".format(t, len(auc_by_type_values[t]), auc_by_type[t]))

type 1: n_cells = 31, auc = 0.55
type 2: n_cells = 12, auc = 0.54
type 3: n_cells = 8, auc = 0.57
type 4: n_cells = 51, auc = 0.52
type 5: n_cells = 33, auc = 0.51
type 6: n_cells = 11, auc = 0.52
type 7: n_cells = 22, auc = 0.52
type 8: n_cells = 15, auc = 0.54
type 9: n_cells = 14, auc = 0.53
type 10: n_cells = 26, auc = 0.58
type 11: n_cells = 56, auc = 0.54
type 12: n_cells = 86, auc = 0.56
type 13: n_cells = 18, auc = 0.57
type 14: n_cells = 15, auc = 0.55
type 15: n_cells = 1, auc = 0.72
type 16: n_cells = 9, auc = 0.62
type 17: n_cells = 262, auc = 0.60
type 18: n_cells = 108, auc = 0.55
type 19: n_cells = 2, auc = 0.53
type 20: n_cells = 45, auc = 0.54
type 21: n_cells = 56, auc = 0.64
type 22: n_cells = 160, auc = 0.68
type 23: n_cells = 150, auc = 0.66
type 24: n_cells = 29, auc = 0.65
type 25: n_cells = 65, auc = 0.68
type 26: n_cells = 26, auc = 0.58
type 27: n_cells = 16, auc = 0.73
type 28: n_cells = 69, auc = 0.77
type 29: n_cells = 49, auc = 0.67
type 30: n_cells = 4, a

## Boostrapped 95% CI for difference between auc_per_type and auc_type28

In [53]:
for t in types:
    print('For type {}'.format(t))
    print("    delta auc 28 vs {} = {:.2f}".format(t, auc_by_type[28] - auc_by_type[t]))
    [lower_bound, upper_bound], _ = bootstrap_ci(auc_by_type_values[t], auc_by_type_values[28])
    print("    95% CI = [{:.2f}, {:.2f}]".format(lower_bound, upper_bound))

For type 1
    delta auc 28 vs 1 = 0.22
    95% CI = [-0.24, -0.19]
For type 2
    delta auc 28 vs 2 = 0.23
    95% CI = [-0.25, -0.20]
For type 3
    delta auc 28 vs 3 = 0.20
    95% CI = [-0.23, -0.17]
For type 4
    delta auc 28 vs 4 = 0.25
    95% CI = [-0.26, -0.24]
For type 5
    delta auc 28 vs 5 = 0.26
    95% CI = [-0.28, -0.25]
For type 6
    delta auc 28 vs 6 = 0.25
    95% CI = [-0.28, -0.22]
For type 7
    delta auc 28 vs 7 = 0.25
    95% CI = [-0.28, -0.22]
For type 8
    delta auc 28 vs 8 = 0.23
    95% CI = [-0.26, -0.20]
For type 9
    delta auc 28 vs 9 = 0.24
    95% CI = [-0.26, -0.22]
For type 10
    delta auc 28 vs 10 = 0.18
    95% CI = [-0.21, -0.16]
For type 11
    delta auc 28 vs 11 = 0.23
    95% CI = [-0.25, -0.20]
For type 12
    delta auc 28 vs 12 = 0.21
    95% CI = [-0.23, -0.20]
For type 13
    delta auc 28 vs 13 = 0.20
    95% CI = [-0.25, -0.16]
For type 14
    delta auc 28 vs 14 = 0.22
    95% CI = [-0.24, -0.20]
For type 15
    delta auc 28 vs 15 = 0

## Calculate the effect size (Cohen's d), each pair between one type and type28

In [55]:
for t in types:
    print('For type {}'.format(t))
    d = cohens_d(auc_by_type_values[t], auc_by_type_values[28])
    print("    Cohen's d: {:.2f}".format( d))

For type 1
    Cohen's d: 4.36
For type 2
    Cohen's d: 6.09
For type 3
    Cohen's d: 5.44
For type 4
    Cohen's d: 6.10
For type 5
    Cohen's d: 7.11
For type 6
    Cohen's d: 6.58
For type 7
    Cohen's d: 5.52
For type 8
    Cohen's d: 6.17
For type 9
    Cohen's d: 6.61
For type 10
    Cohen's d: 4.45
For type 11
    Cohen's d: 4.01
For type 12
    Cohen's d: 3.58
For type 13
    Cohen's d: 3.43
For type 14
    Cohen's d: 5.62
For type 15
    Cohen's d: 1.34
For type 16
    Cohen's d: 3.73
For type 17
    Cohen's d: 2.74
For type 18
    Cohen's d: 5.51
For type 19
    Cohen's d: 6.94
For type 20
    Cohen's d: 5.80
For type 21
    Cohen's d: 2.99
For type 22
    Cohen's d: 1.64
For type 23
    Cohen's d: 2.63
For type 24
    Cohen's d: 3.04
For type 25
    Cohen's d: 2.29
For type 26
    Cohen's d: 3.16
For type 27
    Cohen's d: 0.85
For type 28
    Cohen's d: 0.00
For type 29
    Cohen's d: 2.12
For type 30
    Cohen's d: 0.77
For type 31
    Cohen's d: 1.47
For type 32
    C

## Compare distributions (one type and type28) by permutation test

null-hypothesis: the two distributions are the same

In [56]:
n_rep = 100000
for t in types:
    print('For type {}'.format(t))
    permuted_delta, orig_delta, p  = perform_permutation_test(auc_by_type_values[t], 
                                                          auc_by_type_values[28],
                                                         n_rep=n_rep)
    print("    p value  {:.2f}".format(p))

For type 1
    p value  0.00
For type 2
    p value  0.00
For type 3
    p value  0.00
For type 4
    p value  0.00
For type 5
    p value  0.00
For type 6
    p value  0.00
For type 7
    p value  0.00
For type 8
    p value  0.00
For type 9
    p value  0.00
For type 10
    p value  0.00
For type 11
    p value  0.00
For type 12
    p value  0.00
For type 13
    p value  0.00
For type 14
    p value  0.00
For type 15
    p value  0.17
For type 16
    p value  0.00
For type 17
    p value  0.00
For type 18
    p value  0.00
For type 19
    p value  0.00
For type 20
    p value  0.00
For type 21
    p value  0.00
For type 22
    p value  0.00
For type 23
    p value  0.00
For type 24
    p value  0.00
For type 25
    p value  0.00
For type 26
    p value  0.00
For type 27
    p value  0.00
For type 28
    p value  1.00
For type 29
    p value  0.00
For type 30
    p value  0.13
For type 31
    p value  0.00
For type 32
    p value  0.00


## Find next-best type for type28 per speed

then repeat the three statistic tests above for the pair (next-best,type28)

In [114]:
sorted_auc_by_type = dict(sorted(auc_by_type.items(), key=lambda item: item[1]))

In [116]:
sorted_types = list(sorted_auc_by_type.keys())
print(sorted_types)

[5, 6, 4, 7, 19, 9, 32, 8, 20, 11, 2, 14, 18, 1, 12, 13, 3, 26, 10, 17, 16, 21, 31, 24, 23, 29, 25, 22, 15, 27, 30, 28]


In [117]:
top_4s = sorted_types[-4:]

In [118]:
top_4s

[15, 27, 30, 28]

In [None]:
for speed in [4,12,20,28]:
    mean_per_types =[]
    print('Top 4 groups for SPEED',s)
    auc_by_type_file = auc_path + 'auc_per_type_s{}.pkl'.format(speed)
    
    # print('Open: ',auc_by_type_file)
    auc_by_type_values = unPickle(auc_by_type_file)
    
    for t in types:
        mean_per_types.append(np.mean(auc_by_type_values[t]))
    tmp = list(np.copy(mean_per_types))
    find_top4(tmp,mean_per_types)

### first, find the top 4 groups
#### then pick the group that is worse than G28

In [124]:
types = np.arange(1, 33)

for s in [4,12,20,28]:
    mean_per_types =[]
    print('Top 4 groups for SPEED',s)
    auc_by_type_file = roc_path + 'auc_per_type_s{}.pkl'.format(s)
    auc_by_type_values = unPickle(auc_by_type_file)
    for t in types:
        auc_by_type[t] = auc_by_type_values[t].mean()

    sorted_auc_by_type = dict(sorted(auc_by_type.items(), key=lambda item: item[1]))
    sorted_types = list(sorted_auc_by_type.keys())
    top_4s = sorted_types[-4:]
    print('  No.1 {}, No.2 {}, No.3 {}, No.4 {}'.format(top_4s[::-1][0],top_4s[::-1][1],
                                                        top_4s[::-1][2],top_4s[::-1][3]))

Top 4 groups for SPEED 4
  No.1 28, No.2 30, No.3 27, No.4 15
Top 4 groups for SPEED 12
  No.1 30, No.2 15, No.3 28, No.4 27
Top 4 groups for SPEED 20
  No.1 15, No.2 28, No.3 25, No.4 27
Top 4 groups for SPEED 28
  No.1 15, No.2 28, No.3 27, No.4 25


In [127]:
## manually enter
next_bests = {
    4:30,
    12:27,
    20:25,
    28:27
}

### statistic tests for G28 v.s. next_best per speed

In [128]:
n_rep=100000

In [130]:
for speed in [4,12,20,28]:
    next_best = next_bests[speed]
    print('SPEED {} , G28 v.sG{}'.format(speed,next_best))
    auc_by_type_file = roc_path + 'auc_per_type_s{}.pkl'.format(speed)
    auc_by_type_values = unPickle(auc_by_type_file)

    print('    ==========Mean for AUC_per_type==========: ')
    auc_by_type[28] = auc_by_type_values[28].mean()
    print("        type {}: n_cells = {}, auc = {:.2f}, std = {:.2f}".format(28, 
                                                                             len(auc_by_type_values[28]), 
                                                                             auc_by_type[28],
                                                                             auc_by_type_values[28].std()))
    
    auc_by_type[next_best] = auc_by_type_values[next_best].mean()
    print("        type {}: n_cells = {}, auc = {:.2f}, std = {:.2f}".format(next_best,
                                                                             len(auc_by_type_values[next_best]), 
                                                                             auc_by_type[next_best],
                                                                             auc_by_type_values[next_best].std()))

    
    print('    ==========delta AUC AND 95% CI========== ')
    print("        delta auc 28 vs {} = {:.2f}".format(t, auc_by_type[28] - auc_by_type[next_best]))
    [lower_bound, upper_bound], _ = bootstrap_ci(auc_by_type_values[next_best], auc_by_type_values[28])
    print("        95% CI = [{:.2f}, {:.2f}]".format(lower_bound, upper_bound))
    
    print('    ==========Cohen"s d==========')
    d = cohens_d(auc_by_type_values[next_best], auc_by_type_values[28])
    print("        Cohen's d for type {}: {:.2f}".format(next_best, d))
        
    print('    ==========Permutation test==========')
    permuted_delta, orig_delta,p = perform_permutation_test(auc_by_type_values[next_best], 
                                                      auc_by_type_values[28],
                                                     n_rep=n_rep)
    print("        sum = {}, p value for type {}: {:.6f}".format(sum(permuted_delta>orig_delta),
                                                                 next_best, p))

SPEED 4 , G28 v.sG30
        type 28: n_cells = 69, auc = 0.77, std = 0.03
        type 30: n_cells = 4, auc = 0.74, std = 0.03
        delta auc 28 vs 32 = 0.03
        95% CI = [-0.05, 0.01]
        Cohen's d for type 30: 0.77
        sum = 13362, p value for type 30: 0.133620
SPEED 12 , G28 v.sG27
        type 28: n_cells = 69, auc = 0.79, std = 0.02
        type 27: n_cells = 16, auc = 0.78, std = 0.04
        delta auc 28 vs 32 = 0.01
        95% CI = [-0.03, 0.01]
        Cohen's d for type 27: 0.47
        sum = 9539, p value for type 27: 0.095390
SPEED 20 , G28 v.sG25
        type 28: n_cells = 69, auc = 0.80, std = 0.01
        type 25: n_cells = 65, auc = 0.79, std = 0.02
        delta auc 28 vs 32 = 0.00
        95% CI = [-0.01, 0.00]
        Cohen's d for type 25: 0.20
        sum = 25512, p value for type 25: 0.255120
SPEED 28 , G28 v.sG27
        type 28: n_cells = 69, auc = 0.79, std = 0.01
        type 27: n_cells = 16, auc = 0.78, std = 0.03
        delta auc 28 vs 32 