## What we are working with

- The file *algorithmic-approach.xlsx* contains the collected data from all the proposed algorithmic approaches in each study, which includes:
    - Input data
    - Model class
    - Train/test approach (retrospective vs prospective)
    - Performance evaluation (deterministic/probabilistic)

- Every field except the train/test approach may hold more than one value; multiple values are separated by ' and '

- The sheet _data-source_ contains one table entry for each algorithmic approach, so we need to transform that into a matrix of the form:


| Element 1   | Category 1   | Element 2    | Category 2    |  Weight |
| ----------- | ------------ | ------------ |  ------------ | ------------ |
| EEG | Input data | seizure times | Input data | 3 |
| EEG | Input data | ML | Model class | 10 |


In [1]:
# built-in
from itertools import combinations

# third-party
import pandas as pd
from d3blocks import D3Blocks

# local
from get_matrix import get_individual_occurrences

-----
## Creating _'papers and inputs'_

In [2]:
# Read the 'data-source' sheet, which holds one row per algorithmic approach.
filepath = '../aux_files/algorithmic-approach.xlsx'
df = pd.read_excel(io=filepath, sheet_name='data-source')
df

Unnamed: 0,Input data,Model class,Approach,Performance evaluation
0,physio. data,DL,R,deterministic
1,physio. data and ToD,DL,R,deterministic
2,other cyclic profiles,phase analysis,R,deterministic
3,other cyclic profiles,phase analysis,P,deterministic
4,physio. data,ML,R,deterministic
5,physio. data,ML,P,deterministic
6,physio. data,ML,P,both
7,physio. data,ML,R,deterministic
8,physio. data,ML,P,probabilistic
9,physio. data,ML,R,deterministic


In [3]:

# Split every field on ' and ' and get one occurrence table per field
# (judging by the displayed result: one 0/1 column per distinct value,
# one row per approach — see get_matrix for the exact contract).
cat1, cat2, cat3, cat4 = (
    get_individual_occurrences(df[field], ' and ')
    for field in ('Input data', 'Model class', 'Approach', 'Performance evaluation')
)

# Only the input-data and model-class tables are joined side by side here;
# cat3 and cat4 are kept for the category lookup further down.
df_input = pd.concat([cat1, cat2], axis='columns')

df_input

Unnamed: 0,physio. data,ToD,other cyclic profiles,seizure cyclic profile,seizure times,other info,DL,phase analysis,ML,PP-GLM
0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
6,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
7,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
8,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
9,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


#### Count how often each pair of elements co-occurs to get the edge weights

In [4]:
# Lookup table: element (a column of df_input) -> category it belongs to.
# Entries are added lowest priority first, so if a label ever appears in more
# than one category the precedence is Input data > Model class > Approach.
# Anything not listed falls back to 'Performance evaluation'.
category_of = {
    **dict.fromkeys(cat3.columns, 'Approach'),
    **dict.fromkeys(cat2.columns, 'Model class'),
    **dict.fromkeys(cat1.columns, 'Input data'),
}
DEFAULT_CATEGORY = 'Performance evaluation'

# One record per unordered pair of elements. The weight is the number of rows
# (algorithmic approaches) in which both elements are present at once.
weights = [
    {
        'source': source,
        'category1': category_of.get(source, DEFAULT_CATEGORY),
        'target': target,
        'category2': category_of.get(target, DEFAULT_CATEGORY),
        'weight': df_input[[source, target]].all(axis=1).sum(),
    }
    for source, target in combinations(df_input.columns, 2)
]

# Passing `columns` keeps the expected layout even when there are no pairs.
weights_df = pd.DataFrame(
    weights,
    columns=['source', 'category1', 'target', 'category2', 'weight'],
)

weights_df


Unnamed: 0,source,category1,target,category2,weight
0,physio. data,Input data,ToD,Input data,3
1,physio. data,Input data,other cyclic profiles,Input data,5
2,physio. data,Input data,seizure cyclic profile,Input data,3
3,physio. data,Input data,seizure times,Input data,7
4,physio. data,Input data,other info,Input data,4
5,physio. data,Input data,DL,Model class,11
6,physio. data,Input data,phase analysis,Model class,1
7,physio. data,Input data,ML,Model class,14
8,physio. data,Input data,PP-GLM,Model class,4
9,ToD,Input data,other cyclic profiles,Input data,0


----
## Now, onto creating the chord diagram

In [5]:
# Chord diagram built from the pairwise table: weights_df supplies the
# 'source', 'target' and 'weight' columns that d3blocks works from.
d3 = D3Blocks(chart='Chord', frame=False)

# Nodes are drawn faintly (opacity 0.2) and edges fully opaque; both take
# their colour from the 'source' element, using the 'tab20' colormap.
d3.set_node_properties(weights_df, opacity=0.2, cmap='tab20', arrowhead=-1, color='source')
d3.set_edge_properties(weights_df, color='source', arrowhead=-1, opacity=1)

# Render once: this writes chord.html to a temporary folder and opens it in
# the browser, so calling it again without changes only repeats that work.
d3.show()

[d3blocks] >INFO> Cleaning edge_properties and config parameters..
[d3blocks] >INFO> Initializing [Chord]
[d3blocks] >INFO> filepath is set to [/var/folders/zr/w_9gpc_15tvbm65ms36b_g680000gn/T/d3blocks/chord.html]
[d3blocks] >INFO> Node properties are set.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if 'weight' in X.columns: X['weight'] = X['weight'].astype(float)
[d3blocks] >INFO> Edge properties are set.
[d3blocks] >INFO> File already exists and will be overwritten: [/var/folders/zr/w_9gpc_15tvbm65ms36b_g680000gn/T/d3blocks/chord.html]
[d3blocks] >INFO> File not found: [file:////var/folders/zr/w_9gpc_15tvbm65ms36b_g680000gn/T/d3blocks/chord.html]
[d3blocks] >INFO> Open browser: file:////var/folders/zr/w_9gpc_15tvbm65ms36b_g680000gn/T/d3blocks/chord.html
[d3blocks] >I