# Various experimental visualizations

## How do the datasets differ at baseline?

For convenience, we use different datasets here. This notebook must be run in the `graphing_env` environment.

In [None]:
import os       # using operating system dependent functionality (folders)
import sys
import glob
from functools import reduce

import pandas as pd # data analysis and manipulation
import numpy as np    # numerical computing (manipulating and performing operations on arrays of data)
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interactive
import seaborn as sns
from ipywidgets import Layout, Button, Box, FloatText, Textarea, Dropdown, Label, IntSlider

sys.path.insert(0, '../../') # path to functions
import cvasl.harmony as har

In [None]:
# Names offered in the dataset selection widgets; each one is also a key
# of `dataset_dictionary`.
datasets_names = [
    'EDIS',
    'SABRE',
    'Insight46',
    'TOP',
    'StrokeMRI',
    'HELIUS',
    'TOPMRI',
]

In [None]:
# form_item_layout = Layout(
#     display='flex',
#     flex_flow='row',
#     justify_content='space-between'
# )

# form_items = [
    
#     Box([Label(value='Dataset1'),
#          Dropdown(options=datasets_names)], layout=form_item_layout),
#     Box([Label(value='Label dataset 1'),
#          Textarea()], layout=form_item_layout),
#     Box([Label(value='Dataset2'),
#          Dropdown(options=datasets_names)], layout=form_item_layout),
#     Box([Label(value='label dataset 2'),
#          Textarea()], layout=form_item_layout),
#     Box([Label(value='feature1'),
#          Dropdown(options=list(features))], layout=form_item_layout),
#     Box([Label(value='feature2'),
#          Dropdown(options=list(features))], layout=form_item_layout),
    
# ]

# form = Box(form_items, layout=Layout(
#     display='flex',
#     flex_flow='column',
#     border='solid 2px',
#     align_items='stretch',
#     width='60%'
# ))


# form

In [None]:
#Dataset2.value

In [None]:
#features

In [None]:
# Datasets for this work.
# Every dataset folder holds its table under the same file name; the first
# CSV column is used as the DataFrame index (index_col=0).
file_name = 'TrainingDataComplete.csv'

# EDIS
EDIS_path = '../our_datasets/EDIS/'
EDIS_file = os.path.join(EDIS_path, file_name)
EDIS_n = pd.read_csv(EDIS_file, index_col=0)

# HELIUS
HELIUS_path = '../our_datasets/HELIUS/'
HELIUS_file = os.path.join(HELIUS_path, file_name)
HELIUS_n = pd.read_csv(HELIUS_file, index_col=0)

# Insight46
Insight46_path = '../our_datasets/Insight46/'
Insight46_file = os.path.join(Insight46_path, file_name)
Insight46_n = pd.read_csv(Insight46_file, index_col=0)

# SABRE
SABRE_path = '../our_datasets/SABRE/'
SABRE_file = os.path.join(SABRE_path, file_name)
SABRE_n = pd.read_csv(SABRE_file, index_col=0)

# TOP
TOP_path = '../our_datasets/TOP/'
TOP_file = os.path.join(TOP_path, file_name)
TOP_n = pd.read_csv(TOP_file, index_col=0)

# StrokeMRI (kept under the shorter "MRI" variable prefix)
MRI_path = '../our_datasets/StrokeMRI/'
MRI_file = os.path.join(MRI_path, file_name)
MRI_n = pd.read_csv(MRI_file, index_col=0)

In [None]:
# Working copies of the raw tables with the 'ID' and 'Site' columns removed
# (TOP is handled in its own cell).
EDIS = EDIS_n.drop(columns=['ID', 'Site'])
HELIUS = HELIUS_n.drop(columns=['ID', 'Site'])
Insight46 = Insight46_n.drop(columns=['ID', 'Site'])
SABRE = SABRE_n.drop(columns=['ID', 'Site'])
StrokeMRI = MRI_n.drop(columns=['ID', 'Site'])

In [None]:
# features = EDIS.columns
# features =list(features)

In [None]:
# Remove the 'ID' and 'Site' columns from the raw TOP table, then preview
# the first two rows of the result.
TOP = TOP_n.drop(columns=['ID', 'Site'])
TOP.head(n=2)

In [None]:
# Inspect the column labels of TOP (bare expression; shown as the cell output).
TOP.columns

In [None]:
# Preview the first three rows of HELIUS.
HELIUS.head(3)

In [None]:
# Stack the TOP and StrokeMRI rows into one combined table; sort=False
# leaves the column order unsorted.
TOPMRI = pd.concat(objs=[TOP, StrokeMRI], sort=False)

In [None]:
# Lower-case the column labels of every table in place, so the same
# lower-case column names can be used on all of them.
for frame in (TOP, TOPMRI, StrokeMRI, Insight46, EDIS, SABRE, HELIUS):
    frame.columns = frame.columns.str.lower()

In [None]:
# Inspect the column labels of EDIS (bare expression; shown as the cell output).
EDIS.columns

In [None]:
# Column labels of EDIS as a plain list; used as the options of the
# feature selection widgets.
features = list(EDIS.columns)

In [None]:
# Print the distinct values of the 'sex' column of every table, one line
# per table, in the order listed here.
datasets = [EDIS, SABRE, Insight46, TOP, StrokeMRI, HELIUS, TOPMRI]
for frame in datasets:
    print(frame['sex'].unique())

In [None]:
# Recode 'sex' from {1, 2} to {0, 1} in Insight46, EDIS and SABRE.
# Series.map with a dict turns any value that is not a key into NaN.
# .assign returns a new DataFrame, so each name is rebound to a fresh object.
sex_mapping = {1: 0, 2: 1}
Insight46 = Insight46.assign(sex=Insight46['sex'].map(sex_mapping))
EDIS = EDIS.assign(sex=EDIS['sex'].map(sex_mapping))
SABRE = SABRE.assign(sex=SABRE['sex'].map(sex_mapping))
In [None]:
# check and rename known repeater in HELIUS and SABRE
sabres = set(SABRE.participant_id)
heliar = set(HELIUS.participant_id)
x = sabres.intersection(heliar)
print(x)

In [None]:
# Look-up table from dataset name to its DataFrame.
dataset_dictionary = {
    'SABRE': SABRE,
    'EDIS': EDIS,
    'TOP': TOP,
    'HELIUS': HELIUS,
    'StrokeMRI': StrokeMRI,
    'Insight46': Insight46,
    'TOPMRI': TOPMRI,
}

In [None]:
# Give the HELIUS rows with participant ID 'sub-153852_1' a distinct ID
# (suffix 'H'); the edit is done in place on HELIUS.
is_repeater = HELIUS['participant_id'] == 'sub-153852_1'
HELIUS.loc[is_repeater, 'participant_id'] = 'sub-153852_1H'

In [None]:
# Recompute the participant IDs shared between SABRE and HELIUS and print them.
sabres = set(SABRE['participant_id'])
heliar = set(HELIUS['participant_id'])
x = sabres & heliar
print(x)

In [None]:
# make mixed StrokeMRI and TOP dataset
# Mixed dataset: the rows of TOP followed by the rows of StrokeMRI.
mixed_data = pd.concat(objs=[TOP, StrokeMRI], sort=False)


In [None]:
# Age histograms of three datasets; `fc` is an RGBA tuple, so the
# semi-transparent bars stay visible where they overlap.
mixed_data['age'].hist(ls='dashed', lw=3, fc=(0, 0, 1, 0.5))  # blue
# TOP['age'].hist(ls='dotted', lw=3, fc=(1, 0, 0, 0.3))
Insight46['age'].hist(ls='dotted', lw=3, fc=(0, 1, 0, 0.3))  # green
EDIS['age'].hist(ls='dotted', lw=3, fc=(1, 0, 0, 0.3))  # red

In [None]:
# Scatter 'gm_vol' against 'age' for three datasets;
# alpha=0.2 keeps dense regions readable.
plt.scatter(x=mixed_data['age'], y=mixed_data['gm_vol'], alpha=0.2)
plt.scatter(x=SABRE['age'], y=SABRE['gm_vol'], alpha=0.2)
plt.scatter(x=Insight46['age'], y=Insight46['gm_vol'], alpha=0.2)

In [None]:
# Inspect the column labels of the mixed TOP + StrokeMRI table.
mixed_data.columns

In [None]:
# Columns selected from the mixed dataset for the numeric-only table.
numeric_columns = [
    'age',
    'sex',
    'gm_vol',
    'wm_vol',
    'csf_vol',
    'gm_icvratio',
    'gmwm_icvratio',
    'wmhvol_wmvol',
    'wmh_count',
    'aca_b_cov',
    'mca_b_cov',
    'pca_b_cov',
    'totalgm_b_cov',
    'aca_b_cbf',
    'mca_b_cbf',
    'pca_b_cbf',
    'totalgm_b_cbf',
]
# Show how many columns were selected.
len(numeric_columns)

In [None]:
# Restrict the mixed dataset to the columns named in `numeric_columns`.
mixed_data_np = mixed_data.loc[:, numeric_columns]

In [None]:
# Replace the index with a fresh 0..n-1 range. With drop=False (the pandas
# default) the previous index is kept as a new column.
mixed_data_np = mixed_data_np.reset_index(drop=False)
mixed_data_np.index

## Recommend jointplot for excluded data

In [None]:
# Interactive controls for comparing two datasets on two features.
#
# Dataset1 / Dataset2 pick a key of `dataset_dictionary`.
# Feature1 / Feature2 pick a column name from `features`.
# label_dataset1 / label_dataset2 hold the free-text label for each dataset.
# Later cells read the `.value` attribute of each widget.

# Dataset pickers; the initial values must be entries of `datasets_names`.
Dataset1 = widgets.Select(
    options=datasets_names,
    value='EDIS',
    description='Dataset 1:',
    disabled=False,
)
Dataset2 = widgets.Select(
    options=datasets_names,
    value='SABRE',
    description='Dataset 2:',
    disabled=False,
)

# Feature pickers; the initial values must be entries of `features`.
Feature1 = widgets.Select(
    options=features,
    value='age',
    description='Feature 1:',
    disabled=False,
)
Feature2 = widgets.Select(
    options=features,
    value='gm_vol',
    description='Feature 2:',
    disabled=False,
)

# Free-text labels. The defaults match the default dataset selections above;
# they are not updated automatically when another dataset is selected.
label_dataset1 = widgets.Textarea(
    value='EDIS',
    placeholder='Type something',
    description='label dataset1:',
    disabled=False,
)
label_dataset2 = widgets.Textarea(
    value='SABRE',
    placeholder='Type something',
    # Was the placeholder caption 'String:'; now consistent with label_dataset1.
    description='label dataset2:',
    disabled=False,
)

# Two containers: the label inputs (shown in a later cell) and the pickers.
box2 = Box(children=[label_dataset1, label_dataset2])

box = Box(children=[Dataset1, Dataset2, Feature1, Feature2])
box

In [None]:
# Show the container holding the two dataset-label text areas.
box2

In [None]:
# Compare the two datasets currently selected in the widgets on the two
# selected features. Arguments are (dataset, label) pairs followed by the
# two feature names.
# NOTE(review): the plot itself is produced by cvasl.harmony, which is not
# visible from this notebook.
har.show_diff_on_var(
    dataset_dictionary[Dataset1.value], label_dataset1.value,
    dataset_dictionary[Dataset2.value], label_dataset2.value,
    Feature1.value, Feature2.value,
)

In [None]:
# Three-dataset comparison on 'age' and 'gm_vol'. Arguments are
# (dataset, label) pairs followed by the two feature names.
har.show_diff_on_var3(
    EDIS, 'EDIS',
    TOPMRI, 'TOPMRI',
    SABRE, 'SABRE',
    'age', 'gm_vol',
)

In [None]:
# Five-dataset comparison on 'age' and 'gm_vol'. Arguments are
# (dataset, label) pairs followed by the two feature names.
har.show_diff_on_var5(
    EDIS, 'EDIS',
    TOPMRI, 'TOPMRI',
    HELIUS, 'HELIUS',
    Insight46, 'Insight46',
    SABRE, 'SABRE',
    'age', 'gm_vol',
)