# Various experimental visualizations

## How do the datasets differ at baseline?

We will use different datasets now for convenience

In [None]:
import os       # using operating system dependent functionality (folders)
import sys
import glob
from functools import reduce

import pandas as pd # data analysis and manipulation
import numpy as np    # numerical computing (manipulating and performing operations on arrays of data)
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interactive
import seaborn as sns

sys.path.insert(0, '../../') # path to functions
import cvasl.seperated as sep
from cvasl.file_handler import Config
import cvasl.harmony as har

In [None]:
# Datasets for this work.
# Each cohort lives in its own folder and provides a table with the same
# file name; the first CSV column is used as the DataFrame index.
file_name = 'TrainingDataComplete.csv'

# EDIS
EDIS_path = '../our_datasets/EDIS/'
EDIS_file = os.path.join(EDIS_path, file_name)
EDIS_n = pd.read_csv(EDIS_file, index_col=0)

# HELIUS
HELIUS_path = '../our_datasets/HELIUS/'
HELIUS_file = os.path.join(HELIUS_path, file_name)
HELIUS_n = pd.read_csv(HELIUS_file, index_col=0)

# Insight46
Insight46_path = '../our_datasets/Insight46/'
Insight46_file = os.path.join(Insight46_path, file_name)
Insight46_n = pd.read_csv(Insight46_file, index_col=0)

# SABRE
SABRE_path = '../our_datasets/SABRE/'
SABRE_file = os.path.join(SABRE_path, file_name)
SABRE_n = pd.read_csv(SABRE_file, index_col=0)

# TOP
TOP_path = '../our_datasets/TOP/'
TOP_file = os.path.join(TOP_path, file_name)
TOP_n = pd.read_csv(TOP_file, index_col=0)

# StrokeMRI
MRI_path = '../our_datasets/StrokeMRI/'
MRI_file = os.path.join(MRI_path, file_name)
MRI_n = pd.read_csv(MRI_file, index_col=0)

In [None]:
# Working copies of each cohort without the 'ID' and 'Site' columns;
# the raw *_n frames are left untouched.
HELIUS = HELIUS_n.drop(columns=['ID', 'Site'])
Insight46 = Insight46_n.drop(columns=['ID', 'Site'])
EDIS = EDIS_n.drop(columns=['ID', 'Site'])
SABRE = SABRE_n.drop(columns=['ID', 'Site'])
StrokeMRI = MRI_n.drop(columns=['ID', 'Site'])

In [None]:
# Same clean-up for TOP ('ID' and 'Site' removed), then preview two rows.
TOP = TOP_n.drop(columns=['ID', 'Site'])
TOP.head(n=2)

In [None]:
# Display the column labels of TOP (before they are lower-cased below).
TOP.columns

In [None]:
# Preview the first three rows of HELIUS.
HELIUS.head(3)

In [None]:
# Lower-case the column labels of every cohort in place so the frames
# can be addressed with the same column names.
for _frame in (TOP, StrokeMRI, Insight46, EDIS, SABRE, HELIUS):
    _frame.columns = _frame.columns.str.lower()

In [None]:
# Display the (now lower-cased) column labels of EDIS.
EDIS.columns

In [None]:
# Print the distinct values of the 'sex' column for each cohort to see
# which coding each one uses.
datasets = [
    EDIS,
    SABRE,
    Insight46,
    TOP,
    StrokeMRI,
    HELIUS,
]
for qset in datasets:
    print(qset['sex'].unique())

In [None]:
# Recode sex from {1, 2} to {0, 1} for Insight46, EDIS and SABRE.
# Series.map turns any value that is not a key of the mapping into NaN.
# Each name is rebound to a new frame; objects captured earlier (e.g. in
# `datasets`) keep the old coding.
sex_mapping = {1: 0, 2: 1}
Insight46 = Insight46.assign(sex=lambda df: df['sex'].map(sex_mapping))
EDIS = EDIS.assign(sex=lambda df: df['sex'].map(sex_mapping))
SABRE = SABRE.assign(sex=lambda df: df['sex'].map(sex_mapping))

In [None]:
# Check for participant IDs that occur in both SABRE and HELIUS
# (one known repeater is renamed afterwards).
sabres = set(SABRE['participant_id'])
heliar = set(HELIUS['participant_id'])
x = sabres & heliar
print(x)

In [None]:
# Give the HELIUS row with the repeated ID an 'H' suffix so it no longer
# matches the identically named SABRE participant.
HELIUS.loc[HELIUS['participant_id'].eq('sub-153852_1'), 'participant_id'] = 'sub-153852_1H'

In [None]:
# Recompute the SABRE / HELIUS ID overlap; presumably empty once the
# repeater has been renamed.
sabres = set(SABRE['participant_id'])
heliar = set(HELIUS['participant_id'])
x = sabres & heliar
print(x)

In [None]:
# make mixed StrokeMRI and TOP dataset
mixed_data = pd.concat([TOP, StrokeMRI], sort=False)

In [None]:
# Overlay the age histograms of three cohorts on the same axes, using
# semi-transparent RGBA face colours so the overlap stays visible.
mixed_data['age'].hist(ls='dashed', lw=3, fc=(0, 0, 1, 0.5))  # blue
# Disabled: TOP on its own (already part of mixed_data)
#TOP.age.hist(ls='dotted', lw=3, fc=(1, 0, 0, 0.3))
Insight46['age'].hist(ls='dotted', lw=3, fc=(0, 1, 0, 0.3))  # green
EDIS['age'].hist(ls='dotted', lw=3, fc=(1, 0, 0, 0.3))  # red

In [None]:
# Age against grey-matter volume for three cohorts on one set of axes;
# a low alpha keeps dense regions readable.
plt.scatter(x=mixed_data['age'], y=mixed_data['gm_vol'], alpha=0.2)
plt.scatter(x=SABRE['age'], y=SABRE['gm_vol'], alpha=0.2)
plt.scatter(x=Insight46['age'], y=Insight46['gm_vol'], alpha=0.2)

In [None]:
# Display the column labels of the combined TOP + StrokeMRI frame.
mixed_data.columns

In [None]:
# Columns used for the numeric summaries below:
# demographics, tissue volumes and ratios, WMH measures, and then the
# '<region>_b_cov' block followed by the '<region>_b_cbf' block.
numeric_columns = [
    'age', 'sex',
    'gm_vol', 'wm_vol', 'csf_vol',
    'gm_icvratio', 'gmwm_icvratio',
    'wmhvol_wmvol', 'wmh_count',
] + [
    f'{region}_b_{measure}'
    for measure in ('cov', 'cbf')
    for region in ('aca', 'mca', 'pca', 'totalgm')
]
len(numeric_columns)

In [None]:
# Subset of the mixed dataset restricted to the numeric columns.
mixed_data_np = mixed_data.loc[:, numeric_columns]

## Recommend jointplot for excluded data

In [None]:
# Plain scatter of age against grey-matter volume for the mixed dataset;
# white marker edges help separate overlapping points.
plt.scatter(mixed_data['age'], mixed_data['gm_vol'],
            alpha=0.4, edgecolors='w')

plt.xlabel('Age')
plt.ylabel('GM VOL')
plt.title('Example graph for excluded data', y=1.05)


# Joint plot: regression scatter with marginal distributions.
# seaborn renamed the `size` keyword to `height`; on current releases
# `size` is forwarded to the underlying regplot call and raises TypeError,
# so the figure size is passed as `height`.
jp = sns.jointplot(data=mixed_data, x='age', y='gm_vol',
                   kind='reg', space=0, height=5, ratio=4)

In [None]:
# One histogram per numeric column of the mixed dataset.
mixed_data[numeric_columns].hist(
    bins=15,
    color='steelblue',
    edgecolor='black',
    linewidth=1.0,
    xlabelsize=8,
    ylabelsize=8,
    grid=False,
)
# rect extends past the unit figure box (1.2 in both directions), which
# lets the subplot grid occupy more room than the default layout.
plt.tight_layout(rect=(0, 0, 1.2, 1.2))

In [None]:
## Harmonized outputs plotting