# Autocombat: 5-way harmonized datasets

Note this must be run in the `comscan6` environment

## import libraries

In [None]:
import os
import sys

import pandas as pd
import numpy as np

sys.path.insert(0, '../../') # path to functions

import cvasl.harmony as har
import cvasl.seperated as sep
import cvasl.vendor.comscan.neurocombat as autocombat

## Let's see how we would apply this to our data
We will have to flip it on its side to make it work.

In [None]:
# Load the six per-cohort training tables; the CSVs are read in the order listed.
EDIS, HELIUS, SABRE, MRI, TOP, Insight46 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../new_data/TrainingDataComplete_EDIS.csv',
        '../new_data/TrainingDataComplete_HELIUS.csv',
        '../new_data/TrainingDataComplete_SABRE.csv',
        '../new_data/TrainingDataComplete_StrokeMRI.csv',
        '../new_data/TrainingDataComplete_TOP.csv',
        '../new_data/TrainingDataComplete_Insight46.csv',
    )
)

In [None]:
## We found one participant ID that appears at two separate sites (HELIUS and SABRE). Therefore we show both rows and then rename one of them.

In [None]:
# Display the HELIUS row(s) whose participant ID is 'sub-153852_1'.
HELIUS.loc[HELIUS['participant_id'] == 'sub-153852_1']

In [None]:
# Display the SABRE row(s) whose participant ID is 'sub-153852_1'.
SABRE.loc[SABRE['participant_id'] == 'sub-153852_1']

In [None]:
# Make the ID unique across cohorts by appending an 'H' to the HELIUS copy.
is_shared_id = HELIUS['participant_id'] == 'sub-153852_1'
HELIUS.loc[is_shared_id, 'participant_id'] = 'sub-153852_1H'

In [None]:
# Print the distinct values of the 'Sex' column for every raw cohort table.
list_of_dataframes = [EDIS, HELIUS, SABRE, MRI, TOP, Insight46]
for cohort_frame in list_of_dataframes:
    print(cohort_frame['Sex'].unique())

In [None]:
### drop ID columns

In [None]:
# TOP: remove the 'ID' column, then preview the first three rows.
TOP = TOP.drop(columns=['ID'])
TOP.head(n=3)

In [None]:
# StrokeMRI: remove the 'ID' column, then preview the last five rows.
MRI = MRI.drop(columns=['ID'])
MRI.tail(n=5)

In [None]:
# EDIS: remove the 'ID' column, then preview the last five rows.
EDIS = EDIS.drop(columns=['ID'])
EDIS.tail(n=5)

In [None]:
# SABRE: remove the 'ID' column, then preview the last five rows.
SABRE = SABRE.drop(columns=['ID'])
SABRE.tail(n=5)

In [None]:
# HELIUS: remove the 'ID' column, then preview the last five rows.
HELIUS = HELIUS.drop(columns=['ID'])
HELIUS.tail(n=5)

In [None]:
# Insight46: remove the 'ID' column, then preview the last five rows.
Insight46 = Insight46.drop(columns=['ID'])
Insight46.tail(n=5)

In [None]:
# Stack TOP and StrokeMRI into a single table and give every row Site 0.
TOPMRI = pd.concat([TOP, MRI]).assign(Site=0)
TOPMRI.head(n=3)

In [None]:
# The five site tables, in the order TOPMRI, HELIUS, SABRE, EDIS, Insight46.
datasets = [TOPMRI, HELIUS, SABRE, EDIS, Insight46]
# Run the project helper over all five tables with ['M0'] as its second argument.
# NOTE(review): the helper is defined in cvasl.seperated and not visible here;
# presumably it recodes the readout/labelling columns and handles the listed
# column(s), returning one frame per input in the same order — confirm.
new_frames = sep.deal_with_readout_and_labelling(datasets, ['M0'])

In [None]:
# Make sure things are in the proper order: print the row count of every
# processed frame, then the row count of each input table for comparison.
for frame in new_frames:
    print(len(frame))
for source_table in (TOPMRI, HELIUS, SABRE, EDIS, Insight46):
    print(len(source_table))

In [None]:
# Rebind each cohort name to its processed frame.
# `datasets` was built as [TOPMRI, HELIUS, SABRE, EDIS, Insight46], so the
# results must be unpacked in exactly that order. The previous version bound
# SABRE to index 1 and HELIUS to index 2, which swapped the two cohorts and
# mislabelled their site codes and exported files downstream.
# NOTE(review): assumes sep.deal_with_readout_and_labelling returns one frame
# per input, in input order — confirm against the helper.
TOPMRI, HELIUS, SABRE, EDIS, Insight46 = new_frames

In [None]:
# Lower-case every column name, in place, on each of the five site tables.
new_frame_datasets = [TOPMRI, HELIUS, SABRE, EDIS, Insight46]
for site_table in new_frame_datasets:
    lowered_names = site_table.columns.str.lower()
    site_table.columns = lowered_names
TOPMRI.head(n=3)

In [None]:
# Integer site codes used as the ComBat batch variable:
# 0 = TOPMRI, 1 = EDIS, 2 = HELIUS, 3 = SABRE, 4 = Insight46.
for site_code, site_table in enumerate([TOPMRI, EDIS, HELIUS, SABRE, Insight46]):
    site_table['site'] = site_code

In [None]:
# Preview the first three Insight46 rows (now including the 'site' column).
Insight46.head(3)

In [None]:
# Stack the five site tables row-wise into one frame; original row labels are kept.
FIVEDATA = pd.concat(new_frame_datasets, axis=0, ignore_index=False)
FIVEDATA.head(n=3)

In [None]:
# Coarse age covariate: age in tens of years, rounded to a whole number
# (e.g. age 46 -> 5.0).
FIVEDATA['decade'] = FIVEDATA['age'].div(10).round()

In [None]:
# Order everyone by age; reset_index() moves the old row labels into an
# 'index' column and renumbers the rows 0..n-1.
FIVEDATA = FIVEDATA.sort_values(by='age').reset_index()
# fine_grain = mean of this row's age and the previous row's age
# (the very first row has no predecessor, so it is NaN).
FIVEDATA['fine_grain'] = FIVEDATA['age'].rolling(window=2).sum().div(2)
FIVEDATA

In [None]:
# Inspect the freshly computed fine_grain column.
FIVEDATA['fine_grain']

In [None]:
# Give both rows of each consecutive age-sorted pair (0,1), (2,3), ... the same
# fine_grain value by copying the value at the odd position onto the even
# position before it. This also replaces the NaN that the two-row rolling
# window leaves in row 0.
#
# The assignment is done directly on FIVEDATA with a single .iloc call. The
# previous chained form (FIVEDATA[:].fine_grain.iloc[::2] = ...) wrote through
# two intermediate objects, which pandas does not guarantee to propagate and
# which is a no-op under Copy-on-Write. It also raised when the row count was
# odd; here a trailing unpaired row simply keeps its rolling value.
fine_grain_position = FIVEDATA.columns.get_loc('fine_grain')
pair_values = FIVEDATA['fine_grain'].to_numpy()[1::2].copy()
FIVEDATA.iloc[0:2 * len(pair_values):2, fine_grain_position] = pair_values
FIVEDATA['fine_grain']

In [None]:
# List the distinct decade values present in the pooled data.
FIVEDATA['decade'].unique()

In [None]:
# Show the column names available for the harmonization step.
FIVEDATA.columns

In [None]:
# Features to harmonize: the CoV and CBF measures for ACA, MCA, PCA and total GM.
# Deliberately left out (kept here for reference): 'gm_vol', 'wm_vol',
# 'csf_vol', 'gm_icvratio', 'gmwm_icvratio', 'wmhvol_wmvol', 'wmh_count'.
harmonized_features = (
    'aca_b_cov', 'mca_b_cov', 'pca_b_cov', 'totalgm_b_cov',
    'aca_b_cbf', 'mca_b_cbf', 'pca_b_cbf', 'totalgm_b_cbf',
)

# Two otherwise identical models that differ only in the continuous age
# covariate: 'decade' for `combat`, 'fine_grain' for `fg_combat`.
# Each model receives its own fresh list of feature names.
combat = autocombat.Combat(
    features=list(harmonized_features),
    sites=["site"],
    discrete_covariates=['sex'],
    continuous_covariates=['decade'],
)

fg_combat = autocombat.Combat(
    features=list(harmonized_features),
    sites=["site"],
    discrete_covariates=['sex'],
    continuous_covariates=['fine_grain'],
)


In [None]:
# Fit both models on the pooled data and print whatever fit() returns.
for harmonizer in (combat, fg_combat):
    print(harmonizer.fit(FIVEDATA))

In [None]:
# Print the fitted gamma_star_ attribute of each model.
for harmonizer in (combat, fg_combat):
    print(harmonizer.gamma_star_)

In [None]:
# Apply the decade-covariate model to the pooled data and preview the result.
transformed_FIVEDATA = combat.transform(FIVEDATA)
transformed_FIVEDATA.head(n=3)

In [None]:
# Show the input column names again, for the comparison in the next cell.
FIVEDATA.columns

In [None]:
# Check what harmonization did: for each column print the summed difference
# between the input and the decade-model output.
columns_to_compare = [
    'age', 'sex', 'site', 'gm_vol', 'wm_vol',
    'csf_vol', 'gm_icvratio', 'gmwm_icvratio', 'wmhvol_wmvol', 'wmh_count',
    'deepwm_b_cov', 'aca_b_cov', 'mca_b_cov', 'pca_b_cov', 'totalgm_b_cov',
    'deepwm_b_cbf', 'aca_b_cbf', 'mca_b_cbf', 'pca_b_cbf', 'totalgm_b_cbf',
    'ld', 'pld', 'labelling', 'readout', 'decade', 'fine_grain',
]
for column in columns_to_compare:
    per_row_change = FIVEDATA[column] - transformed_FIVEDATA[column]
    print(column, sum(per_row_change))

In [None]:
# Apply the fine_grain-covariate model to the pooled data and preview the result.
fg_transformed_FIVEDATA = fg_combat.transform(FIVEDATA)
fg_transformed_FIVEDATA.head(n=3)

In [None]:
# Preview the pooled input frame for comparison with the transformed outputs.
FIVEDATA.head(3)

In [None]:
# Decade model, site 0 (TOPMRI): keep its rows and drop the helper columns.
TOPMRI_transformed = (
    transformed_FIVEDATA
    .loc[transformed_FIVEDATA['site'] == 0]
    .drop(columns=['site', 'decade', 'fine_grain', 'index'])
)
TOPMRI_transformed.head(n=3)

In [None]:
# Fine-grain model, site 0 (TOPMRI): keep its rows and drop the helper columns.
fg_TOPMRI_transformed = (
    fg_transformed_FIVEDATA
    .loc[fg_transformed_FIVEDATA['site'] == 0]
    .drop(columns=['site', 'decade', 'fine_grain', 'index'])
)
fg_TOPMRI_transformed.head(n=3)

In [None]:
# Decade model, site 1 (EDIS): keep its rows and drop the helper columns.
EDIS_transformed = (
    transformed_FIVEDATA
    .loc[transformed_FIVEDATA['site'] == 1]
    .drop(columns=['site', 'decade', 'index', 'fine_grain'])
)
EDIS_transformed.head(n=3)

In [None]:
# Decade model, site 2 (HELIUS): keep its rows and drop the helper columns.
HELIUS_transformed = (
    transformed_FIVEDATA
    .loc[transformed_FIVEDATA['site'] == 2]
    .drop(columns=['site', 'decade', 'index', 'fine_grain'])
)
HELIUS_transformed.head(n=3)

In [None]:
# Decade model, site 3 (SABRE): keep its rows and drop the helper columns.
SABRE_transformed = (
    transformed_FIVEDATA
    .loc[transformed_FIVEDATA['site'] == 3]
    .drop(columns=['site', 'decade', 'index', 'fine_grain'])
)
SABRE_transformed.head(n=3)

In [None]:
# Decade model, site 4 (Insight46): keep its rows and drop the helper columns.
Insight_transformed = (
    transformed_FIVEDATA
    .loc[transformed_FIVEDATA['site'] == 4]
    .drop(columns=['site', 'decade', 'index', 'fine_grain'])
)
Insight_transformed.head(n=3)

In [None]:
# Fine-grain model, site 1 (EDIS): keep its rows and drop the helper columns.
fg_EDIS_transformed = (
    fg_transformed_FIVEDATA
    .loc[fg_transformed_FIVEDATA['site'] == 1]
    .drop(columns=['site', 'decade', 'index', 'fine_grain'])
)
fg_EDIS_transformed.head(n=3)

In [None]:
# Fine-grain model, site 2 (HELIUS): keep its rows and drop the helper columns.
fg_HELIUS_transformed = (
    fg_transformed_FIVEDATA
    .loc[fg_transformed_FIVEDATA['site'] == 2]
    .drop(columns=['site', 'decade', 'index', 'fine_grain'])
)
fg_HELIUS_transformed.head(n=3)

In [None]:
# Fine-grain model, site 3 (SABRE): keep its rows and drop the helper columns.
fg_SABRE_transformed = (
    fg_transformed_FIVEDATA
    .loc[fg_transformed_FIVEDATA['site'] == 3]
    .drop(columns=['site', 'decade', 'index', 'fine_grain'])
)
fg_SABRE_transformed.head(n=3)

In [None]:
# Fine-grain model, site 4 (Insight46): keep its rows and drop the helper columns.
fg_Insight_transformed = (
    fg_transformed_FIVEDATA
    .loc[fg_transformed_FIVEDATA['site'] == 4]
    .drop(columns=['site', 'decade', 'index', 'fine_grain'])
)
fg_Insight_transformed.head(n=3)

In [None]:
# Write every per-site harmonized table to its CSV file, in the listed order:
# first the decade-model outputs, then the fine-grain-model outputs.
# NOTE(review): the fine-grain EDIS file is named '...EDIS1.csv' while every
# other file has no numeric suffix — confirm this is intentional before renaming.
harmonized_outputs = {
    'harm_results/autocombat/5autocom5_harm_topmri_v_e.csv': TOPMRI_transformed,
    'harm_results/autocombat/5autocom5_harm_EDIS.csv': EDIS_transformed,
    'harm_results/autocombat/5autocom5_harm_HELIUS.csv': HELIUS_transformed,
    'harm_results/autocombat/5autocom5_harm_SABRE.csv': SABRE_transformed,
    'harm_results/autocombat/5autocom5_harm_Insight.csv': Insight_transformed,
    'harm_results/autocombat/5fg_autocom5_harm_topmri_v_e.csv': fg_TOPMRI_transformed,
    'harm_results/autocombat/5fg_autocom5_harm_EDIS1.csv': fg_EDIS_transformed,
    'harm_results/autocombat/5fg_autocom5_harm_HELIUS.csv': fg_HELIUS_transformed,
    'harm_results/autocombat/5fg_autocom5_harm_SABRE.csv': fg_SABRE_transformed,
    'harm_results/autocombat/5fg_autocom5_harm_Insight.csv': fg_Insight_transformed,
}
for csv_path, harmonized_table in harmonized_outputs.items():
    harmonized_table.to_csv(csv_path)