# Neuroharmonize datasets

This notebook applies neuroHarmonize, a ComBat-GAM algorithm that allows non-linear covariate effects, to our data to create neuroHarmonized datasets. It should be run in the `neurogamy` environment.

In [None]:
import os
from neuroHarmonize import harmonizationLearn
import pandas as pd
import numpy as np

In [None]:
# Read each cohort's training table; all files follow one naming scheme.
_csv_pattern = '../new_data/TrainingDataComplete_{}.csv'
EDIS = pd.read_csv(_csv_pattern.format('EDIS'))
HELIUS = pd.read_csv(_csv_pattern.format('HELIUS'))
SABRE = pd.read_csv(_csv_pattern.format('SABRE'))
MRI = pd.read_csv(_csv_pattern.format('StrokeMRI'))
TOP = pd.read_csv(_csv_pattern.format('TOP'))
Insight46 = pd.read_csv(_csv_pattern.format('Insight46'))

In [None]:
# Preview the last three HELIUS rows.
HELIUS.tail(3)

In [None]:
# Tag each cohort with a numeric site label: TOP -> 0, StrokeMRI -> 1.
for cohort_frame, site_code in ((TOP, 0), (MRI, 1)):
    cohort_frame['Site'] = site_code
MRI.head(3)

In [None]:
## We found one participant ID that is used at two separate sites (HELIUS and SABRE). Therefore we show both rows and rename the HELIUS one.

In [None]:
# Show the HELIUS row(s) carrying the shared participant ID.
HELIUS[HELIUS['participant_id']=='sub-153852_1']

In [None]:
# Show the SABRE row(s) carrying the same participant ID.
SABRE[SABRE['participant_id']=='sub-153852_1']

In [None]:
# Append "H" to the HELIUS copy of the ID so it no longer equals 'sub-153852_1'.
HELIUS.loc[HELIUS['participant_id']=='sub-153852_1', 'participant_id'] = 'sub-153852_1H'

In [None]:
# Preview the first three TOP rows.
TOP.head(3)

# Here we will harmonize just TOP and StrokeMRI

In [None]:
# Stack TOP and StrokeMRI row-wise; each frame keeps its own row labels.
TOPMRI = pd.concat((TOP, MRI), axis=0)
TOPMRI.head(3)

In [None]:
# Preview the last three rows of the stacked frame.
TOPMRI.tail(3)

In [None]:
# List the column names of the stacked frame.
TOPMRI.columns

We will encode M0 as 1 for "No" and 2 for "Yes". We will encode the 3D spiral readout as 1 and PCASL labelling as 1.

In [None]:
#TOPMRI.readout.unique()

In [None]:
# Lower-case every column name so later cells can use lower-case labels.
TOPMRI.columns = [label.lower() for label in TOPMRI.columns]

In [None]:
# Show the distinct labelling values present before recoding.
TOPMRI.labelling.unique()

In [None]:
# Recode the acquisition descriptors as numbers:
# m0 No/Yes -> 1/2, readout 3DSpiral -> 1, labelling PCASL -> 1.
_topmri_recoding = (
    ('m0', ['No', 'Yes'], [1, 2]),
    ('readout', ['3DSpiral'], [1]),
    ('labelling', ['PCASL'], [1]),
)
for _column, _labels, _codes in _topmri_recoding:
    TOPMRI[_column] = TOPMRI[_column].replace(to_replace=_labels, value=_codes)

In [None]:
# Same numeric recoding on the stand-alone TOP frame (capitalised column names).
_top_recoding = (
    ('M0', ['No', 'Yes'], [1, 2]),
    ('Readout', ['3DSpiral'], [1]),
    ('Labelling', ['PCASL'], [1]),
)
for _column, _labels, _codes in _top_recoding:
    TOP[_column] = TOP[_column].replace(to_replace=_labels, value=_codes)

In [None]:
# Same numeric recoding on the stand-alone StrokeMRI frame.
_mri_recoding = (
    ('M0', ['No', 'Yes'], [1, 2]),
    ('Readout', ['3DSpiral'], [1]),
    ('Labelling', ['PCASL'], [1]),
)
for _column, _labels, _codes in _mri_recoding:
    MRI[_column] = MRI[_column].replace(to_replace=_labels, value=_codes)

In [None]:
# Recode HELIUS acquisition descriptors as numbers:
# M0 No/Yes -> 1/2, Readout 2DEPI -> 4, Labelling PCASL -> 1.
HELIUS.M0 = HELIUS.M0.replace(to_replace=['No', 'Yes'], value=[1, 2])
# NOTE(review): the SABRE cell maps the same '2DEPI' readout to 2, not 4 — confirm which code is intended.
HELIUS.Readout = HELIUS.Readout.replace(to_replace=['2DEPI'], value=[4])
HELIUS.Labelling = HELIUS.Labelling.replace(to_replace=['PCASL'], value=[1])
# NOTE(review): this displays the whole frame; a .head() preview may be enough.
HELIUS

In [None]:
# Recode EDIS acquisition descriptors as numbers:
# M0 No/Yes -> 1/2, Readout 2DEPI -> 4, Labelling PASL -> 1.
EDIS.M0 = EDIS.M0.replace(to_replace=['No', 'Yes'], value=[1, 2])
EDIS.Readout = EDIS.Readout.replace(to_replace=['2DEPI'], value=[4])
# NOTE(review): 'PASL' receives code 1, the same code the other cohorts use for 'PCASL' —
# confirm whether the two labelling schemes are meant to share a code.
EDIS.Labelling = EDIS.Labelling.replace(to_replace=['PASL'], value=[1])


In [None]:
# Recode Insight46 acquisition descriptors as numbers:
# M0 No/Yes -> 1/2, Readout 3DGRASE -> 3, Labelling PCASL -> 1.
_insight_recoding = (
    ('M0', ['No', 'Yes'], [1, 2]),
    ('Readout', ['3DGRASE'], [3]),
    ('Labelling', ['PCASL'], [1]),
)
for _column, _labels, _codes in _insight_recoding:
    Insight46[_column] = Insight46[_column].replace(to_replace=_labels, value=_codes)

In [None]:
# Recode SABRE acquisition descriptors as numbers:
# M0 No/Yes -> 1/2, Readout 2DEPI -> 2, Labelling PCASL -> 1.
SABRE.M0 = SABRE.M0.replace(to_replace=['No', 'Yes'], value=[1, 2])
# NOTE(review): the HELIUS and EDIS cells map '2DEPI' to 4, here it becomes 2 — confirm which code is intended.
SABRE.Readout = SABRE.Readout.replace(to_replace=['2DEPI'], value=[2])
SABRE.Labelling = SABRE.Labelling.replace(to_replace=['PCASL'], value=[1])

In [None]:
# Display the m0 column to check the recoding.
TOPMRI.m0

In [None]:
# for column in TOPMRI.columns:
#     print(column)
#     print(type(TOPMRI[column].head(1).item()))

In [None]:
# Feature columns used by the pairwise harmonizations, assembled group by group:
# volumetrics, regional CoV, regional CBF, then acquisition descriptors.
_volumetric = ['gm_vol', 'wm_vol', 'csf_vol', 'gm_icvratio', 'gmwm_icvratio',
               'wmhvol_wmvol', 'wmh_count']
_regions = ['deepwm', 'aca', 'mca', 'pca', 'totalgm']
_regional_cov = [f'{region}_b_cov' for region in _regions]
_regional_cbf = [f'{region}_b_cbf' for region in _regions]
_acquisition = ['ld', 'pld', 'labelling', 'readout', 'm0']
common_features = _volumetric + _regional_cov + _regional_cbf + _acquisition

In [None]:
# Separate the columns to harmonize from the covariates.
# The site column is renamed to 'SITE' before being passed to harmonizationLearn.
TOPMRI_features = TOPMRI.loc[:, common_features]
TOPMRI_covariates = TOPMRI.loc[:, ['age', 'sex', 'site']].rename(columns={'site': 'SITE'})
TOPMRI_covariates.head(3)

In [None]:
#TOPMRI_covariates.reset_index()

In [None]:
# Convert the feature table to a NumPy array (the form passed to harmonizationLearn) and display it.
TOPMRI_features_array = np.array(TOPMRI_features)
TOPMRI_features_array


In [None]:
# Run harmonization; keep the returned model in my_model and the adjusted data in my_data_adj.
my_model, my_data_adj = harmonizationLearn(TOPMRI_features_array, TOPMRI_covariates)

In [None]:
# turn adjusted data into dataframe with column names, then add covariates, then participant IDs
# Rebuild a labelled table from the harmonized array: feature columns first,
# then the covariates, then the participant IDs.
# Column labels come from common_features — the same list that selected the
# input features — so the labels cannot drift out of sync with the array.
neuroharmonized_topmri = pd.DataFrame(my_data_adj, columns=common_features)

# TOPMRI still carries the row labels of its two source frames, so renumber
# both pieces before the column-wise join; drop=True avoids creating (and then
# dropping) a throwaway 'index' column.
neuroharmonized_topmri = pd.concat(
    [
        neuroharmonized_topmri,
        TOPMRI_covariates.reset_index(drop=True),
        TOPMRI.participant_id.reset_index(drop=True),
    ],
    axis=1,
)
neuroharmonized_topmri.head(3)

In [None]:
# create adjusted csvs
# Split the harmonized table back into its two cohorts by site label.
_topmri_site = neuroharmonized_topmri['SITE']
top_neuroharm_to_stroke = neuroharmonized_topmri.loc[_topmri_site == 0]
stroke_neuroharm_to_top = neuroharmonized_topmri.loc[_topmri_site == 1]

In [None]:
# Show the original TOP layout for comparison with the harmonized tables.
TOP.head(3)

In [None]:
# reorganize to familair pattern
# Move participant_id, age and sex to the front (positions 0, 1, 2) of both tables.
for _frame in (stroke_neuroharm_to_top, top_neuroharm_to_stroke):
    for _position, _label in enumerate(("participant_id", "age", "sex")):
        _frame.insert(_position, _label, _frame.pop(_label))

In [None]:
# The site label is no longer needed once the cohorts are separated.
top_neuroharm_to_stroke = top_neuroharm_to_stroke.drop(columns=['SITE'])
stroke_neuroharm_to_top = stroke_neuroharm_to_top.drop(columns=['SITE'])
stroke_neuroharm_to_top.columns

In [None]:
# Write both harmonized cohorts to disk. to_csv does not create missing
# folders, so make sure the output directory exists first.
os.makedirs('harm_results/neurocharm', exist_ok=True)
top_neuroharm_to_stroke.to_csv('harm_results/neurocharm/top_neuroharm_to_stroke.csv')
stroke_neuroharm_to_top.to_csv('harm_results/neurocharm/stroke_neuroharm_to_top.csv')

In [None]:
# Print the Python type of the harmonized TOP table.
print(type(top_neuroharm_to_stroke))

In [None]:
# (sabre_vs_topmri_only[['gm_vol', 'wm_vol', 'csf_vol',
#        'gm_icvratio', 'gmwm_icvratio', 'wmhvol_wmvol', 'wmh_count',
#        'deepwm_b_cov', 'aca_b_cov', 'mca_b_cov', 'pca_b_cov', 'totalgm_b_cov',
#        'deepwm_b_cbf', 'aca_b_cbf', 'mca_b_cbf', 'pca_b_cbf', 'totalgm_b_cbf',]] <0).sum()

# Now we join TOP and StrokeMRI into one dataset and harmonize it with each of the other datasets individually

In [None]:
# Treat TOP + StrokeMRI as one reference cohort: an independent copy with
# fresh 0..n-1 row labels and a single shared site code of 0.
unified_TOPMRI = TOPMRI.copy(deep=True).reset_index(drop=True)
unified_TOPMRI['site'] = 0
unified_TOPMRI.head(3)

In [None]:
#SABRE = SABRE.drop('Unnamed: 0', axis=1)
#SABRE = SABRE.assign(sex = SABRE.sex.map(sex_mapping))
#SABRE['site'] = 2
# Lower-case the SABRE column names, as was done for TOPMRI.
SABRE.columns = SABRE.columns.str.lower()
SABRE.head(2) 

In [None]:
# SABRE gets site code 2.
SABRE['site'] = 2
SABRE.head(3)

In [None]:
# Stack the reference cohort and SABRE; reset_index() renumbers the rows and
# keeps the previous labels in an 'index' column.
TOPMRIvsSABRE = pd.concat([unified_TOPMRI, SABRE]).reset_index()

In [None]:
# List the columns of the combined frame.
TOPMRIvsSABRE.columns

In [None]:
# Covariates for this pair, with the site column renamed to 'SITE'.
TOPMRIvsSABRE_covariates = (
    TOPMRIvsSABRE.loc[:, ['age', 'sex', 'site']]
    .rename(columns={'site': 'SITE'})
)
TOPMRIvsSABRE_covariates.head(3)

In [None]:
 TOPMRIvsSABRE_features = TOPMRIvsSABRE[common_features]
TOPMRIvsSABRE_features_array = np.array(TOPMRIvsSABRE_features)

In [None]:
# Run harmonization on TOPMRI + SABRE; keep the returned model and adjusted array.
my_model2, my_data_adj2 = harmonizationLearn(TOPMRIvsSABRE_features_array, TOPMRIvsSABRE_covariates)

In [None]:
# Display the adjusted array.
my_data_adj2

In [None]:
# turn adjusted data into dataframe with column names, then add covariates, then participant IDs
# Label the adjusted array with the feature names, then append the covariates
# and the participant IDs column-wise (all pieces renumbered 0..n-1).
neuroharmonized_topmrivsabre = pd.concat(
    [
        pd.DataFrame(data=my_data_adj2, columns=common_features),
        TOPMRIvsSABRE_covariates.reset_index(drop=True),
        TOPMRIvsSABRE.participant_id.reset_index(drop=True),
    ],
    axis=1,
)
neuroharmonized_topmrivsabre.head(3)

In [None]:
# reorganize to familair pattern
# Move participant_id, age and sex to the front (positions 0, 1, 2).
for _position, _label in enumerate(("participant_id", "age", "sex")):
    neuroharmonized_topmrivsabre.insert(
        _position, _label, neuroharmonized_topmrivsabre.pop(_label)
    )

In [None]:
# create adjusted csvs
# Separate the SABRE rows (site 2) from the reference rows (site 0).
_sabre_pair_site = neuroharmonized_topmrivsabre['SITE']
sabre_vs_topmri_only = neuroharmonized_topmrivsabre.loc[_sabre_pair_site == 2]
topmri_vs_sabre_only = neuroharmonized_topmrivsabre.loc[_sabre_pair_site == 0]

In [None]:
#topmri_vs_sabre_only

In [None]:
# Remove the site label from both halves before saving.
sabre_vs_topmri_only = sabre_vs_topmri_only.drop(columns=['SITE'])
topmri_vs_sabre_only = topmri_vs_sabre_only.drop(columns=['SITE'])

In [None]:
#sabre_vs_topmri_only

In [None]:
# Write the SABRE pair to disk. to_csv does not create missing folders,
# so make sure the output directory exists first.
os.makedirs('harm_results/neurocharm', exist_ok=True)
sabre_vs_topmri_only.to_csv('harm_results/neurocharm/sabre_vs_topmri_only.csv')
topmri_vs_sabre_only.to_csv('harm_results/neurocharm/topmri_vs_sabre_only.csv')

In [None]:
# Insight46 gets site code 3 (stored as 'Site' while the columns are still capitalised).
Insight46['Site'] = 3
Insight46.head(2) 

In [None]:
# Lower-case the Insight46 column names, as was done for TOPMRI.
Insight46.columns = Insight46.columns.str.lower()

In [None]:
# Stack the reference cohort and Insight46; reset_index() renumbers the rows
# and keeps the previous labels in an 'index' column.
TOPMRIvsInsight46 = pd.concat([unified_TOPMRI, Insight46]).reset_index()

In [None]:
# Covariates for this pair, with the site column renamed to 'SITE'.
TOPMRIvsInsight_covariates = (
    TOPMRIvsInsight46.loc[:, ['age', 'sex', 'site']]
    .rename(columns={'site': 'SITE'})
)
TOPMRIvsInsight_covariates.head(3)

In [None]:
 TOPMRIvsInsight_features = TOPMRIvsInsight46[common_features]
TOPMRIvsInsight_features_array = np.array(TOPMRIvsInsight_features)

In [None]:
# Run harmonization on TOPMRI + Insight46; keep the returned model and adjusted array.
my_model3, my_data_adj3 = harmonizationLearn(TOPMRIvsInsight_features_array, TOPMRIvsInsight_covariates)

In [None]:
# Display the adjusted array.
my_data_adj3

In [None]:
# turn adjusted data into dataframe with column names, then add covariates, then participant IDs
# Label the adjusted array with the feature names, then append the covariates
# and the participant IDs column-wise (all pieces renumbered 0..n-1).
neuroharmonized_topmrivinsight = pd.concat(
    [
        pd.DataFrame(data=my_data_adj3, columns=common_features),
        TOPMRIvsInsight_covariates.reset_index(drop=True),
        TOPMRIvsInsight46.participant_id.reset_index(drop=True),
    ],
    axis=1,
)
neuroharmonized_topmrivinsight.head(3)

In [None]:
#neuroharmonized_topmrivinsight

In [None]:
# reorganize to familair pattern
# Move participant_id, age and sex to the front (positions 0, 1, 2).
for _position, _label in enumerate(("participant_id", "age", "sex")):
    neuroharmonized_topmrivinsight.insert(
        _position, _label, neuroharmonized_topmrivinsight.pop(_label)
    )

In [None]:
# create adjusted csvs
# Separate the Insight46 rows (site 3) from the reference rows (site 0).
_insight_pair_site = neuroharmonized_topmrivinsight['SITE']
insight_vs_topmri_only = neuroharmonized_topmrivinsight.loc[_insight_pair_site == 3]
topmri_vs_insight_only = neuroharmonized_topmrivinsight.loc[_insight_pair_site == 0]

In [None]:
# Remove the site label from both halves before saving.
insight_vs_topmri_only = insight_vs_topmri_only.drop(columns=['SITE'])
topmri_vs_insight_only = topmri_vs_insight_only.drop(columns=['SITE'])


In [None]:
# Display the harmonized Insight46 rows.
insight_vs_topmri_only

In [None]:
# Write the Insight46 pair to disk. to_csv does not create missing folders,
# so make sure the output directory exists first.
os.makedirs('harm_results/neurocharm', exist_ok=True)
insight_vs_topmri_only.to_csv('harm_results/neurocharm/insight_vs_topmri_only.csv')
topmri_vs_insight_only.to_csv('harm_results/neurocharm/topmri_vs_insight_only.csv')

In [None]:
# EDIS gets site code 4 (stored as 'Site' while the columns are still capitalised).
EDIS['Site'] = 4
EDIS.head(2) 

In [None]:
# Lower-case the EDIS column names, as was done for TOPMRI.
EDIS.columns = EDIS.columns.str.lower()

In [None]:
# Stack the reference cohort and EDIS; reset_index() renumbers the rows and
# keeps the previous labels in an 'index' column.
TOPMRIvsEDIS = pd.concat([unified_TOPMRI, EDIS]).reset_index()

In [None]:
# Covariates for this pair, with the site column renamed to 'SITE'.
TOPMRIvsEDIS_covariates = (
    TOPMRIvsEDIS.loc[:, ['age', 'sex', 'site']]
    .rename(columns={'site': 'SITE'})
)
TOPMRIvsEDIS_covariates.head(3)

In [None]:
# Select the shared feature columns and convert them to a NumPy array.
TOPMRIvsEDIS_features = TOPMRIvsEDIS.loc[:, common_features]
TOPMRIvsEDIS_features_array = np.array(TOPMRIvsEDIS_features)

In [None]:
# Run harmonization on TOPMRI + EDIS; keep the returned model and adjusted array.
my_model4, my_data_adj4 = harmonizationLearn(TOPMRIvsEDIS_features_array, TOPMRIvsEDIS_covariates)

In [None]:
# Display the adjusted array.
my_data_adj4

In [None]:
# turn adjusted data into dataframe with column names, then add covariates, then participant IDs
# Label the adjusted array with the feature names, then append the covariates
# and the participant IDs column-wise (all pieces renumbered 0..n-1).
neuroharmonized_topmrivedis = pd.concat(
    [
        pd.DataFrame(data=my_data_adj4, columns=common_features),
        TOPMRIvsEDIS_covariates.reset_index(drop=True),
        TOPMRIvsEDIS.participant_id.reset_index(drop=True),
    ],
    axis=1,
)
neuroharmonized_topmrivedis.head(3)

In [None]:
# reorganize to familair pattern
# Move participant_id, age and sex to the front (positions 0, 1, 2).
for _position, _label in enumerate(("participant_id", "age", "sex")):
    neuroharmonized_topmrivedis.insert(
        _position, _label, neuroharmonized_topmrivedis.pop(_label)
    )

In [None]:
# create adjusted csvs
# Separate the EDIS rows (site 4) from the reference rows (site 0).
_edis_pair_site = neuroharmonized_topmrivedis['SITE']
edis_vs_topmri_only = neuroharmonized_topmrivedis.loc[_edis_pair_site == 4]
topmri_vs_edis_only = neuroharmonized_topmrivedis.loc[_edis_pair_site == 0]

In [None]:
# Remove the site label from both halves before saving.
edis_vs_topmri_only = edis_vs_topmri_only.drop(columns=['SITE'])
topmri_vs_edis_only = topmri_vs_edis_only.drop(columns=['SITE'])


In [None]:
#edis_vs_topmri_only

In [None]:
# Write the EDIS pair to disk. to_csv does not create missing folders,
# so make sure the output directory exists first.
os.makedirs('harm_results/neurocharm', exist_ok=True)
edis_vs_topmri_only.to_csv('harm_results/neurocharm/edis_vs_topmri_only.csv')
topmri_vs_edis_only.to_csv('harm_results/neurocharm/topmri_vs_edis_only.csv')

In [None]:
# HELIUS gets site code 5 (stored as 'Site' while the columns are still capitalised).
HELIUS['Site'] = 5
HELIUS.head(2) 

In [None]:
# Lower-case the HELIUS column names, as was done for TOPMRI.
HELIUS.columns = HELIUS.columns.str.lower()

In [None]:
# Stack the reference cohort and HELIUS; reset_index() renumbers the rows and
# keeps the previous labels in an 'index' column.
TOPMRIvsHELIUS = pd.concat([unified_TOPMRI, HELIUS]).reset_index()

In [None]:
# Covariates for this pair, with the site column renamed to 'SITE'.
TOPMRIvsHELIUS_covariates = (
    TOPMRIvsHELIUS.loc[:, ['age', 'sex', 'site']]
    .rename(columns={'site': 'SITE'})
)
TOPMRIvsHELIUS_covariates.head(3)

In [None]:
# Select the shared feature columns and convert them to a NumPy array.
TOPMRIvsHELIUS_features = TOPMRIvsHELIUS.loc[:, common_features]
TOPMRIvsHELIUS_features_array = np.array(TOPMRIvsHELIUS_features)

In [None]:
# Run harmonization on TOPMRI + HELIUS; keep the returned model and adjusted array.
my_model5, my_data_adj5 = harmonizationLearn(TOPMRIvsHELIUS_features_array, TOPMRIvsHELIUS_covariates)

In [None]:
# Display the adjusted array.
my_data_adj5

In [None]:
# turn adjusted data into dataframe with column names, then add covariates, then participant IDs
# Label the adjusted array with the feature names, then append the covariates
# and the participant IDs column-wise (all pieces renumbered 0..n-1).
neuroharmonized_topmrivhelius = pd.concat(
    [
        pd.DataFrame(data=my_data_adj5, columns=common_features),
        TOPMRIvsHELIUS_covariates.reset_index(drop=True),
        TOPMRIvsHELIUS.participant_id.reset_index(drop=True),
    ],
    axis=1,
)
neuroharmonized_topmrivhelius.head(3)

In [None]:
# reorganize to familair pattern
# Move participant_id, age and sex to the front (positions 0, 1, 2).
for _position, _label in enumerate(("participant_id", "age", "sex")):
    neuroharmonized_topmrivhelius.insert(
        _position, _label, neuroharmonized_topmrivhelius.pop(_label)
    )

In [None]:
# Display the reordered TOPMRI + HELIUS table.
neuroharmonized_topmrivhelius

In [None]:
# create adjusted csvs
# Separate the HELIUS rows (site 5) from the reference rows (site 0).
_helius_pair_site = neuroharmonized_topmrivhelius['SITE']
helius_vs_topmri_only = neuroharmonized_topmrivhelius.loc[_helius_pair_site == 5]
topmri_vs_helius_only = neuroharmonized_topmrivhelius.loc[_helius_pair_site == 0]

In [None]:
# Remove the site label from both halves before saving.
helius_vs_topmri_only = helius_vs_topmri_only.drop(columns=['SITE'])
topmri_vs_helius_only = topmri_vs_helius_only.drop(columns=['SITE'])


In [None]:
# Display the harmonized HELIUS rows.
helius_vs_topmri_only

In [None]:
# Write the HELIUS pair to disk. to_csv does not create missing folders,
# so make sure the output directory exists first.
os.makedirs('harm_results/neurocharm', exist_ok=True)
helius_vs_topmri_only.to_csv('harm_results/neurocharm/helius_vs_topmri_only.csv')
topmri_vs_helius_only.to_csv('harm_results/neurocharm/topmri_vs_helius_only.csv')

# Now we will do five-way harmonization: TOPMRI vs. SABRE vs. Insight46 vs. EDIS vs. HELIUS

In [None]:
#this is all pending from here forward, we await the permissions on the other dataset

In [None]:
# Stack all five cohorts; reset_index() renumbers the rows and keeps the
# previous labels in an 'index' column.
TOPMRIvsSABRvsInisghvsEDISvsHELIUS = pd.concat(
    [unified_TOPMRI, SABRE, Insight46, EDIS, HELIUS]
).reset_index()

In [None]:
# Covariates for the five-way run, with the site column renamed to 'SITE'.
TOPMRIvsSABRvsInisghvsEDISvsHELIUS_covariates = (
    TOPMRIvsSABRvsInisghvsEDISvsHELIUS.loc[:, ['age', 'sex', 'site']]
    .rename(columns={'site': 'SITE'})
)
TOPMRIvsSABRvsInisghvsEDISvsHELIUS_covariates.head(3)

In [None]:
# List the columns of the five-cohort frame.
TOPMRIvsSABRvsInisghvsEDISvsHELIUS.columns

In [None]:
# Feature set for the five-way run. Compared with common_features it leaves out
# the deepwm_* columns and the acquisition descriptors (ld, pld, labelling, readout, m0).
_five_way_feature_names = [
    'gm_vol', 'wm_vol', 'csf_vol',
    'gm_icvratio', 'gmwm_icvratio',
    'wmhvol_wmvol', 'wmh_count',
    'aca_b_cov', 'mca_b_cov', 'pca_b_cov', 'totalgm_b_cov',
    'aca_b_cbf', 'mca_b_cbf', 'pca_b_cbf', 'totalgm_b_cbf',
]
TOPMRIvsSABRvsInisghvsEDISvsHELIUS_features = TOPMRIvsSABRvsInisghvsEDISvsHELIUS[_five_way_feature_names]
TOPMRIvsSABRvsInisghvsEDISvsHELIUS_features_array = np.array(TOPMRIvsSABRvsInisghvsEDISvsHELIUS_features)

In [None]:
# Run harmonization on all five cohorts at once; keep the returned model and adjusted array.
my_modelA, my_data_adjA = harmonizationLearn(TOPMRIvsSABRvsInisghvsEDISvsHELIUS_features_array, TOPMRIvsSABRvsInisghvsEDISvsHELIUS_covariates)

In [None]:
# Display the adjusted array.
my_data_adjA

In [None]:
# turn adjusted data into dataframe with column names, then add covariates, then participant IDs
# Rebuild a labelled table from the five-way adjusted array: feature columns
# first, then the covariates, then the participant IDs.
# Column labels are taken from the features frame the array was built from,
# instead of a second hand-typed copy of the list, so they always match the array.
neuroharmonized_A = pd.DataFrame(
    my_data_adjA,
    columns=list(TOPMRIvsSABRvsInisghvsEDISvsHELIUS_features.columns),
)
# drop=True renumbers each piece 0..n-1 without creating a throwaway 'index' column.
neuroharmonized_A = pd.concat(
    [
        neuroharmonized_A,
        TOPMRIvsSABRvsInisghvsEDISvsHELIUS_covariates.reset_index(drop=True),
        TOPMRIvsSABRvsInisghvsEDISvsHELIUS.participant_id.reset_index(drop=True),
    ],
    axis=1,
)
neuroharmonized_A.head(3)

In [None]:
# reorganize to familair pattern
# Move participant_id, age and sex to the front (positions 0, 1, 2).
for _position, _label in enumerate(("participant_id", "age", "sex")):
    neuroharmonized_A.insert(_position, _label, neuroharmonized_A.pop(_label))

In [None]:
# create adjusted csvs
# Split the five-way table into one table per site code
# (0 = TOPMRI, 2 = SABRE, 3 = Insight46, 4 = EDIS, 5 = HELIUS).
_five_way_site = neuroharmonized_A['SITE']
insight_vs_topmri_5way = neuroharmonized_A.loc[_five_way_site == 3]
sabre_vs_topmri_5way = neuroharmonized_A.loc[_five_way_site == 2]
topmri_vs_sabre_5way = neuroharmonized_A.loc[_five_way_site == 0]
edis_vs_topmri_5way = neuroharmonized_A.loc[_five_way_site == 4]
helius_vs_topmri_5way = neuroharmonized_A.loc[_five_way_site == 5]

In [None]:
# Remove the site label from every per-cohort table before saving.
insight_vs_topmri_5way = insight_vs_topmri_5way.drop(columns=['SITE'])
sabre_vs_topmri_5way = sabre_vs_topmri_5way.drop(columns=['SITE'])
topmri_vs_sabre_5way = topmri_vs_sabre_5way.drop(columns=['SITE'])
edis_vs_topmri_5way = edis_vs_topmri_5way.drop(columns=['SITE'])
helius_vs_topmri_5way = helius_vs_topmri_5way.drop(columns=['SITE'])

In [None]:
# Display the HELIUS rows from the five-way harmonization.
helius_vs_topmri_5way 

In [None]:
# Write every five-way table to disk. to_csv does not create missing folders,
# so make sure the output directory exists first.
os.makedirs('harm_results/neurocharm', exist_ok=True)
insight_vs_topmri_5way.to_csv('harm_results/neurocharm/insight_vs_topmri_5way.csv')
helius_vs_topmri_5way.to_csv('harm_results/neurocharm/helius_vs_topmri_5way.csv')
sabre_vs_topmri_5way.to_csv('harm_results/neurocharm/sabre_vs_topmri_5way.csv')
topmri_vs_sabre_5way.to_csv('harm_results/neurocharm/topmri_vs_sabre_5way.csv')
edis_vs_topmri_5way.to_csv('harm_results/neurocharm/edis_vs_topmri_5way.csv')

In [None]:
# end of notebook!

In [None]:
# Display the Insight46 rows from the five-way harmonization.
insight_vs_topmri_5way