# CovBat: TOP and StrokeMRI vs. SABRE harmonized datasets

Note: this notebook must be run in the `covbat` environment. The environment file is located inside `vendor/covbat`.

## import libraries

In [None]:
import os
import sys

import pandas as pd
import numpy as np

import patsy

sys.path.insert(0, '../../') # path to functions

import cvasl.harmony as har
import cvasl.vendor.covbat.covbat as covbat

## import data

In [None]:
# Datasets for this work: every cohort sits in its own folder and uses the
# same CSV file name.
file_name = 'TrainingDataComplete.csv'
TOP_path = '../our_datasets/TOP/'
MRI_path = '../our_datasets/StrokeMRI/'
SABRE_path = '../our_datasets/SABRE/'

# Full path of each cohort's training table.
TOP_file, MRI_file, SABRE_file = (
    os.path.join(folder, file_name)
    for folder in (TOP_path, MRI_path, SABRE_path)
)

# Read the three tables; the first CSV column becomes the row index.
TOP, MRI, SABRE = (
    pd.read_csv(csv_file, index_col=0)
    for csv_file in (TOP_file, MRI_file, SABRE_file)
)

In [None]:
# Preview the first three rows of the SABRE table as loaded from disk.
SABRE.head(3)

In [None]:
# Recode the SABRE Sex column: 1 becomes 0 and 2 becomes 1
# (presumably to match the coding used by the other cohorts - confirm).
# NOTE: Series.map yields NaN for any value that is not a key of the mapping,
# so re-running this cell on already-recoded data would corrupt the column.
sex_mapping = {1: 0, 2: 1}
SABRE = SABRE.assign(Sex=SABRE['Sex'].map(sex_mapping))
SABRE.head(3)

In [None]:
# The 'ID' column is not used downstream; remove it from TOP.
TOP = TOP.drop(columns=['ID'])
TOP.head(3)

In [None]:
# The 'ID' column is not used downstream; remove it from StrokeMRI.
MRI = MRI.drop(columns=['ID'])
MRI.tail(5)

In [None]:
# Remove the 'ID' column and label every SABRE row with site code 3.
SABRE = SABRE.drop(columns=['ID']).assign(Site=3)
SABRE.tail(5)

In [None]:
# Stack TOP on top of StrokeMRI and label the combined cohort with site
# code 2 (this sets 'Site' to 2 for every row of the combined table).
TOPMRI = pd.concat([TOP, MRI]).assign(Site=2)
TOPMRI.head(3)

In [None]:
# Preview the last three rows of the combined TOP + StrokeMRI table.
TOPMRI.tail(3)

In [None]:
# Stack the TOP + StrokeMRI rows and the SABRE rows into a single table.
# NOTE: pd.concat keeps the union of columns, so a column present in only
# one of the inputs is filled with NaN for the other's rows.
TOPMRISABRE = pd.concat((TOPMRI, SABRE), axis=0)

In [None]:
# Covariate table: Age, Sex and Site, indexed by participant.
phenoTOPMRISABRE = (
    TOPMRISABRE
    .loc[:, ['participant_id', 'Age', 'Sex', 'Site']]
    .set_index('participant_id')
)
phenoTOPMRISABRE.head(3)

In [None]:
# Index by participant, then transpose so that each original column becomes
# a row and each participant becomes a column.
dat_TOPMRISABRE = TOPMRISABRE.set_index('participant_id').transpose()

In [None]:
# Preview the first three rows of the transposed table.
dat_TOPMRISABRE.head(3)

In [None]:
# Preview the first three rows of the covariate table.
phenoTOPMRISABRE.head(3)

In [None]:
# Design matrix with an intercept plus Age and Sex terms, built from the
# covariate table and returned as a DataFrame.
modZ = patsy.dmatrix(
    "~ Age + Sex",
    data=phenoTOPMRISABRE,
    return_type="dataframe",
)
modZ.head(3)

In [None]:
# Run the vendored covbat `combat` routine on the last 17 rows of the
# transposed table. The Site column is passed as the second argument
# (presumably the batch labels - confirm against the vendored signature),
# the Age + Sex design matrix as `model`, and "Age" as the numerical covariate.
# NOTE(review): the hard-coded 17 depends on the column layout of the input
# CSVs, which is not visible here - verify it selects exactly the intended rows.
covbatTOPMRISABRE = covbat.combat(dat_TOPMRISABRE.tail(17), phenoTOPMRISABRE['Site'],  model=modZ, numerical_covariates ="Age")

In [None]:
# Discard the first two rows of the combat output and keep the rest.
# NOTE(review): presumably the 17-row slice passed to combat included two
# non-feature rows that are restored from the original data afterwards -
# confirm against the CSV column layout.
covbatTOPMRISABRE = covbatTOPMRISABRE[2:]

In [None]:
# Preview the first three rows of the original (unadjusted) transposed table;
# these are the rows placed in front of the combat output in the next cell.
dat_TOPMRISABRE.head(3)

In [None]:
# Put the first three original rows in front of the trimmed combat output,
# transpose back to one row per participant, and move the participant index
# into an ordinary column.
covbatTOPMRISABRE = (
    pd.concat([dat_TOPMRISABRE.head(3), covbatTOPMRISABRE])
    .T
    .reset_index()
)

In [None]:
# Display the reassembled table.
covbatTOPMRISABRE

In [None]:
# Split the table back into the two cohorts using the site codes assigned
# earlier (2 = TOP + StrokeMRI, 3 = SABRE).
TOPMRI_adjusted = covbatTOPMRISABRE.loc[covbatTOPMRISABRE['Site'].eq(2)]
SABRE_adjusted = covbatTOPMRISABRE.loc[covbatTOPMRISABRE['Site'].eq(3)]

In [None]:
# The site label is constant within this subset, so remove the column.
TOPMRI_adjusted = TOPMRI_adjusted.drop(columns='Site')
TOPMRI_adjusted.head(3)

In [None]:
# The site label is constant within this subset, so remove the column.
SABRE_adjusted = SABRE_adjusted.drop(columns='Site')
SABRE_adjusted.tail(3)

In [None]:
# Write both adjusted tables to CSV. The row index is written as the first
# column (pandas default for to_csv).
# to_csv does not create missing parent folders and raises OSError when the
# target directory is absent, so make sure the output directory exists first.
os.makedirs('harm_results/covbat', exist_ok=True)
TOPMRI_adjusted.to_csv('harm_results/covbat/topmri_covbat_a_SABRE.csv')
SABRE_adjusted.to_csv('harm_results/covbat/sabre_covbat_a_topmri.csv')