In [1]:
import os
import sys
import numpy as np
import pandas as pd

In [2]:
# Project directories. The '/your path' prefix looks like a placeholder for the
# local checkout location — adjust before running.
data_dir = '/your path/multiomics-cardiovascular-disease/data/'
results_dir = '/your path/multiomics-cardiovascular-disease/saved/results/'
# NOTE: unlike the two directories above, log_dir has no trailing slash
# (it is not used in the cells visible here).
log_dir = '/your path/multiomics-cardiovascular-disease/saved/log'
# Presumably the number of train/test splits (per-split files are suffixed K0, K1, ...) — TODO confirm.
split_times = 10
# Seed identifying the split; it is embedded in the split directory name below.
seed_to_split = 241104
# Not used in the cells visible here.
num_workers = 8

# Sub-directory of data_dir holding the per-split files for this seed.
# The trailing '/' is kept in the name, so split_seed_dir also ends with '/'.
split_seed_filename = f"split_seed-{seed_to_split}/"
split_seed_dir = os.path.join(data_dir, split_seed_filename)

In [3]:
# Build one long-format table (one row per participant x outcome x fold) that
# combines, for every test fold, the genomic score, the MetScore, the ProScore,
# the observed event indicator and the participant's age/sex.

# The age/sex table does not depend on the fold, so it is read once up front
# instead of being re-read from disk on every iteration.
age_sex = pd.read_csv(
    os.path.join(data_dir, 'processed/ukb_merged.csv'),
    usecols=['eid', 'age', 'male'],
)

# Per-fold frames are collected here and concatenated once after the loop.
fold_frames = []

for k in range(split_times):
    # --- Genomic scores: strip the "prs_" marker so column names match the
    # outcome names used by the other tables.
    genomics = pd.read_feather(os.path.join(split_seed_dir, f"X_test_Genomics_K{k}.feather"))
    genomics.columns = [col.replace("prs_", "") for col in genomics.columns]
    genomics = genomics.rename(columns={"vte": "vt"})
    # The 'va' and 'cvd_death' outcomes reuse the 'af' and 'cad' score columns —
    # presumably because no dedicated score exists for them; TODO confirm.
    genomics['va'] = genomics['af']
    genomics['cvd_death'] = genomics['cad']
    genomics_long = pd.melt(genomics, id_vars=['eid'], var_name='outcome', value_name='prs')
    # Tag rows with the fold before merging so 'fold' follows 'prs' in the column order.
    genomics_long['fold'] = k

    # --- Metabolomic and proteomic scores predicted on this fold's test set.
    metabolomics = pd.read_csv(os.path.join(results_dir, f"MetScore/test_scores_K{k}.csv"))
    metabolomics_long = pd.melt(metabolomics, id_vars=['eid'], var_name='outcome', value_name='metscore')

    proteomics = pd.read_csv(os.path.join(results_dir, f"ProScore/test_scores_K{k}.csv"))
    proteomics_long = pd.melt(proteomics, id_vars=['eid'], var_name='outcome', value_name='proscore')

    # --- Event indicators for this fold's test set.
    e = pd.read_feather(os.path.join(split_seed_dir, f'e_test_K{k}.feather'))
    e_long = pd.melt(e, id_vars=['eid'], var_name='outcome', value_name='event')

    # Left-join everything onto the genomics rows: participants/outcomes missing
    # from a later table get NaN rather than being dropped.
    merged_data = pd.merge(genomics_long, metabolomics_long, on=['eid', 'outcome'], how='left')
    merged_data = pd.merge(merged_data, proteomics_long, on=['eid', 'outcome'], how='left')
    merged_data = pd.merge(merged_data, e_long, on=['eid', 'outcome'], how='left')
    merged_data = pd.merge(merged_data, age_sex, on=['eid'], how='left')

    fold_frames.append(merged_data)

observed_event_rate = pd.concat(fold_frames, ignore_index=True)
# Written relative to the current working directory.
observed_event_rate.to_csv('observed_event_rate.csv', index=False)
observed_event_rate

Unnamed: 0,eid,outcome,prs,fold,metscore,proscore,event,age,male
0,1002627,cad,-0.513180,0,-2.913559,-3.038017,0,64,0
1,1006647,cad,-1.513317,0,-3.728572,-3.064125,0,44,0
2,1007634,cad,1.964256,0,-2.643847,-2.794146,0,61,0
3,1010327,cad,-1.175524,0,-3.297099,-3.093149,0,47,0
4,1013147,cad,0.632512,0,-3.445893,-2.924177,0,65,0
...,...,...,...,...,...,...,...,...,...
218767,6018531,cvd_death,-1.381985,9,-4.466974,-4.955414,0,46,1
218768,6021114,cvd_death,1.912248,9,-2.998327,-2.645683,0,48,1
218769,6022027,cvd_death,-0.130757,9,-3.974407,-3.487737,0,60,1
218770,6022766,cvd_death,0.543610,9,-4.498716,-4.900415,0,43,1
