### This script selects the IDs with:
- Physical activity
- Diabetes
- Myocardial infarction
- Cardiomyopathy

In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
# Load the three input CSVs in order: the common-ID list, the
# physical-activity table and the myocardial table.
common_ids, exercise_ids, myocardial_ids = (
    pd.read_csv(csv_path)
    for csv_path in (
        './common_10k_with_67k_fundus_32k_cmr_MTDT.csv',
        './physical_activity.csv',
        './myocardial_infa.csv',
    )
)

In [None]:
# Inspection only: list the column names available in the myocardial table
print(myocardial_ids.columns)

In [None]:
# Restrict both tables to the participants whose `eid` appears in the
# `ID` column of the common-ID list.
in_common = common_ids['ID'].values
exercise_ids = exercise_ids.loc[exercise_ids['eid'].isin(in_common)]
myocardial_ids = myocardial_ids.loc[myocardial_ids['eid'].isin(in_common)]

In [None]:
# Persist the filtered tables as comma-separated files without the index column
exercise_ids.to_csv('new_exercise_ids.csv', index=False, sep=',')
myocardial_ids.to_csv('new_myocardial_ids.csv', index=False, sep=',')

In [None]:
# print(myocardial_ids.info())

In [None]:
# print(myocardial_ids.iloc[:,0:5])

In [None]:
# 42000-0.0 -> Date of myocardial infarction
# 42001-0.0 -> Source of myocardial infarction report

# 42002-0.0 -> Date of STEMI (ST-Elevation Myocardial Infarction)
# 42003-0.0 -> Source of STEMI report

# 42004-0.0 -> Date of NSTEMI
# 42005-0.0 -> Source of NSTEMI report

# 42006-0.0 -> Date of stroke
# 42007-0.0 -> Source of stroke report

# 42008-0.0 -> Date of ischaemic stroke
# 42009-0.0 -> Source of ischaemic stroke report

In [None]:
# Select the participants that have a recorded date for each outcome;
# rows without a date are removed by dropna().
#   42000-0.0 -> date of myocardial infarction
#   42002-0.0 -> date of STEMI
# NOTE: 42004-0.0 is the NSTEMI date, so it must not be used for the
# frame that is named and reported as STEMI.
myo_infarct = myocardial_ids[['eid', '42000-0.0']].dropna()
STEMI = myocardial_ids[['eid', '42002-0.0']].dropna()

In [None]:
# Report how many participants were selected for each outcome
print(f'myocardial infarction: {len(myo_infarct)}')
print(f'STEMI: {len(STEMI)}')

In [None]:
# Start the discard list with the ids from both outcome selections
discard_ids = pd.concat([outcome['eid'] for outcome in (myo_infarct, STEMI)])

### Exercise!! 

In [None]:
# 904  --  Number of days/week of vigorous physical activity 10+ minutes
# 1001  -- Duration of strenuous sports
# 991  --  Frequency of strenuous sports in last 4 weeks
# 10971 -- Duration of vigorous physical activity (pilot)
# 914  --  Duration of vigorous activity
# 2634  -- Duration of heavy DIY


# 1100  --  Drive faster than motorway speed limit
# 1021  --  Duration of light DIY
# 894  --  Duration of moderate activity
# 10962  --  Duration of moderate physical activity (pilot)
# 3647  --  Duration of other exercises
# 874  --  Duration of walks
# 10953  --  Duration of walks (pilot)
# 981  --  Duration walking for pleasure
# 2624  --  Frequency of heavy DIY in last 4 weeks
# 1011  --  Frequency of light DIY in last 4 weeks
# 3637  --  Frequency of other exercises in last 4 weeks
# 943  --  Frequency of stair climbing in last 4 weeks
# 971  --  Frequency of walking for pleasure in last 4 weeks
# 884  --  Number of days/week of moderate physical activity 10+ minutes
# 864  --  Number of days/week walked 10+ minutes
# 1090  --  Time spent driving
# 1080  --  Time spent using computer
# 1070  --  Time spent watching television (TV)
# 6164  --  Types of physical activity in last 4 weeks
# 6162  --  Types of transport used (excluding work)
# 924  --  Usual walking pace

In [None]:
# 904  --  Number of days/week of vigorous physical activity 10+ minutes
# 1001  -- Duration of strenuous sports
# 991  --  Frequency of strenuous sports in last 4 weeks
# 10971 -- Duration of vigorous physical activity (pilot)
# 914  --  Duration of vigorous activity
# 2634  -- Duration of heavy DIY

# Codification:

# 1 --  Less than 30 mins
# 2 --  30 mins to 1 hour
# 3 --  1 to 2 hours
# 4 --  2 to 4 hours
# 5 --  More than 4 hours
# -1 --  Do not know
# -3 --  Prefer not to answer

# strenuous_exercise = exercise_ids[['eid', '904-0.0', '1001-0.0', '991-0.0', '10971-0.0', '914-0.0', '2634-0.0']] # .dropna()
# Keep the participant id together with fields 904 and 10971, and drop
# every row in which any of these values is missing.
vigorous_columns = ['eid', '904-0.0', '10971-0.0']
strenuous_exercise = exercise_ids.loc[:, vigorous_columns].dropna()

In [None]:
# strenuous_exercise.hist()
# strenuous_exercise

In [None]:
# Keep only the participants with field 904 >= 2 AND field 10971 >= 3,
# marking each kept row with discard = 1.0; rows with any missing value
# are dropped as before.
is_strenuous = (strenuous_exercise['904-0.0'] >= 2.0) & (strenuous_exercise['10971-0.0'] >= 3.0)
strenuous_exercise = strenuous_exercise.loc[is_strenuous].assign(discard=1.0).dropna()

In [None]:
# Report the size of the strenuous-exercise selection
print(f'Participants practicing strenuous exercise: {len(strenuous_exercise)}')

In [None]:
# Append the strenuous-exercise participants to the discard list
strenuous_eids = strenuous_exercise['eid']
discard_ids = pd.concat([discard_ids, strenuous_eids])

### Diabetes!!

In [None]:
# Select the participants whose `dm` value is greater than 0.
# The result is built with a boolean mask plus assign() instead of writing
# a new column into a slice of `common_ids`, which is the chained-assignment
# pattern that raises SettingWithCopyWarning. The resulting frame is the
# same: columns ID, dm and discard (= 1.0), with any row containing a
# missing value removed.
id_and_dm = common_ids[['ID', 'dm']]
has_diabetes = id_and_dm['dm'] > 0.0
diabetes_ids = id_and_dm.loc[has_diabetes].assign(discard=1.0).dropna()

In [None]:
# Report the size of the diabetes selection
print(f'Participants with diabetes: {len(diabetes_ids)}')

In [None]:
# diabetes_ids

In [None]:
# Append the diabetes participants to the discard list
diabetes_id_column = diabetes_ids['ID']
discard_ids = pd.concat([discard_ids, diabetes_id_column])

In [None]:
# Bare expression: shows the accumulated discard list as the cell output
# (it may still contain repeated ids at this point).
discard_ids

In [None]:
# Collapse the accumulated list to distinct ids covering all the previous
# conditions: myocardial infarction, strenuous exercise and diabetes
distinct_ids = discard_ids.unique()
unique_discard_ids = pd.DataFrame(distinct_ids)
print(f'Number of ids: {len(unique_discard_ids)}')

### Removing discarded ids from the lists manual and automatic

- Manual IDs are the IDs with manual delineation in the CMR images
- Automatic IDs are the IDs remaining after quality assessment of the fundus images

In [None]:
# Load the manual table first, then the automatic table
manual_ids, automatic_ids = (
    pd.read_csv(csv_path)
    for csv_path in ('./manual_LVM_LVEDV_mtdt.csv', './ROIS_LVM_LVEDV_MTDT.csv')
)

In [None]:
# Report the table sizes before removing the discarded participants
print(f'Manual IDs: {len(manual_ids)}')
print(f'Automatic IDs: {len(automatic_ids)}')

In [None]:
# Remove every row whose `ID` appears in the discard list
# (the first and only column of `unique_discard_ids`).
ids_to_drop = unique_discard_ids.iloc[:, 0]
new_automatic_ids = automatic_ids.loc[~automatic_ids['ID'].isin(ids_to_drop)]
new_manual_ids = manual_ids.loc[~manual_ids['ID'].isin(ids_to_drop)]

In [None]:
# Report the table sizes after removing the discarded participants
print(f'New automatic: {len(new_automatic_ids)}')
print(f'New manual: {len(new_manual_ids)}')

In [None]:
# new_manual_ids.to_csv('manual_LVM_LVEDV_mtdt_reduced.csv', index=False)

### Normalising LVM and LVEDV by BSA

In [None]:
# DuBois formula for body surface area (BSA):
# BSA [m^2] = 0.007184 * W^0.425 * H^0.725, with weight W in kg and height H in cm
# http://www-users.med.cornell.edu/~spon/picu/calc/bsacalc.htm

In [None]:
# new_manual_ids['LVEDV'] = round(new_manual_ids['LVEDV']/((pow(new_manual_ids['w'],0.425)*pow(new_manual_ids['h'],0.725))*0.007184),3)
# new_manual_ids['LVM'] = round(new_manual_ids['LVM']/((pow(new_manual_ids['w'],0.425)*pow(new_manual_ids['h'],0.725))*0.007184),3)

In [None]:
# print(len(new_manual_ids))
# print(len(new_manual_ids.dropna()))
# new_manual_ids = new_manual_ids.dropna()

In [None]:
# Write the reduced manual table without the index column.
# NOTE: the BSA normalisation statements in this notebook are commented out,
# so the values are saved exactly as they were read and filtered.
new_manual_ids.to_csv('manual_LVM_LVEDV_mtdt_reduced.csv', index=False)
# new_manual_ids

In [None]:
# new_automatic_ids['LVEDV_automatic'] = round(new_automatic_ids['LVEDV_automatic']/((pow(new_automatic_ids['w'],0.425)*pow(new_automatic_ids['h'],0.725))*0.007184),3)
# new_automatic_ids['LVM_automatic'] = round(new_automatic_ids['LVM_automatic']/((pow(new_automatic_ids['w'],0.425)*pow(new_automatic_ids['h'],0.725))*0.007184),3)

In [None]:
# print(len(new_automatic_ids))
# print(len(new_automatic_ids.dropna()))
# new_automatic_ids = new_automatic_ids.dropna()

In [None]:
# Write the reduced automatic table without the index column.
# NOTE: the BSA normalisation statements in this notebook are commented out,
# so the values are saved exactly as they were read and filtered.
new_automatic_ids.to_csv('automatic_LVM_LVEDV_mtdt_reduced.csv', index=False)