# Description

The goal of this notebook is very simple: it just lists the content (traits/diseases) that belongs to each cluster across all selected "best partitions". Although one could take a look at them here to check whether the clusters of traits make sense, that analysis is carried out first by looking at the clustering trees (which are generated later). This notebook then serves as a simple list with the content of the clusters.

# Modules loading

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from IPython.display import display
from pathlib import Path

import pandas as pd

from utils import generate_result_set_name
import conf

# Settings

In [3]:
# Directory with the consensus clustering results, located under the
# configured clustering results directory and resolved to an absolute path.
CONSENSUS_CLUSTERING_DIR = (
    Path(conf.RESULTS["CLUSTERING_DIR"]) / "consensus_clustering"
).resolve()

display(CONSENSUS_CLUSTERING_DIR)

PosixPath('/home/miltondp/projects/labs/greenelab/phenoplier/base/results/clustering/consensus_clustering')

## Load data

In [4]:
# Used below both as the subdirectory of DATA_TRANSFORMATIONS_DIR to read from
# and as the leading part of the input file name.
INPUT_SUBSET = "umap"

In [5]:
# Combined with INPUT_SUBSET to form the prefix of the input file name
# (presumably identifies the data set that was projected -- confirm upstream).
INPUT_STEM = "z_score_std-projection-smultixcan-efo_partial-mashr-zscores"

In [6]:
# Options passed to generate_result_set_name() to build the input file name
# (presumably the parameters the dimensionality reduction was run with --
# they must match the ones used when the file was generated).
DR_OPTIONS = dict(
    n_components=5,
    metric="euclidean",
    n_neighbors=15,
    random_state=0,
)

In [7]:
# Absolute path of the pickled input data:
# <DATA_TRANSFORMATIONS_DIR>/<INPUT_SUBSET>/<name derived from DR_OPTIONS>.pkl
input_filepath = (
    Path(conf.RESULTS["DATA_TRANSFORMATIONS_DIR"])
    / INPUT_SUBSET
    / generate_result_set_name(
        DR_OPTIONS, prefix=f"{INPUT_SUBSET}-{INPUT_STEM}-", suffix=".pkl"
    )
).resolve()
display(input_filepath)

# Fail early, before trying to read the file, if it has not been generated.
assert input_filepath.exists(), "Input file does not exist"

# File name without the ".pkl" extension.
input_filepath_stem = input_filepath.stem
display(input_filepath_stem)

PosixPath('/home/miltondp/projects/labs/greenelab/phenoplier/base/results/data_transformations/umap/umap-z_score_std-projection-smultixcan-efo_partial-mashr-zscores-metric_euclidean-n_components_5-n_neighbors_15-random_state_0.pkl')

'umap-z_score_std-projection-smultixcan-efo_partial-mashr-zscores-metric_euclidean-n_components_5-n_neighbors_15-random_state_0'

In [8]:
# Load the projected data (one row per trait, per the preview below).
# NOTE(review): unpickling can execute arbitrary code; input_filepath is built
# from the project's configured results directory, so this assumes that
# directory only contains files produced by this project.
data_umap = pd.read_pickle(input_filepath)

In [9]:
# (number of rows, number of columns) of the loaded data.
data_umap.shape

(3752, 5)

In [10]:
# Preview the first rows to check the index (trait names) and the columns.
data_umap.head()

Unnamed: 0,UMAP1,UMAP2,UMAP3,UMAP4,UMAP5
100001_raw-Food_weight,0.426554,0.670532,7.363805,1.171837,6.297295
100002_raw-Energy,-1.605179,0.815699,8.288521,0.990394,6.817351
100003_raw-Protein,-1.656178,0.788297,8.355906,1.017072,6.845651
100004_raw-Fat,-1.508325,0.802536,8.328274,1.033939,6.709319
100005_raw-Carbohydrate,-1.617872,0.812711,8.307973,1.020575,6.825944


# Load best partitions

In [11]:
# Pickle file with the best partitions, stored in the consensus clustering
# results directory.
input_file = (CONSENSUS_CLUSTERING_DIR / "best_partitions_by_k.pkl").resolve()
display(input_file)

PosixPath('/home/miltondp/projects/labs/greenelab/phenoplier/base/results/clustering/consensus_clustering/best_partitions_by_k.pkl')

In [12]:
# Load the table of best partitions (indexed by k, per the preview below).
# NOTE(review): unpickling can execute arbitrary code; this assumes the file
# in the project's results directory was produced by this project.
best_partitions = pd.read_pickle(input_file)

In [13]:
# (number of rows, number of columns) of the best partitions table.
best_partitions.shape

(59, 4)

In [14]:
# Preview the first rows; the "partition" and "selected" columns are the ones
# used in the analysis below.
best_partitions.head()

Unnamed: 0_level_0,method,partition,ari_median,selected
k,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
11,scc_025,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.090231,True
13,scc_025,"[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",0.090222,True
14,scc_025,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.090138,True
22,scc_020,"[1, 19, 19, 19, 19, 19, 19, 19, 19, 1, 19, 19,...",0.090117,True
12,scc_025,"[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 1...",0.089742,True


# Analysis of clusterings

In [15]:
from IPython.display import HTML

In [16]:
def show_cluster_stats(clustering_data, selected_partition, selected_cluster):
    """Display the size and the members of one cluster of a partition.

    Args:
        clustering_data: pandas object whose index holds the trait names; it is
            filtered with a boolean mask derived from ``selected_partition``.
        selected_partition: cluster labels compared element-wise against
            ``selected_cluster`` (assumed to be aligned with the rows of
            ``clustering_data``).
        selected_cluster: label of the cluster to show.

    Returns:
        None. The number of traits and the list of trait names are shown with
        ``display``.
    """
    in_cluster = selected_partition == selected_cluster
    traits = list(clustering_data[in_cluster].index)

    display(len(traits))
    display(traits)

In [17]:
# k values (the index of best_partitions) of the partitions flagged as
# selected, in ascending order.
selected_k_values = sorted(best_partitions.loc[best_partitions["selected"]].index)
display(selected_k_values)

[5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 22, 25, 26]

In [18]:
# For every selected partition (ascending k), show its summary row, the size
# of each cluster and the traits that belong to each cluster.
for k in selected_k_values:
    display(HTML(f"<h2>Partition with k={k}</h2>"))
    display(best_partitions.loc[k])

    # Cluster label of each row of data_umap (matched by position below).
    part = best_partitions.loc[k, "partition"]
    display(part.shape)

    # Number of traits per cluster label.
    part_stats = pd.Series(part).value_counts()
    display(part_stats)

    # Go through the cluster labels in ascending order.
    for cluster_number in sorted(part_stats.index):
        display(HTML(f"<h3>Cluster {k}.{cluster_number}</h3>"))

        # Traits whose label in this partition equals the current cluster.
        cluster_traits = data_umap.loc[part == cluster_number].index
        display(len(cluster_traits))
        display(cluster_traits)

method                                                  scc_030
partition     [1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 4, 4, 1, 1, 1, ...
ari_median                                            0.0876165
selected                                                   True
Name: 5, dtype: object

(3752,)

1    3233
4     445
0      47
3      15
2      12
dtype: int64

47

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23104_raw-Body_mass_index_BMI', '23105_raw-Basal_metabolic_rate',
       '23106_raw-Impedance_of_whole_body', '23107_raw-Impedance_of_leg_right',
       '23108_raw-Impedance_of_leg_left', '23109_raw-Impedance_of_arm_right',
       '23110_raw-Impedance_of_arm_left', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right',
       '23115_raw-Leg_fat_percentage_left', '23116_raw-Leg_fat_mass_left',
       '23117_raw-Leg_

3233

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100010-Portion_size', '100013_raw-Vitamin_B12',
       ...
       'viral disease', 'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3233)

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

15

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count', 'erythrocyte count',
       'reticulocyte count'],
      dtype='object')

445

Index(['100009_raw-Englyst_dietary_fibre', '100011_raw-Iron',
       '100012_raw-Vitamin_B6', '100016_raw-Potassium', '100017_raw-Magnesium',
       '1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       ...
       'sum of basophil and neutrophil counts',
       'sum of eosinophil and basophil counts',
       'sum of neutrophil and eosinophil counts',
       'systemic lupus erythematosus', 'tiredness measurement',
       'type i diabetes mellitus', 'varicose veins',
       'wellbeing measurement AND family relationship', 'wheezing',
       'worry measurement'],
      dtype='object', length=445)

method                              eac_complete_coassoc_matrix
partition     [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
ari_median                                            0.0893994
selected                                                   True
Name: 6, dtype: object

(3752,)

2    3313
1     311
0      57
4      36
3      23
5      12
dtype: int64

57

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '20154_raw-Forced_expiratory_volume_in_1second_FEV1_predicted_percentage',
       '21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23104_raw-Body_mass_index_BMI', '23105_raw-Basal_metabolic_rate',
       '23106_raw-Impedance_of_whole_body', '23107_raw-Impedance_of_leg_right',
       '23108_raw-Impedance_of_leg_left', '23109_raw-Impedance_of_arm_right',
       '23110_raw-Impedance_of_arm_left', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right',
       '23115_r

311

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       '1110-Length_of_mobile_phone_use',
       ...
       'skin neoplasm', 'sleep duration', 'smoking behavior',
       'smoking cessation', 'snoring measurement', 'tiredness measurement',
       'type i diabetes mellitus', 'varicose veins', 'wheezing',
       'worry measurement'],
      dtype='object', length=311)

3313

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3313)

23

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30060_raw-Mean_corpuscular_haemoglobin_concentration',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width',
       '30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       '46_raw-Hand_grip_strength_left', '47_raw-Hand_grip_strength_right',
       'erythrocyte count', 'platelet count', 'reticulocyte count'

36

Index(['20003_1140861998-Treatmentmedication_code_ventolin_100micrograms_inhaler',
       '20003_1140883066-Treatmentmedication_code_insulin_product',
       '20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
       '2986-Started_insulin_within_one_year_diagnosis_of_diabetes',
       '30000_raw-White_blood_cell_leukocyte_count',
       '30120_raw-Lymphocyte_count', '30130_raw-Monocyte_count',
       '30140_raw-Neutrophill_count', '30150-Eosinophill_count',
       '30180_raw-Lymphocyte_percentage', '30190_raw-Monocyte_percentage',
       '30200_raw-Neutrophill_percentage', '30210_raw-Eosinophill_percentage',
       '6144_3-Never_eat_eggs_dairy_wheat_sugar_Wheat_products',
       '6152_100-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_None_of_the_above',
       '6152_8-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_Asthma',
       '6152_9-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_ecz

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

method                              eac_complete_coassoc_matrix
partition     [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
ari_median                                            0.0893994
selected                                                   True
Name: 7, dtype: object

(3752,)

2    3313
0     311
3      49
4      36
1      23
5      12
6       8
dtype: int64

311

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       '1110-Length_of_mobile_phone_use',
       ...
       'skin neoplasm', 'sleep duration', 'smoking behavior',
       'smoking cessation', 'snoring measurement', 'tiredness measurement',
       'type i diabetes mellitus', 'varicose veins', 'wheezing',
       'worry measurement'],
      dtype='object', length=311)

23

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30060_raw-Mean_corpuscular_haemoglobin_concentration',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width',
       '30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       '46_raw-Hand_grip_strength_left', '47_raw-Hand_grip_strength_right',
       'erythrocyte count', 'platelet count', 'reticulocyte count'

3313

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3313)

49

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '20154_raw-Forced_expiratory_volume_in_1second_FEV1_predicted_percentage',
       '21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23104_raw-Body_mass_index_BMI', '23105_raw-Basal_metabolic_rate',
       '23106_raw-Impedance_of_whole_body', '23107_raw-Impedance_of_leg_right',
       '23108_raw-Impedance_of_leg_left', '23109_raw-Impedance_of_arm_right',
       '23110_raw-Impedance_of_arm_left', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right',
       '23115_r

36

Index(['20003_1140861998-Treatmentmedication_code_ventolin_100micrograms_inhaler',
       '20003_1140883066-Treatmentmedication_code_insulin_product',
       '20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
       '2986-Started_insulin_within_one_year_diagnosis_of_diabetes',
       '30000_raw-White_blood_cell_leukocyte_count',
       '30120_raw-Lymphocyte_count', '30130_raw-Monocyte_count',
       '30140_raw-Neutrophill_count', '30150-Eosinophill_count',
       '30180_raw-Lymphocyte_percentage', '30190_raw-Monocyte_percentage',
       '30200_raw-Neutrophill_percentage', '30210_raw-Eosinophill_percentage',
       '6144_3-Never_eat_eggs_dairy_wheat_sugar_Wheat_products',
       '6152_100-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_None_of_the_above',
       '6152_8-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_Asthma',
       '6152_9-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_ecz

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

method                                                  scc_025
partition     [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
ari_median                                            0.0894507
selected                                                   True
Name: 8, dtype: object

(3752,)

6    3285
0     362
7      47
5      18
1      15
3      12
4       8
2       5
dtype: int64

362

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'smoking behavior', 'smoking cessation', 'snoring measurement',
       'systemic lupus erythematosus', 'tiredness measurement',
       'type i diabetes mellitus', 'varicose veins',
       'wellbeing measurement AND family relationship', 'wheezing',
       'worry measurement'],
      dtype='object', length=362)

15

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count', 'erythrocyte count',
       'reticulocyte count'],
      dtype='object')

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

18

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30130_raw-Monocyte_count', '30140_raw-Neutrophill_count',
       '30150-Eosinophill_count', '30180_raw-Lymphocyte_percentage',
       '30190_raw-Monocyte_percentage', '30200_raw-Neutrophill_percentage',
       '30210_raw-Eosinophill_percentage', 'eosinophil count',
       'granulocyte count', 'leukocyte count', 'lymphocyte count',
       'monocyte count', 'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of eosinophil and basophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

3285

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral disease', 'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3285)

47

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23104_raw-Body_mass_index_BMI', '23105_raw-Basal_metabolic_rate',
       '23106_raw-Impedance_of_whole_body', '23107_raw-Impedance_of_leg_right',
       '23108_raw-Impedance_of_leg_left', '23109_raw-Impedance_of_arm_right',
       '23110_raw-Impedance_of_arm_left', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right',
       '23115_raw-Leg_fat_percentage_left', '23116_raw-Leg_fat_mass_left',
       '23117_raw-Leg_

method                                                  scc_025
partition     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
ari_median                                            0.0894293
selected                                                   True
Name: 9, dtype: object

(3752,)

0    3285
8     362
7      29
5      18
6      18
2      15
3      12
4       8
1       5
dtype: int64

3285

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral disease', 'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3285)

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

15

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count', 'erythrocyte count',
       'reticulocyte count'],
      dtype='object')

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

18

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30130_raw-Monocyte_count', '30140_raw-Neutrophill_count',
       '30150-Eosinophill_count', '30180_raw-Lymphocyte_percentage',
       '30190_raw-Monocyte_percentage', '30200_raw-Neutrophill_percentage',
       '30210_raw-Eosinophill_percentage', 'eosinophil count',
       'granulocyte count', 'leukocyte count', 'lymphocyte count',
       'monocyte count', 'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of eosinophil and basophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

18

Index(['21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23104_raw-Body_mass_index_BMI', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23115_raw-Leg_fat_percentage_left',
       '23116_raw-Leg_fat_mass_left', '23119_raw-Arm_fat_percentage_right',
       '23120_raw-Arm_fat_mass_right', '23123_raw-Arm_fat_percentage_left',
       '23124_raw-Arm_fat_mass_left', '23127_raw-Trunk_fat_percentage',
       '23128_raw-Trunk_fat_mass', '48_raw-Waist_circumference',
       '49_raw-Hip_circumference'],
      dtype='object')

29

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23105_raw-Basal_metabolic_rate', '23106_raw-Impedance_of_whole_body',
       '23107_raw-Impedance_of_leg_right', '23108_raw-Impedance_of_leg_left',
       '23109_raw-Impedance_of_arm_right', '23110_raw-Impedance_of_arm_left',
       '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right', '23117_raw-Leg_fatfree_mass_left',
       '23118_raw-Leg_predicted_mass_left', '23121_raw-Arm_fatfree_mass_right',
       '23122_raw-Arm_predicted_mass_right', '23125_raw-Arm_fatfree_mass_left',
       '23126_raw-Arm_predicted_mass_left', '23129_raw-Trunk_fatfree_mass',
       '23130_raw-Trunk_predicted_mass', '3062_raw-Forced_vital_capacity_FVC',
       '30

362

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'smoking behavior', 'smoking cessation', 'snoring measurement',
       'systemic lupus erythematosus', 'tiredness measurement',
       'type i diabetes mellitus', 'varicose veins',
       'wellbeing measurement AND family relationship', 'wheezing',
       'worry measurement'],
      dtype='object', length=362)

method                              eac_complete_coassoc_matrix
partition     [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
ari_median                                            0.0893488
selected                                                   True
Name: 10, dtype: object

(3752,)

2    3313
0     284
7      49
4      36
8      19
9      16
5      12
3       8
6       8
1       7
dtype: int64

284

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       '1110-Length_of_mobile_phone_use',
       ...
       'skin neoplasm', 'sleep duration', 'smoking behavior',
       'smoking cessation', 'snoring measurement', 'tiredness measurement',
       'type i diabetes mellitus', 'varicose veins', 'wheezing',
       'worry measurement'],
      dtype='object', length=284)

7

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width',
       '46_raw-Hand_grip_strength_left', '47_raw-Hand_grip_strength_right',
       'platelet count'],
      dtype='object')

3313

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3313)

8

Index(['1717-Skin_colour', '1727-Ease_of_skin_tanning',
       '1737-Childhood_sunburn_occasions',
       '1747_1-Hair_colour_natural_before_greying_Blonde',
       '1747_2-Hair_colour_natural_before_greying_Red',
       '1747_3-Hair_colour_natural_before_greying_Light_brown',
       '1747_4-Hair_colour_natural_before_greying_Dark_brown',
       '1747_5-Hair_colour_natural_before_greying_Black'],
      dtype='object')

36

Index(['20003_1140861998-Treatmentmedication_code_ventolin_100micrograms_inhaler',
       '20003_1140883066-Treatmentmedication_code_insulin_product',
       '20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
       '2986-Started_insulin_within_one_year_diagnosis_of_diabetes',
       '30000_raw-White_blood_cell_leukocyte_count',
       '30120_raw-Lymphocyte_count', '30130_raw-Monocyte_count',
       '30140_raw-Neutrophill_count', '30150-Eosinophill_count',
       '30180_raw-Lymphocyte_percentage', '30190_raw-Monocyte_percentage',
       '30200_raw-Neutrophill_percentage', '30210_raw-Eosinophill_percentage',
       '6144_3-Never_eat_eggs_dairy_wheat_sugar_Wheat_products',
       '6152_100-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_None_of_the_above',
       '6152_8-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_Asthma',
       '6152_9-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_ecz

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

49

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '20154_raw-Forced_expiratory_volume_in_1second_FEV1_predicted_percentage',
       '21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23104_raw-Body_mass_index_BMI', '23105_raw-Basal_metabolic_rate',
       '23106_raw-Impedance_of_whole_body', '23107_raw-Impedance_of_leg_right',
       '23108_raw-Impedance_of_leg_left', '23109_raw-Impedance_of_arm_right',
       '23110_raw-Impedance_of_arm_left', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right',
       '23115_r

19

Index(['EAGLE_Eczema', 'Jones_et_al_2016_Chronotype', 'MAGNETIC_CH2.DB.ratio',
       'MAGNETIC_HDL.C', 'MAGNETIC_IDL.TG', 'MAGNETIC_LDL.C',
       'SSGAC_Education_Years_Pooled', 'alzheimer's disease',
       'attention deficit hyperactivity disorder', 'bone density',
       'coronary artery disease', 'estrogen-receptor negative breast cancer',
       'estrogen-receptor positive breast cancer',
       'family history of breast cancer', 'fasting blood glucose measurement',
       'fasting blood insulin measurement', 'inflammatory bowel disease',
       'intracranial volume measurement', 'schizophrenia'],
      dtype='object')

16

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30060_raw-Mean_corpuscular_haemoglobin_concentration',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count', 'erythrocyte count',
       'reticulocyte count'],
      dtype='object')

method                                                  scc_025
partition     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
ari_median                                            0.0902308
selected                                                   True
Name: 11, dtype: object

(3752,)

0     3300
10     340
9       29
8       18
6       18
5       12
2        9
1        8
7        7
4        6
3        5
dtype: int64

3300

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3300)

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

9

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume', 'erythrocyte count'],
      dtype='object')

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

6

Index(['30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       'reticulocyte count'],
      dtype='object')

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

18

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30130_raw-Monocyte_count', '30140_raw-Neutrophill_count',
       '30150-Eosinophill_count', '30180_raw-Lymphocyte_percentage',
       '30190_raw-Monocyte_percentage', '30200_raw-Neutrophill_percentage',
       '30210_raw-Eosinophill_percentage', 'eosinophil count',
       'granulocyte count', 'leukocyte count', 'lymphocyte count',
       'monocyte count', 'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of eosinophil and basophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

7

Index(['1717-Skin_colour', '1727-Ease_of_skin_tanning',
       '1747_1-Hair_colour_natural_before_greying_Blonde',
       '1747_2-Hair_colour_natural_before_greying_Red',
       '1747_3-Hair_colour_natural_before_greying_Light_brown',
       '1747_4-Hair_colour_natural_before_greying_Dark_brown',
       '1747_5-Hair_colour_natural_before_greying_Black'],
      dtype='object')

18

Index(['21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23104_raw-Body_mass_index_BMI', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23115_raw-Leg_fat_percentage_left',
       '23116_raw-Leg_fat_mass_left', '23119_raw-Arm_fat_percentage_right',
       '23120_raw-Arm_fat_mass_right', '23123_raw-Arm_fat_percentage_left',
       '23124_raw-Arm_fat_mass_left', '23127_raw-Trunk_fat_percentage',
       '23128_raw-Trunk_fat_mass', '48_raw-Waist_circumference',
       '49_raw-Hip_circumference'],
      dtype='object')

29

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23105_raw-Basal_metabolic_rate', '23106_raw-Impedance_of_whole_body',
       '23107_raw-Impedance_of_leg_right', '23108_raw-Impedance_of_leg_left',
       '23109_raw-Impedance_of_arm_right', '23110_raw-Impedance_of_arm_left',
       '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right', '23117_raw-Leg_fatfree_mass_left',
       '23118_raw-Leg_predicted_mass_left', '23121_raw-Arm_fatfree_mass_right',
       '23122_raw-Arm_predicted_mass_right', '23125_raw-Arm_fatfree_mass_left',
       '23126_raw-Arm_predicted_mass_left', '23129_raw-Trunk_fatfree_mass',
       '23130_raw-Trunk_predicted_mass', '3062_raw-Forced_vital_capacity_FVC',
       '30

340

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'sleep duration', 'smoking behavior', 'smoking cessation',
       'snoring measurement', 'systemic lupus erythematosus',
       'tiredness measurement', 'type i diabetes mellitus', 'varicose veins',
       'wheezing', 'worry measurement'],
      dtype='object', length=340)

method                                                  scc_025
partition     [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 1...
ari_median                                            0.0897423
selected                                                   True
Name: 12, dtype: object

(3752,)

10    3286
0      354
9       29
8       18
5       12
11      10
2        9
7        8
4        8
6        7
3        6
1        5
dtype: int64

354

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'smoking behavior', 'smoking cessation', 'snoring measurement',
       'systemic lupus erythematosus', 'tiredness measurement',
       'type i diabetes mellitus', 'varicose veins',
       'wellbeing measurement AND family relationship', 'wheezing',
       'worry measurement'],
      dtype='object', length=354)

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

9

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume', 'erythrocyte count'],
      dtype='object')

6

Index(['30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       'reticulocyte count'],
      dtype='object')

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

7

Index(['1717-Skin_colour', '1727-Ease_of_skin_tanning',
       '1747_1-Hair_colour_natural_before_greying_Blonde',
       '1747_2-Hair_colour_natural_before_greying_Red',
       '1747_3-Hair_colour_natural_before_greying_Light_brown',
       '1747_4-Hair_colour_natural_before_greying_Dark_brown',
       '1747_5-Hair_colour_natural_before_greying_Black'],
      dtype='object')

8

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30140_raw-Neutrophill_count', 'granulocyte count', 'leukocyte count',
       'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

18

Index(['21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23104_raw-Body_mass_index_BMI', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23115_raw-Leg_fat_percentage_left',
       '23116_raw-Leg_fat_mass_left', '23119_raw-Arm_fat_percentage_right',
       '23120_raw-Arm_fat_mass_right', '23123_raw-Arm_fat_percentage_left',
       '23124_raw-Arm_fat_mass_left', '23127_raw-Trunk_fat_percentage',
       '23128_raw-Trunk_fat_mass', '48_raw-Waist_circumference',
       '49_raw-Hip_circumference'],
      dtype='object')

29

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23105_raw-Basal_metabolic_rate', '23106_raw-Impedance_of_whole_body',
       '23107_raw-Impedance_of_leg_right', '23108_raw-Impedance_of_leg_left',
       '23109_raw-Impedance_of_arm_right', '23110_raw-Impedance_of_arm_left',
       '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right', '23117_raw-Leg_fatfree_mass_left',
       '23118_raw-Leg_predicted_mass_left', '23121_raw-Arm_fatfree_mass_right',
       '23122_raw-Arm_predicted_mass_right', '23125_raw-Arm_fatfree_mass_left',
       '23126_raw-Arm_predicted_mass_left', '23129_raw-Trunk_fatfree_mass',
       '23130_raw-Trunk_predicted_mass', '3062_raw-Forced_vital_capacity_FVC',
       '30

3286

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral disease', 'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3286)

10

Index(['30130_raw-Monocyte_count', '30150-Eosinophill_count',
       '30180_raw-Lymphocyte_percentage', '30190_raw-Monocyte_percentage',
       '30200_raw-Neutrophill_percentage', '30210_raw-Eosinophill_percentage',
       'eosinophil count', 'lymphocyte count', 'monocyte count',
       'sum of eosinophil and basophil counts'],
      dtype='object')

method                                                  scc_025
partition     [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
ari_median                                            0.0902222
selected                                                   True
Name: 13, dtype: object

(3752,)

2     3292
0      338
11      29
10      18
7       14
8       12
9       10
6        9
4        8
3        7
5        6
1        5
12       4
dtype: int64

338

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'self reported educational attainment', 'sleep duration',
       'smoking behavior', 'smoking cessation', 'snoring measurement',
       'tiredness measurement', 'type i diabetes mellitus', 'varicose veins',
       'wheezing', 'worry measurement'],
      dtype='object', length=338)

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

3292

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3292)

7

Index(['1717-Skin_colour', '1727-Ease_of_skin_tanning',
       '1747_1-Hair_colour_natural_before_greying_Blonde',
       '1747_2-Hair_colour_natural_before_greying_Red',
       '1747_3-Hair_colour_natural_before_greying_Light_brown',
       '1747_4-Hair_colour_natural_before_greying_Dark_brown',
       '1747_5-Hair_colour_natural_before_greying_Black'],
      dtype='object')

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

6

Index(['30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       'reticulocyte count'],
      dtype='object')

9

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume', 'erythrocyte count'],
      dtype='object')

14

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30130_raw-Monocyte_count', '30140_raw-Neutrophill_count',
       '30180_raw-Lymphocyte_percentage', '30190_raw-Monocyte_percentage',
       '30200_raw-Neutrophill_percentage', 'granulocyte count',
       'leukocyte count', 'lymphocyte count', 'monocyte count',
       'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

10

Index(['20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
       '6144_3-Never_eat_eggs_dairy_wheat_sugar_Wheat_products',
       'K11_COELIAC-Coeliac_disease', 'celiac disease',
       'hyperthyroidism AND thyrotoxicosis', 'hypothyroidism AND myxedema',
       'malabsorption syndrome', 'psoriasis', 'rheumatoid arthritis',
       'systemic lupus erythematosus'],
      dtype='object')

18

Index(['21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23104_raw-Body_mass_index_BMI', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23115_raw-Leg_fat_percentage_left',
       '23116_raw-Leg_fat_mass_left', '23119_raw-Arm_fat_percentage_right',
       '23120_raw-Arm_fat_mass_right', '23123_raw-Arm_fat_percentage_left',
       '23124_raw-Arm_fat_mass_left', '23127_raw-Trunk_fat_percentage',
       '23128_raw-Trunk_fat_mass', '48_raw-Waist_circumference',
       '49_raw-Hip_circumference'],
      dtype='object')

29

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23105_raw-Basal_metabolic_rate', '23106_raw-Impedance_of_whole_body',
       '23107_raw-Impedance_of_leg_right', '23108_raw-Impedance_of_leg_left',
       '23109_raw-Impedance_of_arm_right', '23110_raw-Impedance_of_arm_left',
       '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right', '23117_raw-Leg_fatfree_mass_left',
       '23118_raw-Leg_predicted_mass_left', '23121_raw-Arm_fatfree_mass_right',
       '23122_raw-Arm_predicted_mass_right', '23125_raw-Arm_fatfree_mass_left',
       '23126_raw-Arm_predicted_mass_left', '23129_raw-Trunk_fatfree_mass',
       '23130_raw-Trunk_predicted_mass', '3062_raw-Forced_vital_capacity_FVC',
       '30

4

Index(['30150-Eosinophill_count', '30210_raw-Eosinophill_percentage',
       'eosinophil count', 'sum of eosinophil and basophil counts'],
      dtype='object')

method                                                  scc_025
partition     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
ari_median                                            0.0901384
selected                                                   True
Name: 14, dtype: object

(3752,)

0     3294
13     336
11      29
12      18
3       12
9       10
6        9
1        8
8        8
7        7
10       6
2        6
5        5
4        4
dtype: int64

3294

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3294)

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

6

Index(['30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       'reticulocyte count'],
      dtype='object')

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

4

Index(['30150-Eosinophill_count', '30210_raw-Eosinophill_percentage',
       'eosinophil count', 'sum of eosinophil and basophil counts'],
      dtype='object')

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

9

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume', 'erythrocyte count'],
      dtype='object')

7

Index(['1717-Skin_colour', '1727-Ease_of_skin_tanning',
       '1747_1-Hair_colour_natural_before_greying_Blonde',
       '1747_2-Hair_colour_natural_before_greying_Red',
       '1747_3-Hair_colour_natural_before_greying_Light_brown',
       '1747_4-Hair_colour_natural_before_greying_Dark_brown',
       '1747_5-Hair_colour_natural_before_greying_Black'],
      dtype='object')

8

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30140_raw-Neutrophill_count', 'granulocyte count', 'leukocyte count',
       'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

10

Index(['20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
       '6144_3-Never_eat_eggs_dairy_wheat_sugar_Wheat_products',
       'K11_COELIAC-Coeliac_disease', 'celiac disease',
       'hyperthyroidism AND thyrotoxicosis', 'hypothyroidism AND myxedema',
       'malabsorption syndrome', 'psoriasis', 'rheumatoid arthritis',
       'systemic lupus erythematosus'],
      dtype='object')

6

Index(['30130_raw-Monocyte_count', '30180_raw-Lymphocyte_percentage',
       '30190_raw-Monocyte_percentage', '30200_raw-Neutrophill_percentage',
       'lymphocyte count', 'monocyte count'],
      dtype='object')

29

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23105_raw-Basal_metabolic_rate', '23106_raw-Impedance_of_whole_body',
       '23107_raw-Impedance_of_leg_right', '23108_raw-Impedance_of_leg_left',
       '23109_raw-Impedance_of_arm_right', '23110_raw-Impedance_of_arm_left',
       '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right', '23117_raw-Leg_fatfree_mass_left',
       '23118_raw-Leg_predicted_mass_left', '23121_raw-Arm_fatfree_mass_right',
       '23122_raw-Arm_predicted_mass_right', '23125_raw-Arm_fatfree_mass_left',
       '23126_raw-Arm_predicted_mass_left', '23129_raw-Trunk_fatfree_mass',
       '23130_raw-Trunk_predicted_mass', '3062_raw-Forced_vital_capacity_FVC',
       '30

18

Index(['21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23104_raw-Body_mass_index_BMI', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23115_raw-Leg_fat_percentage_left',
       '23116_raw-Leg_fat_mass_left', '23119_raw-Arm_fat_percentage_right',
       '23120_raw-Arm_fat_mass_right', '23123_raw-Arm_fat_percentage_left',
       '23124_raw-Arm_fat_mass_left', '23127_raw-Trunk_fat_percentage',
       '23128_raw-Trunk_fat_mass', '48_raw-Waist_circumference',
       '49_raw-Hip_circumference'],
      dtype='object')

336

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'self reported educational attainment', 'sleep duration',
       'smoking behavior', 'smoking cessation', 'snoring measurement',
       'tiredness measurement', 'type i diabetes mellitus', 'varicose veins',
       'wheezing', 'worry measurement'],
      dtype='object', length=336)

method                                                  scc_025
partition     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
ari_median                                            0.0896789
selected                                                   True
Name: 15, dtype: object

(3752,)

0     3290
14     326
11      29
10      18
13      14
5       12
9       10
3        9
8        8
6        8
7        7
12       6
4        6
2        5
1        4
dtype: int64

3290

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3290)

4

Index(['30150-Eosinophill_count', '30210_raw-Eosinophill_percentage',
       'eosinophil count', 'sum of eosinophil and basophil counts'],
      dtype='object')

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

9

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume', 'erythrocyte count'],
      dtype='object')

6

Index(['30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       'reticulocyte count'],
      dtype='object')

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

7

Index(['1717-Skin_colour', '1727-Ease_of_skin_tanning',
       '1747_1-Hair_colour_natural_before_greying_Blonde',
       '1747_2-Hair_colour_natural_before_greying_Red',
       '1747_3-Hair_colour_natural_before_greying_Light_brown',
       '1747_4-Hair_colour_natural_before_greying_Dark_brown',
       '1747_5-Hair_colour_natural_before_greying_Black'],
      dtype='object')

8

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30140_raw-Neutrophill_count', 'granulocyte count', 'leukocyte count',
       'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

10

Index(['20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
       '6144_3-Never_eat_eggs_dairy_wheat_sugar_Wheat_products',
       'K11_COELIAC-Coeliac_disease', 'celiac disease',
       'hyperthyroidism AND thyrotoxicosis', 'hypothyroidism AND myxedema',
       'malabsorption syndrome', 'psoriasis', 'rheumatoid arthritis',
       'systemic lupus erythematosus'],
      dtype='object')

18

Index(['21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23104_raw-Body_mass_index_BMI', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23115_raw-Leg_fat_percentage_left',
       '23116_raw-Leg_fat_mass_left', '23119_raw-Arm_fat_percentage_right',
       '23120_raw-Arm_fat_mass_right', '23123_raw-Arm_fat_percentage_left',
       '23124_raw-Arm_fat_mass_left', '23127_raw-Trunk_fat_percentage',
       '23128_raw-Trunk_fat_mass', '48_raw-Waist_circumference',
       '49_raw-Hip_circumference'],
      dtype='object')

29

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23105_raw-Basal_metabolic_rate', '23106_raw-Impedance_of_whole_body',
       '23107_raw-Impedance_of_leg_right', '23108_raw-Impedance_of_leg_left',
       '23109_raw-Impedance_of_arm_right', '23110_raw-Impedance_of_arm_left',
       '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right', '23117_raw-Leg_fatfree_mass_left',
       '23118_raw-Leg_predicted_mass_left', '23121_raw-Arm_fatfree_mass_right',
       '23122_raw-Arm_predicted_mass_right', '23125_raw-Arm_fatfree_mass_left',
       '23126_raw-Arm_predicted_mass_left', '23129_raw-Trunk_fatfree_mass',
       '23130_raw-Trunk_predicted_mass', '3062_raw-Forced_vital_capacity_FVC',
       '30

6

Index(['30130_raw-Monocyte_count', '30180_raw-Lymphocyte_percentage',
       '30190_raw-Monocyte_percentage', '30200_raw-Neutrophill_percentage',
       'lymphocyte count', 'monocyte count'],
      dtype='object')

14

Index(['20003_1140861958-Treatmentmedication_code_simvastatin',
       '20003_1141194794-Treatmentmedication_code_bendroflumethiazide',
       '4079_raw-Diastolic_blood_pressure_automated_reading',
       '4080_raw-Systolic_blood_pressure_automated_reading',
       '6150_100-Vascularheart_problems_diagnosed_by_doctor_None_of_the_above',
       '6150_4-Vascularheart_problems_diagnosed_by_doctor_High_blood_pressure',
       '6153_1-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Cholesterol_lowering_medication',
       '6153_100-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_None_of_the_above',
       '6153_2-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Blood_pressure_medication',
       '6177_1-Medication_for_cholesterol_blood_pressure_or_diabetes_Cholesterol_lowering_medication',
       '6177_100-Medication_for_cholesterol_blood_pressure_or_diabetes_None_of_the_above',
       '6177_2-Medicatio

326

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'self reported educational attainment', 'sleep duration',
       'smoking behavior', 'smoking cessation', 'snoring measurement',
       'tiredness measurement', 'type i diabetes mellitus', 'varicose veins',
       'wheezing', 'worry measurement'],
      dtype='object', length=326)

method                                                  scc_025
partition     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
ari_median                                            0.0892882
selected                                                   True
Name: 16, dtype: object

(3752,)

0     3319
14     295
11      29
10      18
12      14
1       12
2        9
8        8
6        8
9        7
7        7
13       6
5        6
15       5
3        5
4        4
dtype: int64

3319

Index(['100001_raw-Food_weight', '100002_raw-Energy', '100003_raw-Protein',
       '100004_raw-Fat', '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100010-Portion_size',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3319)

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

9

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', '30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30070_raw-Red_blood_cell_erythrocyte_distribution_width',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume', 'erythrocyte count'],
      dtype='object')

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

4

Index(['30150-Eosinophill_count', '30210_raw-Eosinophill_percentage',
       'eosinophil count', 'sum of eosinophil and basophil counts'],
      dtype='object')

6

Index(['30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       'reticulocyte count'],
      dtype='object')

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

7

Index(['1717-Skin_colour', '1727-Ease_of_skin_tanning',
       '1747_1-Hair_colour_natural_before_greying_Blonde',
       '1747_2-Hair_colour_natural_before_greying_Red',
       '1747_3-Hair_colour_natural_before_greying_Light_brown',
       '1747_4-Hair_colour_natural_before_greying_Dark_brown',
       '1747_5-Hair_colour_natural_before_greying_Black'],
      dtype='object')

8

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30140_raw-Neutrophill_count', 'granulocyte count', 'leukocyte count',
       'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

7

Index(['20003_1140883066-Treatmentmedication_code_insulin_product',
       '20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
       '2986-Started_insulin_within_one_year_diagnosis_of_diabetes',
       '6144_3-Never_eat_eggs_dairy_wheat_sugar_Wheat_products',
       'hyperthyroidism AND thyrotoxicosis', 'hypothyroidism AND myxedema',
       'psoriasis'],
      dtype='object')

18

Index(['21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23104_raw-Body_mass_index_BMI', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23115_raw-Leg_fat_percentage_left',
       '23116_raw-Leg_fat_mass_left', '23119_raw-Arm_fat_percentage_right',
       '23120_raw-Arm_fat_mass_right', '23123_raw-Arm_fat_percentage_left',
       '23124_raw-Arm_fat_mass_left', '23127_raw-Trunk_fat_percentage',
       '23128_raw-Trunk_fat_mass', '48_raw-Waist_circumference',
       '49_raw-Hip_circumference'],
      dtype='object')

29

Index(['20015_raw-Sitting_height',
       '20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23105_raw-Basal_metabolic_rate', '23106_raw-Impedance_of_whole_body',
       '23107_raw-Impedance_of_leg_right', '23108_raw-Impedance_of_leg_left',
       '23109_raw-Impedance_of_arm_right', '23110_raw-Impedance_of_arm_left',
       '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right', '23117_raw-Leg_fatfree_mass_left',
       '23118_raw-Leg_predicted_mass_left', '23121_raw-Arm_fatfree_mass_right',
       '23122_raw-Arm_predicted_mass_right', '23125_raw-Arm_fatfree_mass_left',
       '23126_raw-Arm_predicted_mass_left', '23129_raw-Trunk_fatfree_mass',
       '23130_raw-Trunk_predicted_mass', '3062_raw-Forced_vital_capacity_FVC',
       '30

14

Index(['20003_1140861958-Treatmentmedication_code_simvastatin',
       '20003_1141194794-Treatmentmedication_code_bendroflumethiazide',
       '4079_raw-Diastolic_blood_pressure_automated_reading',
       '4080_raw-Systolic_blood_pressure_automated_reading',
       '6150_100-Vascularheart_problems_diagnosed_by_doctor_None_of_the_above',
       '6150_4-Vascularheart_problems_diagnosed_by_doctor_High_blood_pressure',
       '6153_1-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Cholesterol_lowering_medication',
       '6153_100-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_None_of_the_above',
       '6153_2-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Blood_pressure_medication',
       '6177_1-Medication_for_cholesterol_blood_pressure_or_diabetes_Cholesterol_lowering_medication',
       '6177_100-Medication_for_cholesterol_blood_pressure_or_diabetes_None_of_the_above',
       '6177_2-Medicatio

6

Index(['30130_raw-Monocyte_count', '30180_raw-Lymphocyte_percentage',
       '30190_raw-Monocyte_percentage', '30200_raw-Neutrophill_percentage',
       'lymphocyte count', 'monocyte count'],
      dtype='object')

295

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'self rated health', 'self reported educational attainment',
       'sleep duration', 'smoking behavior', 'smoking cessation',
       'snoring measurement', 'tiredness measurement', 'varicose veins',
       'wheezing', 'worry measurement'],
      dtype='object', length=295)

5

Index(['K11_COELIAC-Coeliac_disease', 'celiac disease',
       'malabsorption syndrome', 'rheumatoid arthritis',
       'systemic lupus erythematosus'],
      dtype='object')

method                                                  scc_020
partition     [1, 19, 19, 19, 19, 19, 19, 19, 19, 1, 19, 19,...
ari_median                                            0.0901165
selected                                                   True
Name: 22, dtype: object

(3752,)

1     3321
20     248
17      18
18      17
15      15
19      15
9       14
14      13
0       12
13      12
16       9
11       8
10       8
7        7
8        6
6        6
12       5
2        5
3        4
5        4
4        4
21       1
dtype: int64

12

Index(['20003_1140883066-Treatmentmedication_code_insulin_product',
       '20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
       '2986-Started_insulin_within_one_year_diagnosis_of_diabetes',
       '6144_3-Never_eat_eggs_dairy_wheat_sugar_Wheat_products',
       'K11_COELIAC-Coeliac_disease', 'celiac disease',
       'hyperthyroidism AND thyrotoxicosis', 'hypothyroidism AND myxedema',
       'malabsorption syndrome', 'psoriasis', 'rheumatoid arthritis',
       'systemic lupus erythematosus'],
      dtype='object')

3321

Index(['100001_raw-Food_weight', '100010-Portion_size',
       '100013_raw-Vitamin_B12', '100015_raw-Vitamin_C', '100018_raw-Retinol',
       '100019_raw-Carotene', '100021_raw-Vitamin_D', '100022-Alcohol',
       '100023_raw-Starch', '1001-Duration_of_strenuous_sports',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3321)

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

4

Index(['30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume'],
      dtype='object')

4

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', 'erythrocyte count'],
      dtype='object')

4

Index(['30150-Eosinophill_count', '30210_raw-Eosinophill_percentage',
       'eosinophil count', 'sum of eosinophil and basophil counts'],
      dtype='object')

6

Index(['30130_raw-Monocyte_count', '30180_raw-Lymphocyte_percentage',
       '30190_raw-Monocyte_percentage', '30200_raw-Neutrophill_percentage',
       'lymphocyte count', 'monocyte count'],
      dtype='object')

7

Index(['1717-Skin_colour', '1727-Ease_of_skin_tanning',
       '1747_1-Hair_colour_natural_before_greying_Blonde',
       '1747_2-Hair_colour_natural_before_greying_Red',
       '1747_3-Hair_colour_natural_before_greying_Light_brown',
       '1747_4-Hair_colour_natural_before_greying_Dark_brown',
       '1747_5-Hair_colour_natural_before_greying_Black'],
      dtype='object')

6

Index(['30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       'reticulocyte count'],
      dtype='object')

14

Index(['20003_1140861958-Treatmentmedication_code_simvastatin',
       '20003_1141194794-Treatmentmedication_code_bendroflumethiazide',
       '4079_raw-Diastolic_blood_pressure_automated_reading',
       '4080_raw-Systolic_blood_pressure_automated_reading',
       '6150_100-Vascularheart_problems_diagnosed_by_doctor_None_of_the_above',
       '6150_4-Vascularheart_problems_diagnosed_by_doctor_High_blood_pressure',
       '6153_1-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Cholesterol_lowering_medication',
       '6153_100-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_None_of_the_above',
       '6153_2-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Blood_pressure_medication',
       '6177_1-Medication_for_cholesterol_blood_pressure_or_diabetes_Cholesterol_lowering_medication',
       '6177_100-Medication_for_cholesterol_blood_pressure_or_diabetes_None_of_the_above',
       '6177_2-Medicatio

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

8

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30140_raw-Neutrophill_count', 'granulocyte count', 'leukocyte count',
       'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

5

Index(['30120_raw-Lymphocyte_count',
       '6152_100-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_None_of_the_above',
       '6152_8-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_Asthma',
       '6152_9-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_Hayfever_allergic_rhinitis_or_eczema',
       'asthma'],
      dtype='object')

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

13

Index(['20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '23106_raw-Impedance_of_whole_body', '23107_raw-Impedance_of_leg_right',
       '23108_raw-Impedance_of_leg_left', '23109_raw-Impedance_of_arm_right',
       '23110_raw-Impedance_of_arm_left', '3062_raw-Forced_vital_capacity_FVC',
       '3063_raw-Forced_expiratory_volume_in_1second_FEV1',
       '3064_raw-Peak_expiratory_flow_PEF', '3143_raw-Ankle_spacing_width',
       '4100_raw-Ankle_spacing_width_left',
       '4119_raw-Ankle_spacing_width_right'],
      dtype='object')

15

Index(['EAGLE_Eczema', 'Jones_et_al_2016_Chronotype', 'MAGNETIC_CH2.DB.ratio',
       'MAGNETIC_HDL.C', 'MAGNETIC_IDL.TG', 'MAGNETIC_LDL.C',
       'alzheimer's disease', 'attention deficit hyperactivity disorder',
       'coronary artery disease', 'estrogen-receptor negative breast cancer',
       'estrogen-receptor positive breast cancer',
       'family history of breast cancer', 'fasting blood glucose measurement',
       'fasting blood insulin measurement', 'inflammatory bowel disease'],
      dtype='object')

9

Index(['6150_1-Vascularheart_problems_diagnosed_by_doctor_Heart_attack',
       'I9_CHD-Major_coronary_heart_disease_event',
       'I9_CHD_NOREV-Major_coronary_heart_disease_event_excluding_revascularizations',
       'I9_CORATHER-Coronary_atherosclerosis',
       'I9_IHD-Ischaemic_heart_disease_wide_definition',
       'I9_MI-Myocardial_infarction',
       'I9_MI_STRICT-Myocardial_infarction_strict',
       'acute myocardial infarction', 'myocardial infarction'],
      dtype='object')

18

Index(['21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23104_raw-Body_mass_index_BMI', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23115_raw-Leg_fat_percentage_left',
       '23116_raw-Leg_fat_mass_left', '23119_raw-Arm_fat_percentage_right',
       '23120_raw-Arm_fat_mass_right', '23123_raw-Arm_fat_percentage_left',
       '23124_raw-Arm_fat_mass_left', '23127_raw-Trunk_fat_percentage',
       '23128_raw-Trunk_fat_mass', '48_raw-Waist_circumference',
       '49_raw-Hip_circumference'],
      dtype='object')

17

Index(['20015_raw-Sitting_height',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23105_raw-Basal_metabolic_rate', '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right', '23117_raw-Leg_fatfree_mass_left',
       '23118_raw-Leg_predicted_mass_left', '23121_raw-Arm_fatfree_mass_right',
       '23122_raw-Arm_predicted_mass_right', '23125_raw-Arm_fatfree_mass_left',
       '23126_raw-Arm_predicted_mass_left', '23129_raw-Trunk_fatfree_mass',
       '23130_raw-Trunk_predicted_mass', '50_raw-Standing_height',
       'body height'],
      dtype='object')

15

Index(['100002_raw-Energy', '100003_raw-Protein', '100004_raw-Fat',
       '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100011_raw-Iron',
       '100012_raw-Vitamin_B6', '100014_raw-Folate', '100016_raw-Potassium',
       '100017_raw-Magnesium', '100024_raw-Calcium', '100025_raw-Vitamin_E'],
      dtype='object')

248

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'self rated health', 'self reported educational attainment',
       'sleep duration', 'smoking behavior', 'smoking cessation',
       'snoring measurement', 'tiredness measurement', 'varicose veins',
       'wheezing', 'worry measurement'],
      dtype='object', length=248)

1

Index(['30070_raw-Red_blood_cell_erythrocyte_distribution_width'], dtype='object')

method                                                  scc_020
partition     [0, 19, 19, 19, 19, 19, 19, 19, 19, 0, 19, 19,...
ari_median                                            0.0889124
selected                                                   True
Name: 25, dtype: object

(3752,)

0     3302
22     220
23      47
18      18
17      17
11      15
19      15
13      14
16      13
14      12
15       9
9        8
8        8
10       8
4        7
6        6
12       5
2        5
1        4
3        4
5        4
21       4
24       3
7        3
20       1
dtype: int64

3302

Index(['100001_raw-Food_weight', '100010-Portion_size',
       '100013_raw-Vitamin_B12', '100015_raw-Vitamin_C', '100018_raw-Retinol',
       '100019_raw-Carotene', '100021_raw-Vitamin_D', '100022-Alcohol',
       '100023_raw-Starch', '1001-Duration_of_strenuous_sports',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3302)

4

Index(['30150-Eosinophill_count', '30210_raw-Eosinophill_percentage',
       'eosinophil count', 'sum of eosinophil and basophil counts'],
      dtype='object')

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

4

Index(['30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume'],
      dtype='object')

7

Index(['1717-Skin_colour', '1727-Ease_of_skin_tanning',
       '1747_1-Hair_colour_natural_before_greying_Blonde',
       '1747_2-Hair_colour_natural_before_greying_Red',
       '1747_3-Hair_colour_natural_before_greying_Light_brown',
       '1747_4-Hair_colour_natural_before_greying_Dark_brown',
       '1747_5-Hair_colour_natural_before_greying_Black'],
      dtype='object')

4

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', 'erythrocyte count'],
      dtype='object')

6

Index(['30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       'reticulocyte count'],
      dtype='object')

3

Index(['30130_raw-Monocyte_count', '30190_raw-Monocyte_percentage',
       'monocyte count'],
      dtype='object')

8

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30140_raw-Neutrophill_count', 'granulocyte count', 'leukocyte count',
       'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

8

Index(['20003_1140883066-Treatmentmedication_code_insulin_product',
       '20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
       '2986-Started_insulin_within_one_year_diagnosis_of_diabetes',
       '6144_3-Never_eat_eggs_dairy_wheat_sugar_Wheat_products',
       'hyperthyroidism AND thyrotoxicosis', 'hypothyroidism AND myxedema',
       'psoriasis', 'systemic lupus erythematosus'],
      dtype='object')

15

Index(['EAGLE_Eczema', 'Jones_et_al_2016_Chronotype', 'MAGNETIC_CH2.DB.ratio',
       'MAGNETIC_HDL.C', 'MAGNETIC_IDL.TG', 'MAGNETIC_LDL.C',
       'alzheimer's disease', 'attention deficit hyperactivity disorder',
       'coronary artery disease', 'estrogen-receptor negative breast cancer',
       'estrogen-receptor positive breast cancer',
       'family history of breast cancer', 'fasting blood glucose measurement',
       'fasting blood insulin measurement', 'inflammatory bowel disease'],
      dtype='object')

5

Index(['30120_raw-Lymphocyte_count',
       '6152_100-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_None_of_the_above',
       '6152_8-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_Asthma',
       '6152_9-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_Hayfever_allergic_rhinitis_or_eczema',
       'asthma'],
      dtype='object')

14

Index(['20003_1140861958-Treatmentmedication_code_simvastatin',
       '20003_1141194794-Treatmentmedication_code_bendroflumethiazide',
       '4079_raw-Diastolic_blood_pressure_automated_reading',
       '4080_raw-Systolic_blood_pressure_automated_reading',
       '6150_100-Vascularheart_problems_diagnosed_by_doctor_None_of_the_above',
       '6150_4-Vascularheart_problems_diagnosed_by_doctor_High_blood_pressure',
       '6153_1-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Cholesterol_lowering_medication',
       '6153_100-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_None_of_the_above',
       '6153_2-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Blood_pressure_medication',
       '6177_1-Medication_for_cholesterol_blood_pressure_or_diabetes_Cholesterol_lowering_medication',
       '6177_100-Medication_for_cholesterol_blood_pressure_or_diabetes_None_of_the_above',
       '6177_2-Medicatio

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

9

Index(['6150_1-Vascularheart_problems_diagnosed_by_doctor_Heart_attack',
       'I9_CHD-Major_coronary_heart_disease_event',
       'I9_CHD_NOREV-Major_coronary_heart_disease_event_excluding_revascularizations',
       'I9_CORATHER-Coronary_atherosclerosis',
       'I9_IHD-Ischaemic_heart_disease_wide_definition',
       'I9_MI-Myocardial_infarction',
       'I9_MI_STRICT-Myocardial_infarction_strict',
       'acute myocardial infarction', 'myocardial infarction'],
      dtype='object')

13

Index(['20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '23106_raw-Impedance_of_whole_body', '23107_raw-Impedance_of_leg_right',
       '23108_raw-Impedance_of_leg_left', '23109_raw-Impedance_of_arm_right',
       '23110_raw-Impedance_of_arm_left', '3062_raw-Forced_vital_capacity_FVC',
       '3063_raw-Forced_expiratory_volume_in_1second_FEV1',
       '3064_raw-Peak_expiratory_flow_PEF', '3143_raw-Ankle_spacing_width',
       '4100_raw-Ankle_spacing_width_left',
       '4119_raw-Ankle_spacing_width_right'],
      dtype='object')

17

Index(['20015_raw-Sitting_height',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23105_raw-Basal_metabolic_rate', '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right', '23117_raw-Leg_fatfree_mass_left',
       '23118_raw-Leg_predicted_mass_left', '23121_raw-Arm_fatfree_mass_right',
       '23122_raw-Arm_predicted_mass_right', '23125_raw-Arm_fatfree_mass_left',
       '23126_raw-Arm_predicted_mass_left', '23129_raw-Trunk_fatfree_mass',
       '23130_raw-Trunk_predicted_mass', '50_raw-Standing_height',
       'body height'],
      dtype='object')

18

Index(['21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23104_raw-Body_mass_index_BMI', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23115_raw-Leg_fat_percentage_left',
       '23116_raw-Leg_fat_mass_left', '23119_raw-Arm_fat_percentage_right',
       '23120_raw-Arm_fat_mass_right', '23123_raw-Arm_fat_percentage_left',
       '23124_raw-Arm_fat_mass_left', '23127_raw-Trunk_fat_percentage',
       '23128_raw-Trunk_fat_mass', '48_raw-Waist_circumference',
       '49_raw-Hip_circumference'],
      dtype='object')

15

Index(['100002_raw-Energy', '100003_raw-Protein', '100004_raw-Fat',
       '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100011_raw-Iron',
       '100012_raw-Vitamin_B6', '100014_raw-Folate', '100016_raw-Potassium',
       '100017_raw-Magnesium', '100024_raw-Calcium', '100025_raw-Vitamin_E'],
      dtype='object')

1

Index(['30070_raw-Red_blood_cell_erythrocyte_distribution_width'], dtype='object')

4

Index(['K11_COELIAC-Coeliac_disease', 'celiac disease',
       'malabsorption syndrome', 'rheumatoid arthritis'],
      dtype='object')

220

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'self rated health', 'self reported educational attainment',
       'sleep duration', 'smoking behavior', 'smoking cessation',
       'snoring measurement', 'tiredness measurement', 'varicose veins',
       'wheezing', 'worry measurement'],
      dtype='object', length=220)

47

Index(['20117_2-Alcohol_drinker_status_Current',
       '20126_0-Bipolar_and_major_depression_status_No_Bipolar_or_Depression',
       '20403-Amount_of_alcohol_drunk_on_a_typical_drinking_day',
       '20416-Frequency_of_consuming_six_or_more_units_of_alcohol',
       '20421-Ever_felt_worried_tense_or_anxious_for_most_of_a_month_or_longer',
       '20425-Ever_worried_more_than_most_people_would_in_similar_situation',
       '20441-Ever_had_prolonged_loss_of_interest_in_normal_activities',
       '20446-Ever_had_prolonged_feelings_of_sadness_or_depression',
       '20453-Ever_taken_cannabis', '20458-General_happiness',
       '20459-General_happiness_with_own_health',
       '20460-Belief_that_own_life_is_meaningful',
       '20479-Ever_thought_that_life_not_worth_living',
       '20485-Ever_contemplated_selfharm',
       '20487-Felt_hated_by_family_member_as_a_child',
       '20488-Physically_abused_by_family_as_a_child',
       '20494-Felt_irritable_or_had_angry_outbursts_in_past_mont

3

Index(['30180_raw-Lymphocyte_percentage', '30200_raw-Neutrophill_percentage',
       'lymphocyte count'],
      dtype='object')

method                                                  scc_020
partition     [2, 19, 19, 19, 19, 19, 19, 19, 19, 2, 19, 19,...
ari_median                                            0.0870976
selected                                                   True
Name: 26, dtype: object

(3752,)

2     3320
0      145
25     101
23      18
17      17
19      15
15      15
18      14
14      13
12      12
21       9
20       9
10       8
11       8
5        7
8        6
13       5
1        5
3        4
4        4
6        4
22       3
16       3
7        3
9        3
24       1
dtype: int64

145

Index(['1021-Duration_of_light_DIY', '102_raw-Pulse_rate_automated_reading',
       '1031-Frequency_of_friendfamily_visits',
       '1050-Time_spend_outdoors_in_summer',
       '1060-Time_spent_outdoors_in_winter',
       '1070-Time_spent_watching_television_TV',
       '1080-Time_spent_using_computer', '1090-Time_spent_driving',
       '1100-Drive_faster_than_motorway_speed_limit',
       '110001-Invitation_to_complete_online_24hour_recall_dietary_questionnaire_acceptance',
       ...
       'self rated health', 'self reported educational attainment',
       'sleep duration', 'smoking behavior', 'smoking cessation',
       'snoring measurement', 'tiredness measurement', 'varicose veins',
       'wheezing', 'worry measurement'],
      dtype='object', length=145)

5

Index(['30080_raw-Platelet_count', '30090_raw-Platelet_crit',
       '30100_raw-Mean_platelet_thrombocyte_volume',
       '30110_raw-Platelet_distribution_width', 'platelet count'],
      dtype='object')

3320

Index(['100001_raw-Food_weight', '100010-Portion_size',
       '100013_raw-Vitamin_B12', '100015_raw-Vitamin_C', '100018_raw-Retinol',
       '100019_raw-Carotene', '100021_raw-Vitamin_D', '100022-Alcohol',
       '100023_raw-Starch', '1001-Duration_of_strenuous_sports',
       ...
       'viral human hepatitis infection', 'viral meningitis',
       'visual impairment', 'vitiligo', 'vitreous body disease',
       'vocal cord polyp', 'voice disorders',
       'wellbeing measurement AND family relationship', 'whooping cough',
       'wrist fracture'],
      dtype='object', length=3320)

4

Index(['30040_raw-Mean_corpuscular_volume',
       '30050_raw-Mean_corpuscular_haemoglobin',
       '30260_raw-Mean_reticulocyte_volume',
       '30270_raw-Mean_sphered_cell_volume'],
      dtype='object')

4

Index(['30150-Eosinophill_count', '30210_raw-Eosinophill_percentage',
       'eosinophil count', 'sum of eosinophil and basophil counts'],
      dtype='object')

7

Index(['1717-Skin_colour', '1727-Ease_of_skin_tanning',
       '1747_1-Hair_colour_natural_before_greying_Blonde',
       '1747_2-Hair_colour_natural_before_greying_Red',
       '1747_3-Hair_colour_natural_before_greying_Light_brown',
       '1747_4-Hair_colour_natural_before_greying_Dark_brown',
       '1747_5-Hair_colour_natural_before_greying_Black'],
      dtype='object')

4

Index(['30010_raw-Red_blood_cell_erythrocyte_count',
       '30020_raw-Haemoglobin_concentration',
       '30030_raw-Haematocrit_percentage', 'erythrocyte count'],
      dtype='object')

3

Index(['K11_COELIAC-Coeliac_disease', 'celiac disease',
       'malabsorption syndrome'],
      dtype='object')

6

Index(['30240_raw-Reticulocyte_percentage', '30250_raw-Reticulocyte_count',
       '30280_raw-Immature_reticulocyte_fraction',
       '30290_raw-High_light_scatter_reticulocyte_percentage',
       '30300_raw-High_light_scatter_reticulocyte_count',
       'reticulocyte count'],
      dtype='object')

3

Index(['30130_raw-Monocyte_count', '30190_raw-Monocyte_percentage',
       'monocyte count'],
      dtype='object')

8

Index(['5096_raw-3mm_weak_meridian_left', '5097_raw-6mm_weak_meridian_left',
       '5098_raw-6mm_weak_meridian_right', '5099_raw-3mm_weak_meridian_right',
       '5132_raw-3mm_strong_meridian_right',
       '5133_raw-6mm_strong_meridian_right',
       '5134_raw-6mm_strong_meridian_left',
       '5135_raw-3mm_strong_meridian_left'],
      dtype='object')

8

Index(['30000_raw-White_blood_cell_leukocyte_count',
       '30140_raw-Neutrophill_count', 'granulocyte count', 'leukocyte count',
       'myeloid white cell count', 'neutrophil count',
       'sum of basophil and neutrophil counts',
       'sum of neutrophil and eosinophil counts'],
      dtype='object')

12

Index(['3144_raw-Heel_Broadband_ultrasound_attenuation_direct_entry',
       '3147_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry',
       '3148_raw-Heel_bone_mineral_density_BMD',
       '4101_raw-Heel_broadband_ultrasound_attenuation_left',
       '4104_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_left',
       '4105_raw-Heel_bone_mineral_density_BMD_left',
       '4106_raw-Heel_bone_mineral_density_BMD_Tscore_automated_left',
       '4120_raw-Heel_broadband_ultrasound_attenuation_right',
       '4123_raw-Heel_quantitative_ultrasound_index_QUI_direct_entry_right',
       '4124_raw-Heel_bone_mineral_density_BMD_right',
       '4125_raw-Heel_bone_mineral_density_BMD_Tscore_automated_right',
       '78_raw-Heel_bone_mineral_density_BMD_Tscore_automated'],
      dtype='object')

5

Index(['30120_raw-Lymphocyte_count',
       '6152_100-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_None_of_the_above',
       '6152_8-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_Asthma',
       '6152_9-Blood_clot_DVT_bronchitis_emphysema_asthma_rhinitis_eczema_allergy_diagnosed_by_doctor_Hayfever_allergic_rhinitis_or_eczema',
       'asthma'],
      dtype='object')

13

Index(['20150_raw-Forced_expiratory_volume_in_1second_FEV1_Best_measure',
       '20151_raw-Forced_vital_capacity_FVC_Best_measure',
       '23106_raw-Impedance_of_whole_body', '23107_raw-Impedance_of_leg_right',
       '23108_raw-Impedance_of_leg_left', '23109_raw-Impedance_of_arm_right',
       '23110_raw-Impedance_of_arm_left', '3062_raw-Forced_vital_capacity_FVC',
       '3063_raw-Forced_expiratory_volume_in_1second_FEV1',
       '3064_raw-Peak_expiratory_flow_PEF', '3143_raw-Ankle_spacing_width',
       '4100_raw-Ankle_spacing_width_left',
       '4119_raw-Ankle_spacing_width_right'],
      dtype='object')

15

Index(['EAGLE_Eczema', 'Jones_et_al_2016_Chronotype', 'MAGNETIC_CH2.DB.ratio',
       'MAGNETIC_HDL.C', 'MAGNETIC_IDL.TG', 'MAGNETIC_LDL.C',
       'alzheimer's disease', 'attention deficit hyperactivity disorder',
       'coronary artery disease', 'estrogen-receptor negative breast cancer',
       'estrogen-receptor positive breast cancer',
       'family history of breast cancer', 'fasting blood glucose measurement',
       'fasting blood insulin measurement', 'inflammatory bowel disease'],
      dtype='object')

3

Index(['psoriasis', 'rheumatoid arthritis', 'systemic lupus erythematosus'], dtype='object')

17

Index(['20015_raw-Sitting_height',
       '20153_raw-Forced_expiratory_volume_in_1second_FEV1_predicted',
       '23101_raw-Whole_body_fatfree_mass', '23102_raw-Whole_body_water_mass',
       '23105_raw-Basal_metabolic_rate', '23113_raw-Leg_fatfree_mass_right',
       '23114_raw-Leg_predicted_mass_right', '23117_raw-Leg_fatfree_mass_left',
       '23118_raw-Leg_predicted_mass_left', '23121_raw-Arm_fatfree_mass_right',
       '23122_raw-Arm_predicted_mass_right', '23125_raw-Arm_fatfree_mass_left',
       '23126_raw-Arm_predicted_mass_left', '23129_raw-Trunk_fatfree_mass',
       '23130_raw-Trunk_predicted_mass', '50_raw-Standing_height',
       'body height'],
      dtype='object')

14

Index(['20003_1140861958-Treatmentmedication_code_simvastatin',
       '20003_1141194794-Treatmentmedication_code_bendroflumethiazide',
       '4079_raw-Diastolic_blood_pressure_automated_reading',
       '4080_raw-Systolic_blood_pressure_automated_reading',
       '6150_100-Vascularheart_problems_diagnosed_by_doctor_None_of_the_above',
       '6150_4-Vascularheart_problems_diagnosed_by_doctor_High_blood_pressure',
       '6153_1-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Cholesterol_lowering_medication',
       '6153_100-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_None_of_the_above',
       '6153_2-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Blood_pressure_medication',
       '6177_1-Medication_for_cholesterol_blood_pressure_or_diabetes_Cholesterol_lowering_medication',
       '6177_100-Medication_for_cholesterol_blood_pressure_or_diabetes_None_of_the_above',
       '6177_2-Medicatio

15

Index(['100002_raw-Energy', '100003_raw-Protein', '100004_raw-Fat',
       '100005_raw-Carbohydrate', '100006_raw-Saturated_fat',
       '100007_raw-Polyunsaturated_fat', '100008_raw-Total_sugars',
       '100009_raw-Englyst_dietary_fibre', '100011_raw-Iron',
       '100012_raw-Vitamin_B6', '100014_raw-Folate', '100016_raw-Potassium',
       '100017_raw-Magnesium', '100024_raw-Calcium', '100025_raw-Vitamin_E'],
      dtype='object')

9

Index(['6150_1-Vascularheart_problems_diagnosed_by_doctor_Heart_attack',
       'I9_CHD-Major_coronary_heart_disease_event',
       'I9_CHD_NOREV-Major_coronary_heart_disease_event_excluding_revascularizations',
       'I9_CORATHER-Coronary_atherosclerosis',
       'I9_IHD-Ischaemic_heart_disease_wide_definition',
       'I9_MI-Myocardial_infarction',
       'I9_MI_STRICT-Myocardial_infarction_strict',
       'acute myocardial infarction', 'myocardial infarction'],
      dtype='object')

9

Index(['20003_1140883066-Treatmentmedication_code_insulin_product',
       '20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
       '2976_raw-Age_diabetes_diagnosed',
       '2986-Started_insulin_within_one_year_diagnosis_of_diabetes',
       '6144_3-Never_eat_eggs_dairy_wheat_sugar_Wheat_products',
       '6153_3-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Insulin',
       '6177_3-Medication_for_cholesterol_blood_pressure_or_diabetes_Insulin',
       'hyperthyroidism AND thyrotoxicosis', 'hypothyroidism AND myxedema'],
      dtype='object')

3

Index(['30180_raw-Lymphocyte_percentage', '30200_raw-Neutrophill_percentage',
       'lymphocyte count'],
      dtype='object')

18

Index(['21001_raw-Body_mass_index_BMI', '21002_raw-Weight', '23098_raw-Weight',
       '23099_raw-Body_fat_percentage', '23100_raw-Whole_body_fat_mass',
       '23104_raw-Body_mass_index_BMI', '23111_raw-Leg_fat_percentage_right',
       '23112_raw-Leg_fat_mass_right', '23115_raw-Leg_fat_percentage_left',
       '23116_raw-Leg_fat_mass_left', '23119_raw-Arm_fat_percentage_right',
       '23120_raw-Arm_fat_mass_right', '23123_raw-Arm_fat_percentage_left',
       '23124_raw-Arm_fat_mass_left', '23127_raw-Trunk_fat_percentage',
       '23128_raw-Trunk_fat_mass', '48_raw-Waist_circumference',
       '49_raw-Hip_circumference'],
      dtype='object')

1

Index(['30070_raw-Red_blood_cell_erythrocyte_distribution_width'], dtype='object')

101

Index(['41248_1000-Destinations_on_discharge_from_hospital_recoded_Usual_Place_of_residence',
       '4194_raw-Pulse_rate', '4260-Round_of_numeric_memory_test',
       '4282-Maximum_digits_remembered_correctly',
       '4283-Number_of_rounds_of_numeric_memory_test_performed',
       '4290_raw-Duration_screen_displayed', '4291-Number_of_attempts',
       '46_raw-Hand_grip_strength_left', '47_raw-Hand_grip_strength_right',
       '4825-Noisy_workplace',
       ...
       '864-Number_of_daysweek_walked_10_minutes', '874_raw-Duration_of_walks',
       '884-Number_of_daysweek_of_moderate_physical_activity_10_minutes',
       '894-Duration_of_moderate_activity',
       '904-Number_of_daysweek_of_vigorous_physical_activity_10_minutes',
       '924-Usual_walking_pace',
       '943-Frequency_of_stair_climbing_in_last_4_weeks',
       '971-Frequency_of_walking_for_pleasure_in_last_4_weeks',
       'ICDMAIN_ANY_ENTRY-Any_ICDMAIN_event_in_hilmo_or_causes_of_death',
       'episodic memory'],
     