# Description

This notebook analyzes which LVs are the most discriminative for different clusters of traits, and how these LVs are associated with traits using the LV-based regression framework. The steps are:
* Read clustering results for PhenomeXcan (discovery cohort).
* See which LVs are driving those trait clusters.
* Read LV-trait associations in PhenomeXcan and eMERGE (replication cohort).
* Perform analyses on different clusters of traits, such as autoimmune and cardiovascular clusters.

# Modules loading

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import re
from pathlib import Path

import pandas as pd

from entity import Trait
import conf

# Settings

In [8]:
# Number of clusters (k) of the consensus partition analyzed in this notebook;
# it is the key used to pick a row from `best_partitions`.
CLUSTERING_K = 29

# Paths

In [9]:
# Absolute path of the folder holding the consensus clustering results.
CONSENSUS_CLUSTERING_DIR = (
    Path(conf.RESULTS["CLUSTERING_DIR"]) / "consensus_clustering"
).resolve()

display(CONSENSUS_CLUSTERING_DIR)

PosixPath('/opt/data/results/clustering/consensus_clustering')

In [10]:
# Folder with the per-cluster LV files of the selected partition. The
# "part<k>" folder name is derived from CLUSTERING_K so it cannot drift from
# the partition chosen in the settings.
CLUSTERING_LVS_DIR = (
    conf.RESULTS["CLUSTERING_INTERPRETATION"]["BASE_DIR"]
    / "cluster_lvs"
    / f"part{CLUSTERING_K}"
)
display(CLUSTERING_LVS_DIR)
# Fail early, before any of the per-cluster files is read.
assert CLUSTERING_LVS_DIR.exists(), "Cluster LVs directory does not exist"

PosixPath('/opt/data/results/clustering/interpretation/cluster_lvs/part29')

# Load data

## Clustering data

In [11]:
INPUT_SUBSET = "z_score_std"

In [12]:
INPUT_STEM = "projection-smultixcan-efo_partial-mashr-zscores"

In [13]:
# File with the data matrix used for clustering: traits in rows and LVs in
# columns; its rows are later matched against the selected partition.
input_filepath = Path(
    conf.RESULTS["DATA_TRANSFORMATIONS_DIR"],
    INPUT_SUBSET,
    f"{INPUT_SUBSET}-{INPUT_STEM}.pkl",
).resolve()
display(input_filepath)

# Fail early with a clear message instead of an error when reading the pickle.
assert input_filepath.exists(), "Input file does not exist"

# NOTE(review): the stem is only displayed here; no other cell visible in this
# notebook reads `input_filepath_stem`.
input_filepath_stem = input_filepath.stem
display(input_filepath_stem)

PosixPath('/opt/data/results/data_transformations/z_score_std/z_score_std-projection-smultixcan-efo_partial-mashr-zscores.pkl')

'z_score_std-projection-smultixcan-efo_partial-mashr-zscores'

In [14]:
input_data = pd.read_pickle(input_filepath)

In [15]:
input_data.shape

(3752, 987)

In [16]:
input_data.head()

Unnamed: 0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,...,LV978,LV979,LV980,LV981,LV982,LV983,LV984,LV985,LV986,LV987
100001_raw-Food_weight,-0.695006,1.962565,0.057683,0.878731,-0.539977,1.481272,-0.396422,1.09018,0.759223,0.931395,...,1.129784,1.752343,-1.411403,2.823863,0.931116,-1.054519,0.432982,-0.633597,0.554279,-0.642479
100002_raw-Energy,-1.528127,-0.345309,-0.148953,-0.24206,0.373427,0.791092,0.263477,0.987702,0.354391,1.416059,...,0.224604,0.769882,-0.509482,0.091153,2.286789,-1.008256,-0.029764,1.737229,-0.272107,-0.526125
100003_raw-Protein,-0.704572,-1.011299,0.67142,0.143991,0.615212,0.874212,-0.040998,0.91517,0.254369,-0.084237,...,1.003019,1.044314,-2.376108,0.004778,0.053714,-0.892447,-0.1838,1.377991,-0.278794,-0.419733
100004_raw-Fat,-0.989832,-1.87549,0.261555,-1.420719,0.366238,1.167049,0.257387,0.717674,-0.997664,0.969825,...,0.585913,0.638314,0.119139,-0.140204,1.394326,-1.173402,0.555058,1.013982,-0.544506,-0.064061
100005_raw-Carbohydrate,-0.580143,0.243335,0.158966,-0.036558,0.068176,-0.202639,1.101281,0.675227,1.463432,1.010078,...,-0.249108,-0.026814,0.232713,0.323682,1.168642,-0.282935,0.653105,1.909526,0.199997,-1.656894


## Clustering partition

In [17]:
input_file = Path(CONSENSUS_CLUSTERING_DIR, "best_partitions_by_k.pkl").resolve()
display(input_file)

PosixPath('/opt/data/results/clustering/consensus_clustering/best_partitions_by_k.pkl')

In [18]:
best_partitions = pd.read_pickle(input_file)

In [19]:
best_partitions.shape

(59, 4)

In [20]:
best_partitions.head()

Unnamed: 0_level_0,method,partition,ari_median,selected
k,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
14,scc_025,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.090117,True
22,scc_020,"[13, 18, 18, 18, 18, 18, 18, 18, 18, 13, 18, 1...",0.0901,True
13,scc_025,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.08992,True
12,scc_025,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.089894,True
11,scc_025,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.089616,True


In [21]:
# `best_partitions` is indexed by k (number of clusters), so this picks the
# consensus partition with CLUSTERING_K clusters. The resulting array of
# cluster labels is used as a row mask over `input_data`.
SELECTED_PARTITION = best_partitions.loc[CLUSTERING_K, "partition"]

In [22]:
SELECTED_PARTITION

array([ 0, 22, 22, ...,  0, 28,  0], dtype=int32)

In [23]:
def get_cluster_traits(cluster):
    """Return the short codes of the traits assigned to a cluster.

    Rows of `input_data` whose label in `SELECTED_PARTITION` equals `cluster`
    are selected, and each row name is converted to a trait code:

    * if the name is an EFO label, the code of the first trait returned by
      `Trait.get_traits_from_efo` is used;
    * otherwise the name is treated as a full trait code and resolved with
      `Trait.get_trait`.

    Args:
        cluster: cluster label, compared against `SELECTED_PARTITION`.

    Returns:
        A list with one trait code per row of the cluster, in row order.
    """
    in_cluster = SELECTED_PARTITION == cluster

    short_codes = []
    for full_name in input_data[in_cluster].index:
        if Trait.is_efo_label(full_name):
            # an EFO label can map to several traits; only the first is used
            trait = Trait.get_traits_from_efo(full_name)[0]
        else:
            trait = Trait.get_trait(full_code=full_name)

        short_codes.append(trait.code)

    return short_codes

## PhenomeXcan LV-trait associations

In [24]:
input_filepath = Path(conf.RESULTS["GLS"] / "gls-summary-phenomexcan.pkl.gz")
display(input_filepath)

PosixPath('/opt/data/results/gls/gls-summary-phenomexcan.pkl.gz')

In [25]:
phenomexcan_lv_trait_assocs = pd.read_pickle(input_filepath)

In [26]:
phenomexcan_lv_trait_assocs.shape

(4037817, 5)

In [27]:
phenomexcan_lv_trait_assocs.head()

Unnamed: 0,phenotype,phenotype_desc,lv,pvalue,fdr
0,AB1_OTHER_VIRAL,Other viral diseases,LV736,0.004725,0.504339
1,AB1_OTHER_VIRAL,Other viral diseases,LV320,0.004848,0.508291
2,AB1_OTHER_VIRAL,Other viral diseases,LV366,0.005306,0.523691
3,AB1_OTHER_VIRAL,Other viral diseases,LV964,0.006106,0.548143
4,AB1_OTHER_VIRAL,Other viral diseases,LV92,0.006565,0.560048


## eMERGE LV-trait associations

In [28]:
input_filepath = Path(conf.RESULTS["GLS"] / "gls-summary-emerge.pkl.gz")
display(input_filepath)

PosixPath('/opt/data/results/gls/gls-summary-emerge.pkl.gz')

In [29]:
emerge_lv_trait_assocs = pd.read_pickle(input_filepath)

In [30]:
emerge_lv_trait_assocs.shape

(304983, 5)

In [31]:
emerge_lv_trait_assocs.head()

Unnamed: 0,phenotype,phenotype_desc,lv,pvalue,fdr
0,EUR_440.2,Atherosclerosis of the extremities,LV472,1.033637e-07,0.000658
1,EUR_440.2,Atherosclerosis of the extremities,LV182,3.710244e-07,0.001432
2,EUR_440.2,Atherosclerosis of the extremities,LV348,7.379936e-07,0.002558
3,EUR_440.2,Atherosclerosis of the extremities,LV504,1.534424e-06,0.0045
4,EUR_440.2,Atherosclerosis of the extremities,LV445,2.912525e-06,0.007402


## eMERGE traits info

In [32]:
input_filepath = conf.EMERGE["DESC_FILE_WITH_SAMPLE_SIZE"]
display(input_filepath)

PosixPath('/opt/data/data/emerge/eMERGE_III_PMBB_GSA_v2_2020_phecode_AFR_EUR_cc50_counts_w_dictionary.txt')

In [33]:
# Read only the columns needed here. `phecode` is read as a string so codes
# such as "008" or "008.52" are not parsed as numbers, which would drop the
# leading zeros.
emerge_traits_info = pd.read_csv(
    input_filepath,
    sep="\t",
    dtype={"phecode": str},
    usecols=[
        "phecode",
        "phenotype",
        "category",
        "eMERGE_III_EUR_case",
        "eMERGE_III_EUR_control",
    ],
)

In [34]:
# Add the "EUR_" prefix so phecodes look like the `phenotype` identifiers of
# the eMERGE LV-trait associations (e.g. "EUR_440.2").
emerge_traits_info["phecode"] = emerge_traits_info["phecode"].map("EUR_{}".format)

In [35]:
emerge_traits_info = emerge_traits_info.set_index("phecode").sort_index()

In [36]:
emerge_traits_info = emerge_traits_info.rename(
    columns={
        "eMERGE_III_EUR_case": "eur_n_cases",
        "eMERGE_III_EUR_control": "eur_n_controls",
    }
)

In [37]:
emerge_traits_info.shape

(309, 4)

In [38]:
emerge_traits_info.head()

Unnamed: 0_level_0,eur_n_cases,eur_n_controls,phenotype,category
phecode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
EUR_008,1639,57495,Intestinal infection,infectious diseases
EUR_008.5,1024,57495,Bacterial enteritis,infectious diseases
EUR_008.52,893,57495,Intestinal infection due to C. difficile,infectious diseases
EUR_038,3172,50610,Septicemia,infectious diseases
EUR_038.3,1361,50610,Bacteremia,infectious diseases


In [39]:
assert emerge_traits_info.index.is_unique

## LVs errors

These results are used to avoid using LVs with high mean type I error in the null simulations.

In [40]:
lvs_typeIerr = pd.read_pickle(
    conf.RESULTS["GLS_NULL_SIMS"] / "lvs-null_sims-1000g_eur-prop_type_I_errors.pkl"
).set_index("lv")

In [41]:
lvs_typeIerr.shape

(987, 3)

In [42]:
lvs_typeIerr.head()

Unnamed: 0_level_0,1,5,10
lv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LV1,0.005,0.023,0.061
LV10,0.013,0.035,0.069
LV100,0.013,0.052,0.084
LV101,0.011,0.04,0.09
LV102,0.013,0.058,0.083


In [43]:
# Flag the LVs whose value in column "5" is above 0.07; flagged LVs are
# excluded from the significant associations selected further down.
# NOTE(review): column "5" presumably holds the proportion of type I errors at
# a nominal 5% level (expected value around 0.05) — confirm against the null
# simulations results.
lvs_flagged = set(lvs_typeIerr[lvs_typeIerr["5"] > 0.07].index)

In [44]:
len(lvs_flagged)

127

# Load trait associations

Load trait associations for PhenomeXcan and eMERGE and show some statistics.

## PhenomeXcan

In [45]:
# Keep the associations with FDR < 0.05, discarding those involving LVs that
# were flagged because of their type I error.
data_signif = phenomexcan_lv_trait_assocs.loc[
    lambda assocs: ~assocs["lv"].isin(lvs_flagged) & (assocs["fdr"] < 0.05)
]

In [46]:
data_signif.shape

(3450, 5)

In [47]:
data_signif["phenotype"].unique()

['1747_4', '22617_1136', 'E83', '6138_100', 'Z00', ..., 'BCAC_Overall_BreastCancer_EUR', 'GEFOS_Forearm', 'GIANT_HEIGHT', 'IBD.EUR.Ulcerative_Colitis', 'MAGNETIC_IDL.TG']
Length: 1176
Categories (1176, object): ['1747_4', '22617_1136', 'E83', '6138_100', ..., 'GEFOS_Forearm', 'GIANT_HEIGHT', 'IBD.EUR.Ulcerative_Colitis', 'MAGNETIC_IDL.TG']

In [48]:
data_signif["lv"].unique()

['LV620', 'LV194', 'LV187', 'LV209', 'LV981', ..., 'LV387', 'LV93', 'LV808', 'LV181', 'LV484']
Length: 686
Categories (686, object): ['LV620', 'LV194', 'LV187', 'LV209', ..., 'LV93', 'LV808', 'LV181', 'LV484']

## eMERGE

In [49]:
# eMERGE associations with FDR < 0.05 whose LV was not flagged because of its
# type I error.
data_emerge_signif = emerge_lv_trait_assocs.loc[
    lambda assocs: (assocs["fdr"] < 0.05) & ~assocs["lv"].isin(lvs_flagged)
]

In [50]:
data_emerge_signif.shape

(196, 5)

In [51]:
data_emerge_signif["phenotype"].unique()

['EUR_440.2', 'EUR_250.24', 'EUR_411', 'EUR_362', 'EUR_286', ..., 'EUR_433.3', 'EUR_443.9', 'EUR_244', 'EUR_287', 'EUR_008.52']
Length: 81
Categories (81, object): ['EUR_440.2', 'EUR_250.24', 'EUR_411', 'EUR_362', ..., 'EUR_443.9', 'EUR_244', 'EUR_287', 'EUR_008.52']

In [52]:
data_emerge_signif["lv"].unique()

['LV472', 'LV182', 'LV348', 'LV504', 'LV445', ..., 'LV304', 'LV842', 'LV161', 'LV533', 'LV498']
Length: 116
Categories (116, object): ['LV472', 'LV182', 'LV348', 'LV504', ..., 'LV842', 'LV161', 'LV533', 'LV498']

# Autoimmune clusters

Analyze the autoimmune clusters in PhenomeXcan and their significant LV-trait associations.
Then check whether these associations replicate in eMERGE.

## PhenomeXcan

In [49]:
# Load the LV names stored for cluster 13; only the "name" column (LV
# identifiers such as "LV57") is kept.
# NOTE(review): presumably these are the LVs that best discriminate the
# cluster — confirm against the notebook that generates these files.
cluster13_lvs = pd.read_pickle(
    CLUSTERING_LVS_DIR / "cluster_interpreter-part29_k13.pkl"
)["name"]

In [50]:
cluster13_lvs.shape

(20,)

In [51]:
cluster13_lvs

idx
57      LV57
987    LV987
54      LV54
942    LV942
844    LV844
948    LV948
155    LV155
983    LV983
864    LV864
984    LV984
271    LV271
502    LV502
453    LV453
610    LV610
605    LV605
913    LV913
568    LV568
342    LV342
425    LV425
11      LV11
Name: name, dtype: object

In [52]:
cluster26_lvs = pd.read_pickle(
    CLUSTERING_LVS_DIR / "cluster_interpreter-part29_k26.pkl"
)["name"]

In [53]:
cluster8_lvs = pd.read_pickle(CLUSTERING_LVS_DIR / "cluster_interpreter-part29_k8.pkl")[
    "name"
]

In [54]:
# Union of the LVs loaded for the three autoimmune clusters.
all_clusters_lvs = set().union(cluster13_lvs, cluster26_lvs, cluster8_lvs)

In [55]:
len(all_clusters_lvs)

42

In [56]:
cluster13_traits = get_cluster_traits(13)
cluster26_traits = get_cluster_traits(26)
cluster8_traits = get_cluster_traits(8)

In [57]:
# Union of the traits that belong to the three autoimmune clusters.
all_clusters_traits = set().union(
    cluster13_traits, cluster26_traits, cluster8_traits
)

In [58]:
len(all_clusters_traits)

15

In [59]:
# Significant PhenomeXcan associations restricted to the LVs and the traits of
# the selected clusters, most significant (lowest FDR) first.
groups_signif = data_signif.loc[
    lambda assocs: assocs["lv"].isin(all_clusters_lvs)
    & assocs["phenotype"].isin(all_clusters_traits)
].sort_values("fdr")

In [60]:
groups_signif.shape

(32, 5)

In [61]:
groups_signif["lv"].unique()

['LV57', 'LV844', 'LV52', 'LV45', 'LV342', 'LV984', 'LV942']
Categories (7, object): ['LV57', 'LV844', 'LV52', 'LV45', 'LV342', 'LV984', 'LV942']

In [62]:
groups_signif["phenotype"].unique()

['20002_1226', '20003_1141191044', '2976_raw', '2986', '20002_1381', ..., 'K90', 'K11_COELIAC', '20002_1225', '6144_3', '20002_1456']
Length: 14
Categories (14, object): ['20002_1226', '20003_1141191044', '2976_raw', '2986', ..., 'K11_COELIAC', '20002_1225', '6144_3', '20002_1456']

## eMERGE

In [63]:
# eMERGE associations are filtered by LV only: unlike the PhenomeXcan
# selection, no trait filter is applied here.
# NOTE(review): presumably because the trait clusters were derived from
# PhenomeXcan traits only — confirm.
groups_emerge_signif = data_emerge_signif[
    data_emerge_signif["lv"].isin(all_clusters_lvs)
].sort_values("fdr")

In [64]:
groups_emerge_signif.shape

(11, 5)

In [65]:
groups_emerge_signif["lv"].unique()

['LV57', 'LV504', 'LV942', 'LV342', 'LV54']
Categories (5, object): ['LV57', 'LV504', 'LV942', 'LV342', 'LV54']

## Compare

In [68]:
# LVs with significant associations in both PhenomeXcan and eMERGE
set(groups_signif["lv"].unique()) & set(groups_emerge_signif["lv"].unique())

{'LV342', 'LV57', 'LV942'}

In [66]:
with pd.option_context(
    "display.max_rows", None, "display.max_columns", None, "display.max_colwidth", None
):
    display(groups_signif.shape)
    display(groups_signif)

(32, 5)

Unnamed: 0,phenotype,phenotype_desc,lv,pvalue,fdr
248724,20002_1226,"Non-cancer illness code, self-reported: hypothyroidism/myxoedema",LV57,7.382515e-09,3.8e-05
595161,20003_1141191044,Treatment/medication code: levothyroxine sodium (20003_1141191044),LV57,1.792978e-08,8.4e-05
3065622,2976_raw,Age diabetes diagnosed,LV844,1.017499e-07,0.000386
2131920,2986,Started insulin within one year diagnosis of diabetes,LV844,4.376232e-07,0.001299
762951,20002_1381,"Non-cancer illness code, self-reported: systemic lupus erythematosis/sle",LV52,4.646106e-07,0.001365
1134063,20002_1453,"Non-cancer illness code, self-reported: psoriasis",LV52,9.819095e-07,0.002535
1454838,6153_3,"Medication for cholesterol, blood pressure, diabetes, or take exogenous hormones: Insulin",LV45,1.064441e-06,0.002719
3472267,6177_3,"Medication for cholesterol, blood pressure or diabetes: Insulin",LV844,1.51481e-06,0.003615
1046221,20003_1140883066,Treatment/medication code: insulin product (20003_1140883066),LV844,2.556388e-06,0.005476
3472268,6177_3,"Medication for cholesterol, blood pressure or diabetes: Insulin",LV342,3.16033e-06,0.006487


In [67]:
with pd.option_context(
    "display.max_rows", None, "display.max_columns", None, "display.max_colwidth", None
):
    display(groups_emerge_signif.shape)
    display(groups_emerge_signif)

(11, 5)

Unnamed: 0,phenotype,phenotype_desc,lv,pvalue,fdr
10859,EUR_286,Coagulation defects,LV57,1.303465e-16,1.325116e-11
18753,EUR_440.22,Atherosclerosis of native arteries of the extremities with intermittent claudication,LV504,1.369999e-10,2.611414e-06
175687,EUR_452,Other venous embolism and thrombosis,LV57,1.097275e-09,1.521138e-05
61195,EUR_452.2,Deep vein thrombosis [DVT],LV57,4.246591e-09,4.465993e-05
115479,EUR_702,Degenerative skin conditions and other dermatoses,LV942,3.171349e-07,0.001381725
83906,EUR_250.2,Type 2 diabetes,LV342,3.357968e-07,0.001416795
118448,EUR_250,Diabetes mellitus,LV342,3.551293e-07,0.001416795
84882,EUR_250.1,Type 1 diabetes,LV54,1.209912e-06,0.003884238
3,EUR_440.2,Atherosclerosis of the extremities,LV504,1.534424e-06,0.004499744
225036,EUR_244.4,Hypothyroidism NOS,LV57,4.974703e-06,0.01115588


# Cardiovascular clusters

## PhenomeXcan

In [69]:
cluster17_lvs = pd.read_pickle(
    CLUSTERING_LVS_DIR / "cluster_interpreter-part29_k17.pkl"
)["name"]

In [70]:
cluster17_lvs.shape

(20,)

In [71]:
cluster25_lvs = pd.read_pickle(
    CLUSTERING_LVS_DIR / "cluster_interpreter-part29_k25.pkl"
)["name"]

In [72]:
cluster21_lvs = pd.read_pickle(
    CLUSTERING_LVS_DIR / "cluster_interpreter-part29_k21.pkl"
)["name"]

In [73]:
cluster28_lvs = pd.read_pickle(
    CLUSTERING_LVS_DIR / "cluster_interpreter-part29_k28.pkl"
)["name"]

In [74]:
cluster11_lvs = pd.read_pickle(
    CLUSTERING_LVS_DIR / "cluster_interpreter-part29_k11.pkl"
)["name"]

In [75]:
cluster16_lvs = pd.read_pickle(
    CLUSTERING_LVS_DIR / "cluster_interpreter-part29_k16.pkl"
)["name"]

In [76]:
# Union of the LVs loaded for the six cardiovascular clusters.
all_clusters_lvs = set().union(
    cluster17_lvs,
    cluster25_lvs,
    cluster21_lvs,
    cluster28_lvs,
    cluster11_lvs,
    cluster16_lvs,
)

In [77]:
len(all_clusters_lvs)

99

In [78]:
cluster17_traits = get_cluster_traits(17)
cluster25_traits = get_cluster_traits(25)
cluster21_traits = get_cluster_traits(21)
cluster28_traits = get_cluster_traits(28)
cluster11_traits = get_cluster_traits(11)
cluster16_traits = get_cluster_traits(16)

In [79]:
# Union of the traits that belong to the six cardiovascular clusters.
all_clusters_traits = set().union(
    cluster17_traits,
    cluster25_traits,
    cluster21_traits,
    cluster28_traits,
    cluster11_traits,
    cluster16_traits,
)

In [80]:
len(all_clusters_traits)

255

In [81]:
# Significant PhenomeXcan associations restricted to the LVs and traits of the
# cardiovascular clusters, sorted by increasing FDR.
# NOTE(review): this rebinds `groups_signif`, which also holds the autoimmune
# selection earlier in the notebook; cells run out of order will silently mix
# the two analyses.
groups_signif = data_signif[
    data_signif["lv"].isin(all_clusters_lvs)
    & data_signif["phenotype"].isin(all_clusters_traits)
].sort_values("fdr")

In [82]:
groups_signif.shape

(129, 5)

In [83]:
groups_signif["lv"].unique()

['LV21', 'LV455', 'LV260', 'LV136', 'LV12', ..., 'LV44', 'LV884', 'LV349', 'LV742', 'LV331']
Length: 47
Categories (47, object): ['LV21', 'LV455', 'LV260', 'LV136', ..., 'LV884', 'LV349', 'LV742', 'LV331']

In [84]:
groups_signif["phenotype"].unique()

['MAGNETIC_LDL.C', '2887', 'MAGNETIC_HDL.C', 'IGAP_Alzheimer', 'MAGNETIC_IDL.TG', ..., '1528', '46_raw', '6154_100', '1448_4', '1478']
Length: 85
Categories (85, object): ['MAGNETIC_LDL.C', '2887', 'MAGNETIC_HDL.C', 'IGAP_Alzheimer', ..., '46_raw', '6154_100', '1448_4', '1478']

## eMERGE

In [85]:
groups_emerge_signif = data_emerge_signif[
    data_emerge_signif["lv"].isin(all_clusters_lvs)
].sort_values("fdr")

In [86]:
groups_emerge_signif.shape

(23, 5)

In [87]:
groups_emerge_signif["lv"].unique()

['LV418', 'LV727', 'LV849', 'LV428', 'LV136', ..., 'LV637', 'LV796', 'LV330', 'LV93', 'LV423']
Length: 13
Categories (13, object): ['LV418', 'LV727', 'LV849', 'LV428', ..., 'LV796', 'LV330', 'LV93', 'LV423']

## Compare

In [92]:
# LVs shared
set(groups_signif["lv"].unique()).intersection(set(groups_emerge_signif["lv"].unique()))

{'LV136', 'LV21', 'LV418', 'LV574', 'LV637', 'LV727', 'LV796', 'LV849', 'LV93'}

In [88]:
with pd.option_context(
    "display.max_rows", None, "display.max_columns", None, "display.max_colwidth", None
):
    display(groups_signif.shape)
    display(groups_signif.head(20))

(129, 5)

Unnamed: 0,phenotype,phenotype_desc,lv,pvalue,fdr
4024988,MAGNETIC_LDL.C,LDL Cholesterol NMR,LV21,2.883641e-17,1.081485e-12
2159556,2887,Number of cigarettes previously smoked daily,LV455,4.843525e-16,1.303818e-11
4010182,MAGNETIC_HDL.C,HDL Cholesterol NMR,LV21,1.187472e-15,3.034679e-11
4025973,IGAP_Alzheimer,Alzheimers Disease,LV21,1.172466e-13,1.9644e-09
4036832,MAGNETIC_IDL.TG,Triglycerides NMR,LV21,1.242413e-13,2.047606e-09
1022542,2267,Use of sun/uv protection,LV260,1.726427e-13,2.733724e-09
1716393,I25,Diagnoses - main ICD10: I25 Chronic ischaemic heart disease,LV136,2.294066e-13,3.522061e-09
4010186,MAGNETIC_HDL.C,HDL Cholesterol NMR,LV12,2.523395e-13,3.816108e-09
6912,E83,Diagnoses - main ICD10: E83 Disorders of mineral metabolism,LV847,6.949779e-13,9.261365e-09
1966108,6152_5,"Blood clot, DVT, bronchitis, emphysema, asthma, rhinitis, eczema, allergy diagnosed by doctor: Blood clot in the leg (DVT)",LV418,1.707087e-12,2.033305e-08


In [89]:
with pd.option_context(
    "display.max_rows", None, "display.max_columns", None, "display.max_colwidth", None
):
    display(groups_emerge_signif.shape)
    display(groups_emerge_signif)

(23, 5)

Unnamed: 0,phenotype,phenotype_desc,lv,pvalue,fdr
10861,EUR_286,Coagulation defects,LV418,5.776146e-11,1e-06
246753,EUR_274.1,Gout,LV727,4.377635e-08,0.000334
189504,EUR_389,Hearing loss,LV849,1.313725e-07,0.000801
185562,EUR_274,Gout and other crystal arthropathies,LV727,1.816449e-07,0.000939
83905,EUR_250.2,Type 2 diabetes,LV428,3.08781e-07,0.001365
220101,EUR_411.4,Coronary atherosclerosis,LV136,3.588278e-07,0.001417
118451,EUR_250,Diabetes mellitus,LV428,3.878355e-07,0.001479
175688,EUR_452,Other venous embolism and thrombosis,LV418,4.967769e-07,0.001782
263531,EUR_272.1,Hyperlipidemia,LV21,1.398012e-06,0.004221
195428,EUR_272,Disorders of lipoid metabolism,LV21,1.524794e-06,0.0045


In [101]:
# Show every row and column, with untruncated text, while displaying the
# associations of a single LV in both cohorts.
with pd.option_context(
    "display.max_rows", None, "display.max_columns", None, "display.max_colwidth", None
):
    lv = "LV434"
    # significant PhenomeXcan associations of this LV within the clusters
    _tmp = groups_signif[groups_signif["lv"] == lv]
    display(_tmp)

    # significant eMERGE associations of this LV
    _tmp = groups_emerge_signif[groups_emerge_signif["lv"] == lv]
    display(_tmp)

Unnamed: 0,phenotype,phenotype_desc,lv,pvalue,fdr


Unnamed: 0,phenotype,phenotype_desc,lv,pvalue,fdr


In [None]:
# NOTE(review): this cell repeats the LV434 inspection of the previous cell
# verbatim and has no execution count; consider removing it or changing `lv`.
with pd.option_context(
    "display.max_rows", None, "display.max_columns", None, "display.max_colwidth", None
):
    lv = "LV434"
    _tmp = groups_signif[groups_signif["lv"] == lv]
    display(_tmp)

    _tmp = groups_emerge_signif[groups_emerge_signif["lv"] == lv]
    display(_tmp)

Unnamed: 0,phenotype,phenotype_desc,lv,pvalue,fdr


Unnamed: 0,phenotype,phenotype_desc,lv,pvalue,fdr
