# Description

It uses the PhenomeXcan traits to EFO mapping files to group traits that end up having the same EFO label. This only combines the S-MultiXcan results (p-values) by taking the minimum p-value across all traits with the same EFO label.

# Modules loading

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from IPython.display import display
import numpy as np
import pandas as pd

import conf
from data.cache import read_data
from entity import Trait

# Load S-MultiXcan

## z-scores (EFO-mapped)

In [3]:
smultixcan_zscores_combined = read_data(
    conf.PHENOMEXCAN["SMULTIXCAN_EFO_PARTIAL_MASHR_ZSCORES_FILE"]
)

In [4]:
smultixcan_zscores_combined.shape

(22515, 3752)

In [5]:
smultixcan_zscores_combined.head()

Unnamed: 0_level_0,100001_raw-Food_weight,100002_raw-Energy,100003_raw-Protein,100004_raw-Fat,100005_raw-Carbohydrate,100006_raw-Saturated_fat,100007_raw-Polyunsaturated_fat,100008_raw-Total_sugars,100009_raw-Englyst_dietary_fibre,100010-Portion_size,...,visual impairment,vitiligo,vitreous body disease,vocal cord polyp,voice disorders,wellbeing measurement AND family relationship,wheezing,whooping cough,worry measurement,wrist fracture
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000419,1.145442,0.724557,0.090876,0.298165,1.134347,1.371138,0.065718,0.794317,0.600342,0.317652,...,0.360518,1.351624,1.157695,0.835289,1.173072,1.33728,1.743822,1.017226,1.512184,0.972241
ENSG00000000457,0.618066,1.028131,2.21842,0.762584,0.934418,0.192993,1.08023,0.765997,0.375898,0.678731,...,2.134504,0.12783,0.53469,0.120516,0.517464,2.545363,0.673331,2.003092,0.344,2.033122
ENSG00000000460,0.515724,0.403596,1.251359,0.433091,0.413466,0.246261,1.236151,0.82743,0.571985,0.782174,...,1.768905,0.992408,0.548215,0.412341,1.499415,1.36678,0.443318,0.41763,0.225934,1.613246
ENSG00000000938,0.280781,0.25391,0.879148,0.352705,0.051846,0.184212,0.148566,0.009989,0.363751,0.374514,...,0.656552,2.046041,2.746832,0.108211,1.008258,0.755695,0.896228,0.875047,0.476405,1.693057
ENSG00000000971,0.548127,0.389877,0.723469,1.16725,0.315952,0.324939,1.613932,0.311432,0.333548,1.807243,...,0.260482,0.646204,1.08024,0.67833,1.465358,0.307672,0.118376,1.419812,2e-06,1.040737


## p-values (original)

In [6]:
smultixcan_pvalues = read_data(conf.PHENOMEXCAN["SMULTIXCAN_MASHR_PVALUES_FILE"])

In [7]:
smultixcan_pvalues.shape

(22515, 4091)

In [8]:
smultixcan_pvalues.head()

Unnamed: 0_level_0,20096_1-Size_of_red_wine_glass_drunk_small_125ml,2345-Ever_had_bowel_cancer_screening,N49-Diagnoses_main_ICD10_N49_Inflammatory_disorders_of_male_genital_organs_not_elsewhere_classified,100011_raw-Iron,5221-Index_of_best_refractometry_result_right,20003_1141150624-Treatmentmedication_code_zomig_25mg_tablet,S69-Diagnoses_main_ICD10_S69_Other_and_unspecified_injuries_of_wrist_and_hand,20024_1136-Job_code_deduced_Information_and_communication_technology_managers,20002_1385-Noncancer_illness_code_selfreported_allergy_or_anaphylactic_reaction_to_food,G6_SLEEPAPNO-Sleep_apnoea,...,Astle_et_al_2016_Sum_basophil_neutrophil_counts,RA_OKADA_TRANS_ETHNIC,pgc.scz2,PGC_ADHD_EUR_2017,MAGIC_FastingGlucose,Astle_et_al_2016_Red_blood_cell_count,SSGAC_Depressive_Symptoms,BCAC_ER_positive_BreastCancer_EUR,IBD.EUR.Inflammatory_Bowel_Disease,Astle_et_al_2016_High_light_scatter_reticulocyte_count
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000419,0.865429,0.918314,0.810683,0.374671,0.189032,0.140981,0.467741,0.129427,0.19368,0.285479,...,0.41621,0.782554,0.609467,0.980281,0.666504,0.409761,0.71331,0.168319,0.460244,0.765506
ENSG00000000457,0.174192,0.064765,0.889194,0.896938,0.448596,0.269602,0.540261,0.068405,0.041813,0.313427,...,0.14936,0.512603,0.010907,0.228982,0.607081,0.812484,0.678749,0.918971,0.311187,0.344574
ENSG00000000460,0.879969,0.240715,0.238228,0.567555,0.92132,0.825036,0.78223,0.644525,0.392273,0.840014,...,0.50352,0.764147,0.587969,0.30146,0.629621,0.486664,0.736509,0.9336,0.000477,0.321223
ENSG00000000938,0.19267,0.400054,0.114353,0.4707,0.889202,1.1e-05,0.899764,0.212352,0.829671,0.372348,...,0.899212,0.961678,0.059247,0.588855,0.898525,0.135045,0.954998,0.08822,0.176497,0.304281
ENSG00000000971,0.180632,0.79306,0.490585,0.088752,0.744531,0.949639,0.253817,0.377408,0.971655,0.070266,...,0.390618,0.093824,0.020391,0.109883,0.870551,0.99545,0.00266,0.421588,0.656851,0.868416


# Get PhenomeXcan traits

In [9]:
phenomexcan_fullcode_to_traits = {
    (trait_obj := Trait.get_trait(full_code=trait_name)).full_code: trait_obj
    for trait_name in smultixcan_pvalues.columns
}

In [10]:
len(phenomexcan_fullcode_to_traits)

4091

In [11]:
assert len(phenomexcan_fullcode_to_traits) == smultixcan_pvalues.columns.shape[0]

# Change/combine traits in S-MultiXcan results

## Get a list of EFO labels for PhenomeXcan traits

In [12]:
traits_efo_labels = [
    t.get_efo_info().label
    if (t := phenomexcan_fullcode_to_traits[c]).get_efo_info() is not None
    else t.full_code
    for c in smultixcan_pvalues.columns
]

In [13]:
len(traits_efo_labels)

4091

In [14]:
traits_efo_labels[:10]

['20096_1-Size_of_red_wine_glass_drunk_small_125ml',
 '2345-Ever_had_bowel_cancer_screening',
 'male reproductive system disease',
 '100011_raw-Iron',
 '5221-Index_of_best_refractometry_result_right',
 '20003_1141150624-Treatmentmedication_code_zomig_25mg_tablet',
 'injury',
 '20024_1136-Job_code_deduced_Information_and_communication_technology_managers',
 'food allergy',
 'G6_SLEEPAPNO-Sleep_apnoea']

## Get `min(p-value)` for same EFO labels

In [15]:
smultixcan_pvalues_combined = smultixcan_pvalues.groupby(
    traits_efo_labels, axis=1
).min()

In [16]:
smultixcan_pvalues_combined.shape

(22515, 3752)

In [17]:
smultixcan_pvalues_combined.head()

Unnamed: 0_level_0,100001_raw-Food_weight,100002_raw-Energy,100003_raw-Protein,100004_raw-Fat,100005_raw-Carbohydrate,100006_raw-Saturated_fat,100007_raw-Polyunsaturated_fat,100008_raw-Total_sugars,100009_raw-Englyst_dietary_fibre,100010-Portion_size,...,visual impairment,vitiligo,vitreous body disease,vocal cord polyp,voice disorders,wellbeing measurement AND family relationship,wheezing,whooping cough,worry measurement,wrist fracture
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000419,0.252026,0.468724,0.927591,0.765578,0.256649,0.170332,0.947602,0.427011,0.548278,0.750749,...,0.71846,0.176496,0.246989,0.403555,0.240767,0.181131,0.08119,0.309046,0.130487,0.274762
ENSG00000000457,0.536532,0.303888,0.026526,0.445712,0.350088,0.846965,0.28004,0.443678,0.706993,0.497308,...,0.032802,0.898283,0.592864,0.904074,0.604832,0.010916,0.500737,0.045167,0.730846,0.107098
ENSG00000000460,0.606047,0.68651,0.210803,0.664948,0.679265,0.80548,0.216403,0.407993,0.567332,0.434112,...,0.07691,0.320999,0.583544,0.68009,0.133766,0.171694,0.657535,0.676218,0.821253,0.165486
ENSG00000000938,0.778878,0.799565,0.379321,0.72431,0.958652,0.853847,0.881896,0.99203,0.716044,0.708022,...,0.511469,0.040752,0.006017,0.913828,0.313331,0.449832,0.370131,0.381548,0.633786,0.091551
ENSG00000000971,0.583604,0.696627,0.469392,0.24311,0.752039,0.745227,0.106542,0.755472,0.73872,0.070724,...,0.794492,0.518148,0.280036,0.497563,0.142823,0.758332,0.90577,0.155662,0.999998,0.332345


### Keep same order of genes and traits as in `z_scores` data

In [18]:
assert set(smultixcan_pvalues_combined.index) == set(smultixcan_zscores_combined.index)

In [19]:
assert set(smultixcan_pvalues_combined.columns) == set(
    smultixcan_zscores_combined.columns
)

In [20]:
smultixcan_pvalues_combined = smultixcan_pvalues_combined.loc[
    smultixcan_zscores_combined.index, smultixcan_zscores_combined.columns
]

In [21]:
smultixcan_pvalues_combined.shape

(22515, 3752)

### Do we have NaN values?

In [22]:
smultixcan_pvalues_combined.isna().any().any()

True

In [23]:
smultixcan_pvalues_combined.isna().sum()

100001_raw-Food_weight                           260
100002_raw-Energy                                260
100003_raw-Protein                               260
100004_raw-Fat                                   260
100005_raw-Carbohydrate                          260
                                                ... 
wellbeing measurement AND family relationship    260
wheezing                                         260
whooping cough                                   260
worry measurement                                260
wrist fracture                                   260
Length: 3752, dtype: int64

In [24]:
smultixcan_pvalues_combined.dropna(axis=0).shape

(22195, 3752)

### Are we getting those NaN values from `z-scores == 0`?

In [25]:
zscores_zeros = (smultixcan_zscores_combined == 0.0).values
display(zscores_zeros.shape)

zscore_zeros_idx = np.where(zscores_zeros)
display(zscore_zeros_idx[0].shape)

(22515, 3752)

(965771,)

In [26]:
pvalues_nans = (smultixcan_pvalues_combined.isna()).values
display(pvalues_nans.shape)

pvalues_nans_idx = np.where(pvalues_nans)
display(pvalues_nans_idx[0].shape)

(22515, 3752)

(965604,)

In [27]:
np.array_equal(zscores_zeros, pvalues_nans)

False

NaN pvalues do not exactly match zero z-scores. Let's see what's going on

In [28]:
non_equal_idxs = np.where(zscores_zeros != pvalues_nans)
display(non_equal_idxs)

(array([  437,  1921,  2823,  2978,  3034,  3153,  3513,  4534,  4699,
         5029,  5137,  5161,  5521,  6020,  6158,  6986,  7642,  7761,
         8343,  9249,  9374, 10667, 10946, 11163, 11364, 11389, 11761,
        11852, 11930, 12027, 12263, 12533, 12661, 12693, 12956, 12991,
        13184, 13509, 13654, 13877, 14020, 14490, 14685, 14758, 14943,
        15105, 15228, 15356, 15453, 15492, 15494, 15649, 16744, 16770,
        16802, 16952, 16963, 17017, 17092, 17125, 17149, 17217, 17223,
        17264, 17266, 17328, 17439, 17443, 17449, 17461, 17503, 17508,
        17520, 17536, 17546, 17568, 17672, 17681, 17724, 17758, 17803,
        17929, 17993, 18001, 18010, 18108, 18184, 18238, 18247, 18286,
        18337, 18356, 18373, 18384, 18385, 18494, 18529, 18603, 18732,
        18862, 18866, 18881, 19028, 19085, 19107, 19119, 19124, 19154,
        19172, 19218, 19226, 19272, 19331, 19345, 19356, 19365, 19367,
        19423, 19441, 19457, 19476, 19492, 19501, 19529, 19606, 19615,
      

In [29]:
non_equal_idxs[0].shape, non_equal_idxs[1].shape

((167,), (167,))

`z-scores` and `p-values` data version differ in just a few positions, so it is mostly because of `z_scores == 0.0`

### What's in the differing positions?

In [30]:
zscores_values = smultixcan_zscores_combined.iloc[non_equal_idxs].stack()

In [31]:
zscores_values.shape

(27889,)

In [32]:
zscores_values.head()

gene_name                                    
ENSG00000029363  SSGAC_Education_Years_Pooled    0.0
                 SSGAC_Education_Years_Pooled    0.0
                 SSGAC_Education_Years_Pooled    0.0
                 SSGAC_Education_Years_Pooled    0.0
                 SSGAC_Education_Years_Pooled    0.0
dtype: float64

In [33]:
zscores_values.describe()

count    27889.000000
mean         0.234956
std          0.656286
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          5.391299
dtype: float64

In [34]:
zscores_values.sort_values(ascending=False)

gene_name                                         
ENSG00000143032  SSGAC_Education_Years_Pooled         5.391299
                 SSGAC_Education_Years_Pooled         5.391299
                 SSGAC_Education_Years_Pooled         5.391299
                 SSGAC_Education_Years_Pooled         5.391299
                 SSGAC_Education_Years_Pooled         5.391299
                                                        ...   
ENSG00000250078  fasting blood glucose measurement    0.000000
                 fasting blood glucose measurement    0.000000
                 fasting blood glucose measurement    0.000000
                 fasting blood glucose measurement    0.000000
ENSG00000029363  SSGAC_Education_Years_Pooled         0.000000
Length: 27889, dtype: float64

In [35]:
pvalues_values = smultixcan_pvalues_combined.iloc[non_equal_idxs].stack()

In [36]:
pvalues_values.shape

(27879,)

In [37]:
pvalues_values.head()

gene_name                                    
ENSG00000029363  SSGAC_Education_Years_Pooled    1.0
                 SSGAC_Education_Years_Pooled    1.0
                 SSGAC_Education_Years_Pooled    1.0
                 SSGAC_Education_Years_Pooled    1.0
                 SSGAC_Education_Years_Pooled    1.0
dtype: float64

In [38]:
pvalues_values.describe()

count    2.787900e+04
mean     8.779993e-01
std      2.725342e-01
min      6.995008e-08
25%      1.000000e+00
50%      1.000000e+00
75%      1.000000e+00
max      1.000000e+00
dtype: float64

In [39]:
pvalues_values.sort_values(ascending=False)

gene_name                                    
ENSG00000283648  SSGAC_Education_Years_Pooled    1.000000e+00
ENSG00000227930  SSGAC_Education_Years_Pooled    1.000000e+00
                 SSGAC_Education_Years_Pooled    1.000000e+00
                 SSGAC_Education_Years_Pooled    1.000000e+00
                 SSGAC_Education_Years_Pooled    1.000000e+00
                                                     ...     
ENSG00000143032  SSGAC_Education_Years_Pooled    6.995008e-08
                 SSGAC_Education_Years_Pooled    6.995008e-08
                 SSGAC_Education_Years_Pooled    6.995008e-08
                 SSGAC_Education_Years_Pooled    6.995008e-08
                 SSGAC_Education_Years_Pooled    6.995008e-08
Length: 27879, dtype: float64

## Testing

### Stats

In [40]:
_stats = smultixcan_pvalues_combined.stack().describe()
display(_stats.apply(str))

count             83510676.0
mean     0.47207421615035794
std       0.2937895450160361
min                   1e-320
25%      0.21284248873443484
50%      0.46160010110866134
75%       0.7248518311255727
max                      1.0
dtype: object

In [41]:
assert _stats["min"] > 0.0

In [42]:
assert _stats["max"] <= 1.0

### Same traits as in z-scores version

In [43]:
assert smultixcan_pvalues_combined.index.equals(smultixcan_zscores_combined.index)

In [44]:
assert smultixcan_pvalues_combined.columns.equals(smultixcan_zscores_combined.columns)

### EFO label (asthma) which combined three PhenomeXcan traits.

In [45]:
_asthma_traits = [
    "22127-Doctor_diagnosed_asthma",
    "20002_1111-Noncancer_illness_code_selfreported_asthma",
    "J45-Diagnoses_main_ICD10_J45_Asthma",
]

In [46]:
smultixcan_pvalues[_asthma_traits]

Unnamed: 0_level_0,22127-Doctor_diagnosed_asthma,20002_1111-Noncancer_illness_code_selfreported_asthma,J45-Diagnoses_main_ICD10_J45_Asthma
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ENSG00000000419,0.743650,0.479481,0.420807
ENSG00000000457,0.276471,0.483925,0.224487
ENSG00000000460,0.128293,0.487002,0.092744
ENSG00000000938,0.362071,0.020284,0.508478
ENSG00000000971,0.460320,0.073722,0.482734
...,...,...,...
ENSG00000284430,0.334419,0.447163,0.139689
ENSG00000284452,0.631494,0.610971,0.283818
ENSG00000284513,0.250116,0.168155,0.184693
ENSG00000284526,0.762563,0.995128,0.643106


In [47]:
_tmp = smultixcan_pvalues[_asthma_traits]
display(_tmp[_tmp.isna().any(axis=1)])

Unnamed: 0_level_0,22127-Doctor_diagnosed_asthma,20002_1111-Noncancer_illness_code_selfreported_asthma,J45-Diagnoses_main_ICD10_J45_Asthma
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ENSG00000077327,,,
ENSG00000107187,,,
ENSG00000109181,,,
ENSG00000117262,,,
ENSG00000117281,,,
...,...,...,...
ENSG00000282160,,,
ENSG00000282940,,,
ENSG00000283095,,,
ENSG00000283324,,,


In [48]:
_trait = "asthma"

_gene = "ENSG00000000419"
assert smultixcan_pvalues_combined.loc[_gene, _trait].round(3) == 0.421

_gene = "ENSG00000284526"
assert smultixcan_pvalues_combined.loc[_gene, _trait].round(3) == 0.643

_gene = "ENSG00000000938"
assert smultixcan_pvalues_combined.loc[_gene, _trait].round(3) == 0.020

_gene = "ENSG00000077327"
assert pd.isnull(smultixcan_pvalues_combined.loc[_gene, _trait])

### PhenomeXcan trait which has no EFO label.

In [49]:
_trait = "100001_raw-Food_weight"

In [50]:
smultixcan_pvalues[_trait]

gene_name
ENSG00000000419    0.252026
ENSG00000000457    0.536532
ENSG00000000460    0.606047
ENSG00000000938    0.778878
ENSG00000000971    0.583604
                     ...   
ENSG00000284430    0.900540
ENSG00000284452    0.112308
ENSG00000284513    0.127939
ENSG00000284526    0.880024
ENSG00000284552    0.312427
Name: 100001_raw-Food_weight, Length: 22515, dtype: float64

In [51]:
_gene = "ENSG00000284513"
assert smultixcan_pvalues_combined.loc[_gene, _trait].round(3) == 0.128

_gene = "ENSG00000000971"
assert smultixcan_pvalues_combined.loc[_gene, _trait].round(3) == 0.584

# Save full (all traits, some with EFO, some not)

In [52]:
smultixcan_pvalues_combined.shape

(22515, 3752)

In [53]:
smultixcan_pvalues_combined.head()

Unnamed: 0_level_0,100001_raw-Food_weight,100002_raw-Energy,100003_raw-Protein,100004_raw-Fat,100005_raw-Carbohydrate,100006_raw-Saturated_fat,100007_raw-Polyunsaturated_fat,100008_raw-Total_sugars,100009_raw-Englyst_dietary_fibre,100010-Portion_size,...,visual impairment,vitiligo,vitreous body disease,vocal cord polyp,voice disorders,wellbeing measurement AND family relationship,wheezing,whooping cough,worry measurement,wrist fracture
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000419,0.252026,0.468724,0.927591,0.765578,0.256649,0.170332,0.947602,0.427011,0.548278,0.750749,...,0.71846,0.176496,0.246989,0.403555,0.240767,0.181131,0.08119,0.309046,0.130487,0.274762
ENSG00000000457,0.536532,0.303888,0.026526,0.445712,0.350088,0.846965,0.28004,0.443678,0.706993,0.497308,...,0.032802,0.898283,0.592864,0.904074,0.604832,0.010916,0.500737,0.045167,0.730846,0.107098
ENSG00000000460,0.606047,0.68651,0.210803,0.664948,0.679265,0.80548,0.216403,0.407993,0.567332,0.434112,...,0.07691,0.320999,0.583544,0.68009,0.133766,0.171694,0.657535,0.676218,0.821253,0.165486
ENSG00000000938,0.778878,0.799565,0.379321,0.72431,0.958652,0.853847,0.881896,0.99203,0.716044,0.708022,...,0.511469,0.040752,0.006017,0.913828,0.313331,0.449832,0.370131,0.381548,0.633786,0.091551
ENSG00000000971,0.583604,0.696627,0.469392,0.24311,0.752039,0.745227,0.106542,0.755472,0.73872,0.070724,...,0.794492,0.518148,0.280036,0.497563,0.142823,0.758332,0.90577,0.155662,0.999998,0.332345


## Pickle (binary)

In [54]:
output_file = conf.PHENOMEXCAN["SMULTIXCAN_EFO_PARTIAL_MASHR_PVALUES_FILE"]
display(output_file)

PosixPath('/home/miltondp/projects/labs/greenelab/phenoplier/base/data/phenomexcan/gene_assoc/smultixcan-efo_partial-mashr-pvalues.pkl')

In [55]:
smultixcan_pvalues_combined.to_pickle(output_file)

## TSV (text)

In [56]:
# tsv format
output_text_file = output_file.with_suffix(".tsv.gz")
display(output_text_file)

PosixPath('/home/miltondp/projects/labs/greenelab/phenoplier/base/data/phenomexcan/gene_assoc/smultixcan-efo_partial-mashr-pvalues.tsv.gz')

In [57]:
smultixcan_pvalues_combined.to_csv(
    output_text_file, sep="\t", index=True, float_format="%.5e"
)