In [None]:
# Post Analysis 1: Calculate the lambda for each protein

import os
import math
import pandas as pd
import numpy as np
import tqdm
from scipy.stats import norm

# Step 1: Locate the per-protein GWAS (glm) result files
# os.listdir() does not expand "~", so resolve the home directory explicitly.
glm_data_dir = os.path.expanduser("~/discovery_data/pqtl_data/")
# Keep regular, non-hidden files only (skips e.g. ".ipynb_checkpoints") and sort
# them, because os.listdir() returns entries in arbitrary order.
dirs = sorted(
    name for name in os.listdir(glm_data_dir)
    if not name.startswith('.') and os.path.isfile(os.path.join(glm_data_dir, name))
)

# Median of a chi-square distribution with 1 d.f. Since chi2_1 = Z**2, the median
# equals norm.ppf(0.75)**2 (~0.4549); this replaces the truncated constant 0.454.
CHI2_1DF_MEDIAN = norm.ppf(0.75) ** 2

# Step 2: Calculate the lambda (genomic inflation factor) for each protein
# Rows are collected in a list and the DataFrame is built once at the end,
# instead of re-copying the frame with pd.concat on every iteration.
lambda_records = []
for d in tqdm.tqdm(dirs):
    # The second dot-separated field of the file name is used as the protein ID.
    name_parts = d.split('.')
    if len(name_parts) < 2:
        print(f"Skipping {d}: file name has no '.'-separated protein ID field")
        continue
    uniport = name_parts[1]
    # Only the P column is needed, so avoid loading the full summary table.
    gwas = pd.read_csv(os.path.join(glm_data_dir, d), sep='\t', usecols=['P'])
    p_value = gwas['P'].to_numpy()
    # Two-sided p-value -> z-score; the sign is irrelevant because z is squared.
    z = norm.ppf(p_value / 2)
    # Observed median chi-square over its expected value; NaN p-values are ignored.
    lambda_val = np.round(np.nanmedian(z**2) / CHI2_1DF_MEDIAN, 3)
    lambda_records.append({'Uniport': uniport, 'lambda': lambda_val})

# 'Uniport' header spelling is kept as-is so existing readers of the file still work.
lambda_df = pd.DataFrame(lambda_records, columns=['Uniport', 'lambda'])

# Step 3: Save the lambda DataFrame
lambda_df.to_csv("sentinel_lambda.csv", sep='\t', index=False)

In [None]:
# Post Analysis 2: Calculate the PVE for each protein

import os
import math
import pandas as pd
import numpy as np

# Step 1: Load the pQTL data
data = pd.read_csv("sentinel_qtls.csv", sep='\t')

# Step 2: Calculate the PVE
# Per-variant PVE = 2 * f * (1 - f) * beta**2, with f = A1_FREQ and beta = BETA.
# NOTE(review): this form presumably assumes BETA is on a standardized
# (unit-variance) phenotype scale — confirm against the upstream pipeline.
# Computed as a standalone Series so `data` is left unmodified and no column is
# assigned onto a groupby sub-frame (which can raise SettingWithCopyWarning).
variant_pve = 2 * data["A1_FREQ"] * (1 - data["A1_FREQ"]) * data["BETA"] ** 2

# Step 3: Create the PVE DataFrame and save it
# Sum the per-variant PVE within each (gene_symbol, UniProt) pair. As with the
# default DataFrame.groupby, groups are sorted by key and rows with a missing
# key are dropped; NaN PVE values are skipped by the sum.
pve_df = (
    variant_pve
    .groupby([data["gene_symbol"], data["UniProt"]])
    .sum()
    .rename("PVE")
    .reset_index()
    .rename(columns={"gene_symbol": "protein"})
)
# List-of-rows view ([protein, UniProt, PVE]) kept for any later cell that uses it.
pve_arr = pve_df.values.tolist()
pve_df.to_csv("sentinel_pve.csv", sep='\t', index=False)