In [1]:
import pandas as pd
from lpa_pipeline import freq_table_generator, appearance_table_generator

# Root directory of the LPA analysis project; every dataset path below is built from it.
project_path = "/mnt/vast/hpc/bvardarajan_lab/LPA_analysis"

# Disable pandas' chained-assignment warning for the whole notebook.
pd.set_option("mode.chained_assignment", None)

In [2]:
# Variant table later passed to the frequency generator as `one_hot_table`
# (first CSV column is used as the index).
encoded_result = pd.read_csv(
    f"{project_path}/dataset/tidied_output/filtered_result.csv", index_col = 0
)

# Per-variant annotations; the cells below read its "novel" and "coding" columns.
locus_table = pd.read_excel(
    f"{project_path}/dataset/locus_table_novelty_and_missense.xlsx", index_col = 0
)

# Ethnicity assignments; only the "ethnicity" column is used downstream.
eigen_result = pd.read_csv(
    f"{project_path}/dataset/ethnicity_from_eigenstrat/eigenstrat_complete.csv", index_col = 0
)

# Phenotype table: re-index by WES_ID, expose DEM03 under the name GENDER,
# and keep only rows where both AGE and GENDER are present.
personal_info = (
    pd.read_csv(
        f"{project_path}/dataset/phenotypes/WHICAP_pheno_lpa_202306022.csv", index_col = 0
    )
    .set_index("WES_ID")
    .rename(columns = {"DEM03": "GENDER"})
    .dropna(subset = ["AGE", "GENDER"])
)

In [3]:
# Inner join on the shared index: keeps only samples that have both a phenotype row
# and an ethnicity assignment.
personal_info_with_ethnicity = pd.concat(
    [personal_info, eigen_result["ethnicity"]],
    axis = 1,
    join = "inner")

# Recode GENDER (0 -> "Male", 1 -> "Female"); values outside the mapping are left as-is.
# The column is replaced wholesale instead of being written through `.loc[:, "GENDER"]`:
# a `.loc` write tries to set values in place, and putting strings into a numeric column
# that way is deprecated in pandas >= 2.1 (FutureWarning today, an error in a later release).
personal_info_with_ethnicity["GENDER"] = personal_info_with_ethnicity["GENDER"].replace(
    {0: "Male", 1: "Female"})

# Combined stratification label "<ethnicity>_<gender>", e.g. "AF_Male".
personal_info_with_ethnicity["Ethnicity_Gender"] = personal_info_with_ethnicity[
    "ethnicity"].str.cat(personal_info_with_ethnicity["GENDER"], sep = "_")

In [4]:
# Preview the combined label for the first five samples (head() defaults to 5 rows).
personal_info_with_ethnicity.loc[:, ["Ethnicity_Gender"]].head()

Unnamed: 0,Ethnicity_Gender
washei50436,AF_Male
washei36472,HISP_Female
washei36216,HISP_Female
washei36276,HISP_Female
washei36289,EU_Female


In [5]:
# Preview the first five rows of the variant table (head() defaults to 5 rows).
encoded_result.head()

Unnamed: 0,21-G/A,31-T/C,33-T/C,35-C/G,37-C/T,38-C/G,41-G/A,50-C/G,57-C/A,58-G/A,...,5086-A/G,5088-C/A,5088-C/T,5089-G/A,5092-G/T,5093-T/G,5096-T/C,5098-T/G,5099-C/G,5099-C/T
washei70623,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
washei70345,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
washei70493,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
washei71229,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
washei70625,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Frequency-table generator from lpa_pipeline, constructed with its defaults.
ftg = freq_table_generator.FreqTableGenerator()
# The appearance-table generator is given the frequency-table generator instance above.
atg = appearance_table_generator.AppearanceTableGenerator(freq_table_generator = ftg)

In [7]:
# Build the per-variant frequency table stratified by the combined Ethnicity_Gender label.
# Judging by the displayed result, each group gets count / total-detected / total-population /
# freq columns plus a rarity label ("Common", "Rare", "Not Detected").
# NOTE(review): the rarity thresholds live inside lpa_pipeline and are not visible here.
ftg_freq_table_ethnicity_gender = ftg.generate_freq_table_with_rarity(
    class_info_table = personal_info_with_ethnicity[["Ethnicity_Gender"]],
    one_hot_table = encoded_result,
    class_variable = "Ethnicity_Gender")

In [8]:
# Display the stratified frequency table (the notebook truncates the rendering of large frames).
ftg_freq_table_ethnicity_gender

Unnamed: 0,count_AF_Female,total_AF_Female_detected,total_AF_Female_population,freq_AF_Female,count_AF_Male,total_AF_Male_detected,total_AF_Male_population,freq_AF_Male,count_EU_Female,total_EU_Female_detected,...,count_HISP_Male,total_HISP_Male_detected,total_HISP_Male_population,freq_HISP_Male,AF_Female,AF_Male,EU_Female,EU_Male,HISP_Female,HISP_Male
21-G/A,240.0,686,782,0.349854,101.0,290,324,0.348276,211.0,474,...,273.0,499,512,0.547094,Common,Common,Common,Common,Common,Common
31-T/C,268.0,701,782,0.382311,110.0,296,324,0.371622,219.0,478,...,288.0,502,512,0.573705,Common,Common,Common,Common,Common,Common
33-T/C,1.0,700,782,0.001429,0.0,298,324,0.000000,0.0,478,...,0.0,503,512,0.000000,Rare,Not Detected,Not Detected,Not Detected,Not Detected,Not Detected
35-C/G,496.0,705,782,0.703546,196.0,297,324,0.659933,260.0,479,...,308.0,501,512,0.614770,Common,Common,Common,Common,Common,Common
37-C/T,3.0,704,782,0.004261,4.0,299,324,0.013378,18.0,478,...,13.0,502,512,0.025896,Rare,Common,Common,Common,Common,Common
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5093-T/G,1.0,782,782,0.001279,0.0,324,324,0.000000,0.0,517,...,0.0,512,512,0.000000,Rare,Not Detected,Not Detected,Not Detected,Not Detected,Not Detected
5096-T/C,0.0,782,782,0.000000,0.0,324,324,0.000000,5.0,517,...,1.0,512,512,0.001953,Not Detected,Not Detected,Rare,Common,Rare,Rare
5098-T/G,0.0,782,782,0.000000,0.0,324,324,0.000000,0.0,517,...,0.0,512,512,0.000000,Not Detected,Not Detected,Not Detected,Not Detected,Rare,Not Detected
5099-C/G,2.0,782,782,0.002558,0.0,324,324,0.000000,0.0,517,...,0.0,512,512,0.000000,Rare,Not Detected,Not Detected,Not Detected,Not Detected,Not Detected


# Overall

In [9]:
# Appearance table over every variant in the frequency table; the result has one row per
# (Ethnicity, Rarity) pair with overall / male / female / both counts.
atg.generate_appearance_table(ftg_freq_table_ethnicity_gender)

Unnamed: 0,Ethnicity,Rarity,sub_class,Male_appeared_in_sub_class,Female_appeared_in_sub_class,Both_appeared_in_sub_class
0,AF,Common,441,440,441,440
1,AF,Rare,532,250,435,153
2,HISP,Common,403,402,403,402
3,HISP,Rare,658,357,568,267
4,EU,Common,365,365,365,365
5,EU,Rare,327,203,230,106


# New SNPs

In [10]:
# Keep only the frequency-table rows whose variant is flagged novel in the locus table.
# Variants absent from the locus table are dropped by the isin() test.
ftg_freq_table_ethnicity_gender_new_snps = ftg_freq_table_ethnicity_gender.loc[
    ftg_freq_table_ethnicity_gender.index.isin(
        locus_table.loc[locus_table["novel"].eq(True)].index
    )
]

In [11]:
# Appearance table restricted to the variants flagged novel in the locus table.
atg.generate_appearance_table(ftg_freq_table_ethnicity_gender_new_snps)

Unnamed: 0,Ethnicity,Rarity,sub_class,Male_appeared_in_sub_class,Female_appeared_in_sub_class,Both_appeared_in_sub_class
0,AF,Common,409,408,409,408
1,AF,Rare,487,222,397,132
2,HISP,Common,370,369,370,369
3,HISP,Rare,600,325,519,244
4,EU,Common,338,338,338,338
5,EU,Rare,300,185,208,93


# Previously found SNPs

In [12]:
# Complement of the novel subset: every frequency-table row whose variant is NOT flagged
# novel in the locus table (this also keeps variants missing from the locus table).
ftg_freq_table_ethnicity_gender_coassin_snps = ftg_freq_table_ethnicity_gender.loc[
    ~ftg_freq_table_ethnicity_gender.index.isin(
        locus_table.loc[locus_table["novel"].eq(True)].index
    )
]

In [13]:
# Appearance table restricted to the variants not flagged novel in the locus table.
atg.generate_appearance_table(ftg_freq_table_ethnicity_gender_coassin_snps)

Unnamed: 0,Ethnicity,Rarity,sub_class,Male_appeared_in_sub_class,Female_appeared_in_sub_class,Both_appeared_in_sub_class
0,AF,Common,32,32,32,32
1,AF,Rare,45,28,38,21
2,HISP,Common,33,33,33,33
3,HISP,Rare,58,32,49,23
4,EU,Common,27,27,27,27
5,EU,Rare,27,18,22,13


# New Exons

In [14]:
# Frequency-table rows for variants flagged novel AND annotated coding == "Exon".
# The extra isin() condition limits the selection to labels that exist in the frequency
# table: `.loc` with a list of labels raises KeyError if any label is missing, and the
# frequency table comes from a filtered result file that need not contain every locus.
# This matches the tolerant isin()-based selection used for the novel / non-novel subsets.
# Row order still follows locus_table, as before.
ftg_freq_table_ethnicity_gender_new_exons = ftg_freq_table_ethnicity_gender.loc[
    locus_table.loc[
        (locus_table["novel"] == True) &
        (locus_table["coding"] == "Exon") &
        locus_table.index.isin(ftg_freq_table_ethnicity_gender.index)
    ].index
]

In [15]:
# Appearance table restricted to novel variants annotated as exonic.
atg.generate_appearance_table(ftg_freq_table_ethnicity_gender_new_exons)

Unnamed: 0,Ethnicity,Rarity,sub_class,Male_appeared_in_sub_class,Female_appeared_in_sub_class,Both_appeared_in_sub_class
0,AF,Common,48,48,48,48
1,AF,Rare,57,26,47,16
2,HISP,Common,41,41,41,41
3,HISP,Rare,72,38,54,20
4,EU,Common,39,39,39,39
5,EU,Rare,39,22,28,11


# Introns

In [16]:
# Frequency-table rows for variants annotated coding == "Intron" (novel or not).
# The isin() condition limits the selection to labels present in the frequency table:
# `.loc` with a list of labels raises KeyError if any label is missing, which would
# happen for loci that were filtered out of the variant table. Row order still follows
# locus_table, as before.
ftg_freq_table_ethnicity_gender_intron = ftg_freq_table_ethnicity_gender.loc[
    locus_table.loc[
        (locus_table["coding"] == "Intron") &
        locus_table.index.isin(ftg_freq_table_ethnicity_gender.index)
    ].index
]

In [17]:
# Appearance table restricted to variants annotated as intronic.
atg.generate_appearance_table(ftg_freq_table_ethnicity_gender_intron)

Unnamed: 0,Ethnicity,Rarity,sub_class,Male_appeared_in_sub_class,Female_appeared_in_sub_class,Both_appeared_in_sub_class
0,AF,Common,361,360,361,360
1,AF,Rare,430,196,350,116
2,HISP,Common,329,328,329,328
3,HISP,Rare,528,287,465,224
4,EU,Common,299,299,299,299
5,EU,Rare,261,163,180,82


# Haploview info

In [None]:
from lpa_pipeline import haplotview_generator

In [None]:
# Construct the Haploview input generator.
# Only the module `haplotview_generator` is imported, so the class must be reached through
# it; the bare name `HaploviewGenerator` would raise NameError on a fresh kernel.
# NOTE(review): assumes the class is defined in that module, following the
# `module.ClassName` pattern used for the other lpa_pipeline generators — confirm.
# The output directory is derived from project_path (same resulting path as before).
hg = haplotview_generator.HaploviewGenerator(
    encoded_result = encoded_result,
    locus_table = locus_table,
    output_path = f"{project_path}/data_analysis_result/paper_1_output"
)

In [2]:
# Variants exported below under the label "HTN_associated_variant".
# NOTE(review): labels presumably match variant column names of encoded_result — confirm.
HTN_variant_list = [
    "166-G/A",
    "387-G/C",
    "402-G/A",
    "723-A/G",
    "727-T/C",
    "4527-A/T",
    "4541-G/A",
    "4653-A/T",
    "4785-C/A",
]

In [None]:
# Export the HTN-associated variants for Haploview. Per the message recorded with this cell,
# this writes linkage_<output_label>.txt and map_<output_label>.map under the generator's
# output path.
hg.haplotype_output(
    variant_list = HTN_variant_list,
    output_label = "HTN_associated_variant"
)

linkage_HTN_associated_variant.txt and map_HTN_associated_variant.map will be saved under /mnt/vast/hpc/bvardarajan_lab/LPA_analysis/data_analysis_result/paper_1_output as the linkage format input