In [13]:
import matplotlib
# Select the PGF backend; this runs before matplotlib.pyplot is imported in a later cell.
matplotlib.use("pgf")
# Typesetting options applied to every figure produced by this notebook.
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",  # TeX engine used by the pgf backend
    'font.family': 'serif',
    'font.serif': 'lm',  # presumably Latin Modern -- TODO confirm this name resolves on the target system
    'font.size': '11',
    'text.usetex': True,  # render all text through LaTeX
    'pgf.rcfonts': False,  # do not set up the pgf fonts from the rc font parameters
})
import pandas as pd
# Identifiers of the analysis run to summarise; they are interpolated into both paths below.
MODALITY = 'asymmetry'
IMPUTE_ID = 'mean_imputed'
SUBSAMPLED_ID = 'not_subsampled'
# Directory from which create_intensity_table_plot reads one `<partition>.png` image per table column.
PARTITIONS_DIR = f"../../results/{MODALITY}/visualizeCCAOnPheno/STAGE00DATA/{IMPUTE_ID}/{SUBSAMPLED_ID}/props"
# Directory containing the `parNN/<name>.txt` tables read by load_df; the output figures are written below it.
RESULTS_DIR = f"../../results/{MODALITY}/FUMA gene2func/joinedDatasets/{IMPUTE_ID}/{SUBSAMPLED_ID}"

In [14]:
def load_df(name, partitions=(1, 4, 5, 6, 7), results_dir=None):
    """Read the table ``name`` for every partition and stack the results.

    For each partition number ``p`` the tab-separated file
    ``<results_dir>/par<pp>/<name>.txt`` (``pp`` zero-padded to two digits)
    is read and tagged with a ``partition`` column holding ``p``.

    Parameters
    ----------
    name : str
        File stem of the table to load (e.g. ``"GS"``).
    partitions : iterable of int, optional
        Partition numbers to load, in output order. Defaults to the
        partitions this notebook analyses.
    results_dir : str, optional
        Directory containing the ``parNN`` sub-folders. Defaults to the
        module-level ``RESULTS_DIR``.

    Returns
    -------
    pandas.DataFrame
        All per-partition tables concatenated row-wise. The row index of
        each file is kept as read, so index values repeat across partitions.

    Raises
    ------
    ValueError
        If ``partitions`` is empty.
    """
    if results_dir is None:
        results_dir = RESULTS_DIR
    partitions = list(partitions)
    if not partitions:
        raise ValueError("partitions must not be empty")

    tables = []
    for par in partitions:
        tab = pd.read_csv(f"{results_dir}/par{par:02d}/{name}.txt", sep='\t')
        tab['partition'] = par
        tables.append(tab)
    # Concatenate once instead of re-copying the accumulated frame on every iteration.
    return pd.concat(tables)
# Load the "GS" table and the "gtex_v8_ts_DEG" table, each stacked over all partitions.
functions_df, deg_df = (
    load_df(table_name) for table_name in ("GS", "gtex_v8_ts_DEG")
)

In [15]:
# Shape (rows, columns) of the stacked "GS" table.
functions_df.shape

(647, 9)

In [16]:
# Preview the first rows of the stacked "GS" table.
functions_df.head()

Unnamed: 0,Category,GeneSet,N_genes,N_overlap,p,adjP,genes,link,partition
0,GO_bp,GO_MICROTUBULE_CYTOSKELETON_ORGANIZATION,528,10,4.843584e-07,0.003214,TUBA1B:TUBA1A:TUBA1C:CHMP1A:SPIRE2:TUBB3:TUBB3...,http://www.gsea-msigdb.org/gsea/msigdb/cards/G...,1
1,GO_bp,GO_MICROTUBULE_BASED_PROCESS,726,11,1.119068e-06,0.003214,TUBA1B:TUBA1A:TUBA1C:RASGRP1:CHMP1A:SPIRE2:TUB...,http://www.gsea-msigdb.org/gsea/msigdb/cards/G...,1
2,GO_bp,GO_REGULATION_OF_CELLULAR_COMPONENT_BIOGENESIS,901,12,1.345885e-06,0.003214,TRABD2B:DACT1:THBS1:CHMP1A:CDK10:SPIRE2:DEF8:P...,http://www.gsea-msigdb.org/gsea/msigdb/cards/G...,1
3,GO_bp,GO_CYTOSKELETON_ORGANIZATION,1285,14,1.749016e-06,0.003214,TUBA1B:TUBA1A:TUBA1C:FMNL3:DAAM1:CHMP1A:CDK10:...,http://www.gsea-msigdb.org/gsea/msigdb/cards/G...,1
4,GO_bp,GO_ORGANELLE_LOCALIZATION,684,10,4.89174e-06,0.007191,TUBA1A:RASGRP1:CHMP1A:SPIRE2:DEF8:PLEKHM1:CRHR...,http://www.gsea-msigdb.org/gsea/msigdb/cards/G...,1


In [17]:
# Distinct values of the Category column; these are the names the later filters compare against.
functions_df.Category.unique()

array(['GO_bp', 'GWAScatalog', 'Chemical_and_Genetic_pertubation',
       'GO_cc', 'Immunologic_signatures', 'Positional_gene_sets',
       'Wikipathways', 'Curated_gene_sets', 'Oncogenic_signatures',
       'KEGG', 'microRNA_targets', 'TF_targets', 'Reactome',
       'Canonical_Pathways', 'Hallmark_gene_sets', 'GO_mf', 'BioCarta',
       'Cancer_gene_neighborhoods', 'Computational_gene_sets'],
      dtype=object)

In [18]:
# For every (Category, partition) pair, show the leading entry in ascending-adjP order
# (groupby(...).first() takes the first non-null value of each column).
(
    functions_df
    .sort_values('adjP')
    .groupby(['Category', 'partition'])
    .first()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,GeneSet,N_genes,N_overlap,p,adjP,genes,link
Category,partition,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BioCarta,6,BIOCARTA_AKAPCENTROSOME_PATHWAY,14,2,6.962296e-05,0.02012103,PRKAG1:MAP2,http://www.gsea-msigdb.org/gsea/msigdb/cards/B...
Cancer_gene_neighborhoods,6,MORF_PTPRR,101,3,0.0001009689,0.0262971,DNAJC22:CRHR1:MAP2,http://www.gsea-msigdb.org/gsea/msigdb/cards/M...
Canonical_Pathways,1,REACTOME_ACTIVATION_OF_AMPK_DOWNSTREAM_OF_NMDARS,28,6,5.179024e-11,1.138867e-07,PRKAG1:TUBA1B:TUBA1A:TUBA1C:TUBB3:TUBB3:MAPT,http://www.gsea-msigdb.org/gsea/msigdb/cards/R...
Canonical_Pathways,6,REACTOME_ACTIVATION_OF_AMPK_DOWNSTREAM_OF_NMDARS,28,5,3.899867e-11,8.575808e-08,PRKAG1:TUBA1B:TUBA1A:TUBA1C:MAPT,http://www.gsea-msigdb.org/gsea/msigdb/cards/R...
Chemical_and_Genetic_pertubation,1,NIKOLSKY_BREAST_CANCER_16Q24_AMPLICON,52,14,8.473078e-26,2.79781e-22,CPNE7:CHMP1A:SPATA33:CDK10:ZNF276:FANCA:SPIRE2...,http://www.gsea-msigdb.org/gsea/msigdb/cards/N...
Chemical_and_Genetic_pertubation,4,NIKOLSKY_BREAST_CANCER_16Q24_AMPLICON,52,12,2.447235e-25,8.080769e-22,SPATA33:VPS9D1:ZNF276:FANCA:SPIRE2:TCF25:MC1R:...,http://www.gsea-msigdb.org/gsea/msigdb/cards/N...
Chemical_and_Genetic_pertubation,5,NIKOLSKY_BREAST_CANCER_16Q24_AMPLICON,52,15,8.915306999999999e-30,2.943834e-26,CPNE7:CHMP1A:SPATA33:CDK10:VPS9D1:ZNF276:FANCA...,http://www.gsea-msigdb.org/gsea/msigdb/cards/N...
Chemical_and_Genetic_pertubation,6,SOTIRIOU_BREAST_CANCER_GRADE_1_VS_3_UP,154,4,1.063315e-05,0.03511066,TUBA1B:TUBA1A:TUBA1C:TROAP,http://www.gsea-msigdb.org/gsea/msigdb/cards/S...
Chemical_and_Genetic_pertubation,7,NIKOLSKY_BREAST_CANCER_16Q24_AMPLICON,52,12,5.466857000000001e-28,1.805156e-24,SPATA33:VPS9D1:ZNF276:FANCA:SPIRE2:TCF25:MC1R:...,http://www.gsea-msigdb.org/gsea/msigdb/cards/N...
Computational_gene_sets,6,MODULE_198,301,4,0.0001443865,0.04129454,KMT2D:GTF2A2:MEIS1:MAP2,http://www.gsea-msigdb.org/gsea/msigdb/cards/M...


In [19]:
def _significant_adjp_by_partition(table, category, alpha=0.05):
    """Pivot the significant rows of one category into a GeneSet x partition table.

    Parameters
    ----------
    table : pandas.DataFrame
        Frame with ``Category``, ``GeneSet``, ``partition`` and ``adjP`` columns.
    category : str
        Value of ``Category`` to keep.
    alpha : float, optional
        Rows are kept only when ``adjP < alpha``.

    Returns
    -------
    pandas.DataFrame
        ``adjP`` values indexed by ``GeneSet`` with one column per partition;
        combinations without a significant row are NaN.
    """
    keep = (table['Category'] == category) & (table['adjP'] < alpha)
    # No pre-sorting needed: pivot_table orders its rows and columns by label.
    return table.loc[keep].pivot_table(columns='partition', values='adjP', index='GeneSet')


go_cc_per_partition_df = _significant_adjp_by_partition(functions_df, "GO_cc")
go_mf_per_partition_df = _significant_adjp_by_partition(functions_df, "GO_mf")
go_bp_per_partition_df = _significant_adjp_by_partition(functions_df, "GO_bp")
canonical_pathways_per_partition_df = _significant_adjp_by_partition(functions_df, "Canonical_Pathways")

In [20]:
# GWAScatalog rows with adjP below 0.05, arranged as GeneSet (rows) x partition (columns).
functions_per_partition_df = (
    functions_df
    .sort_values('adjP')
    .query('(Category=="GWAScatalog") & (adjP<0.05)')
    .pivot_table(index='GeneSet', columns='partition', values='adjP')
)
functions_per_partition_df

partition,1,4,5,6,7
GeneSet,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alcohol use disorder (total score),1.274981e-07,1.67788e-11,2.045014e-12,1.483805e-12,6.702837e-13
Alzheimer's disease in APOE e4- carriers,0.0003731101,2.038641e-05,8.412608e-05,7.638447e-06,3.616962e-06
Bipolar disorder,0.01380983,,,0.0001758043,
Blond vs. brown/black hair color,0.03096164,0.04447542,0.01070701,,0.01318466
Brown vs. black hair color,0.0002642386,0.001012013,6.32944e-05,0.03456817,0.0002804684
Chronotype,0.02426054,,,,
Cognitive ability,,,,0.04868617,0.03240468
Cognitive function,0.00508396,0.00982795,1.839249e-06,0.00594893,0.00287478
Dentate gyrus granule cell layer volume,,,8.412608e-05,,
Dentate gyrus molecular layer volume,,,0.0001071477,,


In [21]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpecFromSubplotSpec
def create_intensity_table_plot(df, color_labels=None, partitions_dir=None, figsize=(20, 3)):
    """Draw a strip of partition images with a colour-coded table underneath.

    Every cell shows ``-log10`` of the corresponding value of ``df`` rounded
    to two decimals; NaN entries are rendered as empty white cells.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table (adjusted p-values at the call sites in this notebook).
        Row labels must be strings; underscores in them are replaced by
        spaces (presumably because ``text.usetex`` is enabled -- TODO confirm).
        Each column label selects the image ``<partitions_dir>/<label>.png``
        shown above that column.
    color_labels : array-like, optional
        Array with the same shape as ``df.values`` that is passed to the
        ``coolwarm`` colormap to colour the cells. When omitted, cells are
        coloured with the ``Reds`` colormap, normalised between the smallest
        and largest ``-log10`` value.
    partitions_dir : str, optional
        Directory containing the per-partition PNG files. Defaults to the
        module-level ``PARTITIONS_DIR``.
    figsize : tuple of float, optional
        Figure size handed to ``plt.subplots``.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the images and the table.
    """
    if partitions_dir is None:
        partitions_dir = PARTITIONS_DIR

    vals = np.around(-np.log10(df.values), 2)
    if color_labels is None:
        norm = plt.Normalize(np.nanmin(vals), np.nanmax(vals))
        colours = plt.cm.Reds(norm(vals))
    else:
        colours = plt.cm.coolwarm(color_labels)
    # Missing entries get a white background and, below, an empty label.
    colours[np.isnan(vals)] = [1, 1, 1, 1]
    cell_text = [[x if np.isfinite(x) else '' for x in row] for row in vals]

    fig, ax0 = plt.subplots(figsize=figsize)
    ax0.get_xaxis().set_visible(False)
    ax0.get_yaxis().set_visible(False)

    the_table = ax0.table(
        cellText=cell_text,
        rowLabels=[x.replace('_', ' ') for x in df.index],
        colLabels=['Partition ' + str(i) for i in df.columns],
        loc='bottom',
        cellLoc='center',
        cellColours=colours,
    )
    the_table.set_fontsize(20)
    the_table.scale(1, 4)

    # GridSpecFromSubplotSpec expects a SubplotSpec, not an Axes: split the
    # region occupied by ax0 into one slot per column for the images.
    spec = GridSpecFromSubplotSpec(
        1, len(df.columns), subplot_spec=ax0.get_subplotspec(), wspace=0, hspace=0
    )
    for i, column in enumerate(df.columns):
        ax1 = fig.add_subplot(spec[0, i], xticks=[], yticks=[])
        with Image.open(f"{partitions_dir}/{column}.png") as im:
            ax1.imshow(im)
        ax1.get_xaxis().set_visible(False)
        ax1.get_yaxis().set_visible(False)

    # Leave room on the left for the row labels and below for the table.
    fig.subplots_adjust(left=0.2, bottom=0.2)
    return fig

In [22]:
# Table figure for the GWAScatalog gene sets (default Reds colouring).
functions_fig = create_intensity_table_plot(df=functions_per_partition_df)

In [23]:
# Preview the GO_cc table: adjP values with GeneSet rows and partition columns.
go_cc_per_partition_df.head()

partition,1,5,6
GeneSet,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
GO_COLLAGEN_CONTAINING_EXTRACELLULAR_MATRIX,0.043457,,
GO_CYTOPLASMIC_MICROTUBULE,0.00583,,
GO_CYTOSKELETAL_PART,0.006446,,
GO_ENDOPLASMIC_RETICULUM_PART,0.038541,,
GO_EXTRACELLULAR_MATRIX,0.006446,,


In [24]:
# Build one table figure per category table, in the order GO_cc, GO_bp, GO_mf, Canonical_Pathways.
(go_cc_fig,
 go_bp_fig,
 go_mf_fig,
 canonical_pathways_fig) = [
    create_intensity_table_plot(table)
    for table in (
        go_cc_per_partition_df,
        go_bp_per_partition_df,
        go_mf_per_partition_df,
        canonical_pathways_per_partition_df,
    )
]

In [25]:
# Preview the first rows of the stacked "gtex_v8_ts_DEG" table.
deg_df.head()

Unnamed: 0,Category,GeneSet,N_genes,N_overlap,p,adjP,genes,partition
0,DEG.up,Adipose_Subcutaneous,1871,6,0.666941,1.0,ENSG00000204291:ENSG00000103264:ENSG0000017080...,1
1,DEG.up,Adipose_Visceral_Omentum,1596,5,0.681961,1.0,ENSG00000204291:ENSG00000123243:ENSG0000015592...,1
2,DEG.up,Adrenal_Gland,1374,3,0.875389,1.0,ENSG00000143995:ENSG00000115828:ENSG00000104415,1
3,DEG.up,Artery_Aorta,2391,16,0.009846,0.531705,ENSG00000204291:ENSG00000100592:ENSG0000016561...,1
4,DEG.up,Artery_Coronary,1805,13,0.011413,0.616307,ENSG00000204291:ENSG00000100592:ENSG0000016561...,1


In [26]:
# Keep only the lowest-adjP type per gene set and partition. After this step
# GeneSet and partition form the index of deg_df.
deg_df = deg_df.sort_values('adjP').groupby(['GeneSet','partition']).first()


def _significant_deg_table(best_deg, category, label, alpha=0.05):
    """Pivot the significant rows of one DEG category and tag them with ``label``.

    Parameters
    ----------
    best_deg : pandas.DataFrame
        Frame keyed by ``GeneSet`` and ``partition`` with ``Category`` and
        ``adjP`` columns.
    category : str
        Value of ``Category`` to keep (e.g. ``"DEG.up"``).
    label : str
        Value written to the added ``type`` column.
    alpha : float, optional
        Rows are kept only when ``adjP < alpha``.

    Returns
    -------
    pandas.DataFrame
        ``adjP`` indexed by ``GeneSet`` with one column per partition, plus
        a constant ``type`` column.
    """
    keep = (best_deg['Category'] == category) & (best_deg['adjP'] < alpha)
    table = best_deg.loc[keep].pivot_table(columns='partition', values='adjP', index='GeneSet')
    table['type'] = label
    return table


deg_per_partition_up_df = _significant_deg_table(deg_df, "DEG.up", 'up')
deg_per_partition_down_df = _significant_deg_table(deg_df, "DEG.down", 'down')
deg_per_partition_twoside_df = _significant_deg_table(deg_df, "DEG.twoside", 'twoside')

df = pd.concat([deg_per_partition_up_df,deg_per_partition_twoside_df,deg_per_partition_down_df],axis=0,sort=True)
# Partition labels become strings so they sort together with the 'type' column name.
df.columns = [str(x) for x in df.columns]
df = df.sort_index(axis=1).sort_index(axis=0)
df

Unnamed: 0_level_0,1,4,5,6,7,type
GeneSet,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adipose_Visceral_Omentum,0.04614,,,,,down
Artery_Tibial,,,0.029513,,,twoside
Brain_Cerebellar_Hemisphere,,0.008082,6.1e-05,0.015283,0.000114,up
Brain_Cerebellum,0.044017,0.031497,9.4e-05,0.009849,6.6e-05,up
Brain_Hypothalamus,0.027668,,,,,twoside
Cells_Cultured_fibroblasts,,,0.000198,,,down
Esophagus_Gastroesophageal_Junction,0.005916,,,,,twoside
Esophagus_Muscularis,0.017499,,0.004678,,,twoside
Ovary,,,0.013086,,,twoside
Ovary,0.035704,,,,,down


In [27]:
# One colour code per row, repeated across every column of df:
# 'up' -> 1, 'twoside' -> 0.5, anything else -> 0.
row_codes = df['type'].map({'up': 1.0, 'twoside': 0.5}).fillna(0.0).to_numpy(dtype=float)
labels = np.repeat(row_codes[:, np.newaxis], df.shape[1], axis=1)

In [28]:
# Display the combined DEG table (adjP per partition plus the 'type' column).
df

Unnamed: 0_level_0,1,4,5,6,7,type
GeneSet,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adipose_Visceral_Omentum,0.04614,,,,,down
Artery_Tibial,,,0.029513,,,twoside
Brain_Cerebellar_Hemisphere,,0.008082,6.1e-05,0.015283,0.000114,up
Brain_Cerebellum,0.044017,0.031497,9.4e-05,0.009849,6.6e-05,up
Brain_Hypothalamus,0.027668,,,,,twoside
Cells_Cultured_fibroblasts,,,0.000198,,,down
Esophagus_Gastroesophageal_Junction,0.005916,,,,,twoside
Esophagus_Muscularis,0.017499,,0.004678,,,twoside
Ovary,,,0.013086,,,twoside
Ovary,0.035704,,,,,down


In [29]:
# Plot the DEG table without its 'type' column; the matching last column of
# labels is dropped so the colour codes line up with the remaining cells.
deg_fig = create_intensity_table_plot(
    df.drop(columns='type'),
    color_labels=labels[:, :-1],
)

In [30]:
import os
# Folder for the summary figures; the trailing slash is relied on by the
# plain string concatenations that build the file paths.
OUTPUT_DIR = RESULTS_DIR + '/partitionsSummary/'
os.makedirs(OUTPUT_DIR, exist_ok=True)

for pdf_name, figure in (('DEG.pdf', deg_fig), ('GWASCatalog.pdf', functions_fig)):
    figure.savefig(OUTPUT_DIR + pdf_name, bbox_inches="tight")

In [31]:
# Write the remaining figures as PDFs into OUTPUT_DIR.
for pdf_name, figure in (
    ('GO_cc.pdf', go_cc_fig),
    ('GO_mf.pdf', go_mf_fig),
    ('GO_bp.pdf', go_bp_fig),
    ('canonical_pathways.pdf', canonical_pathways_fig),
):
    figure.savefig(OUTPUT_DIR + pdf_name, bbox_inches="tight")