In [None]:
def main(): 
    import json
    from rpy2 import robjects
    import  graph_construction as function
    import pandas as pd
    import os

    #CARICAMENTO FILE CONFIGURAZIONE
    with open("config.json") as f:
        data=json.load(f)

    #Definizione pathway
    path_data_mutational=data["Paths"]["data_mutational"]
    path_clinical_sample=data["Paths"]["data_clinical_sample"]
    path_clinical_patient=data["Paths"]["data_clinical_patient"]
    column_mutation_name=data["Mutation"]["column_mutation_name"]
    gene_interest=data["Mutation"]["gene_of_interest"]
    path_save=data["Paths"]["output_folder"]
    column_gene=data["Mutation"]["column_gene_name"]
    column_hgvsp=data["Mutation"]["column_hgvsp_short"]
    column_hgvsc=data["Mutation"]["column_hgvsc"]
    column_variant_classification=data["Mutation"]["column_variant_classification"]
    sample_name=data["Clinical_data"]["column_sample_name"]
    patient_name=data["Clinical_data"]["column_patient_name"]
    vaf=data["Mutation"]["vaf"]
    vaf_score=data["Mutation"]["vaf_score"]
    column_vaf=data["Mutation"]["vaf_column"]
    adjusted=data["Enrichment"]["adjusted"]
    threshold=data["Enrichment"]["threshold"]

    #caricamento dei dataframes
    data_mutational,data_clinical_sample,data_clinical_patient=function.read_file(path_data_mutational,path_clinical_sample,path_clinical_patient)
    
    #creazione delle mappe pazienti e varianti + creazione del grafo
    if column_mutation_name=="":
        data_mutational=function.adding_category_mutation(data_mutational,column_gene,column_hgvsp,column_variant_classification,column_hgvsc)

    #filtraggio per la vaf se sempresente
    if vaf and column_vaf!="":
        data_mutational=data_mutational[column_vaf]>=vaf_score
    elif vaf and column_vaf=="":
        data_mutational['t_AF']=data_mutational.apply(function.calculated_vaf, axis=1)


    map_patients,map_variants,map_consequence=function.create_maps(data_mutational,data,column_mutation_name)
    graph=function.graph_creation(map_patients,map_variants)

    #plot tipi di mutazione geni di interesse
    if len(gene_interest)!=0:
        function.plot_mutation_gene(map_consequence,gene_interest,path_save)

    #clusterizzazione
    seed=function.selected_seed(graph)
    dendro=function.leiden_clustering(graph,seed)

    #gestione parte grafica del grafo (aggiunta colori + file per cytoscape)
    graph=function.adding_graph_color(graph,dendro)
    if not os.path.exists(path_save):
        os.makedirs(path_save)
    function.write_graph_to_cytoscape(graph,path_save)


    #creazione della mappa cluster e attribuzione del cluster i pazienti e alle varianti
    map_cluster=function.map_cluster_creation(graph,dendro)
    map_patients,map_variants=function.adding_cluster_to_map(map_cluster,map_patients,map_variants)
    graph=function.cluster_noded_attributes(graph,map_patients,map_variants)

    #aggiunta delle informazioni cliniche alla mappa dei pazienti
    if len(data_clinical_sample)!=0:
        map_patients=function.enriched_sample_data(data_clinical_sample,map_cluster,map_patients,sample_name,patient_name)
    if len(data_clinical_patient)!=0:
        map_patients=function.enriched_patient_data(data_clinical_patient,map_patients,patient_name)

    #creazione file in cui riassumere le informazioni nei diversi cluster
    function.summary_info(path_save,map_cluster,map_patients,patient_name)
    function.numerosity_info(path_save,map_cluster)

    #creazione di una mappa con il numero di mutazioni per ogni gene + creazione di due mappe con i valori assoluti e percentuali di distribuzione delle mutazioni, per ciascun gene,
    #nei diversi cluster
    gene_total_count=function.count_gene(graph)
    map_cluster_gene_abs,map_cluster_gene_percent=function.count_gene_abs_percent(map_cluster,gene_total_count,path_save)
    #salvataggio delle percentuali di distribuzione delle mutazioni dei diversi geni nei cluster
    function.genes_single_cluster(map_cluster,path_save)

    function.couple_centroid_element(dendro,map_cluster,path_save)
    function.centroids_cluster(dendro,path_save)

    robjects.r('chooseCRANmirror(ind=46)')
    robjects.r.source("./enrichment.r")

    
    import enrichment_image as enrichment
    path_output=f'{data["Paths"]["output_folder"]}/Arricchimento_all_genes'

    for folder in os.listdir(path_output):
        full_path=os.path.join(path_output,folder)
        if os.path.isdir(full_path):
            for f in os.listdir(full_path):
            # print(full_path)
                if os.path.isfile(os.path.join(full_path,f)):
                    type_go=f.split("_")[0]
                    cluster=f.split("_")[1].split(".")[0]
                    if f.startswith("kegg"):
                        enrichment.plot_term_kegg(full_path,f"{full_path}/{f}",type_go,cluster,adjusted,threshold)
                    elif f.startswith("phen"):
                        enrichment.plot_term_pheno(full_path,f"{full_path}/{f}",type_go,cluster,adjusted,threshold)
                    
                    elif f.startswith("wiki"):
                        enrichment.plot_term_wiki(full_path,f"{full_path}/{f}",type_go,cluster,adjusted,threshold)
                    
                    else:
                        print(f"{full_path}/{f}")
                        enrichment.plot_term_go(full_path,f"{full_path}/{f}",type_go,cluster,adjusted,threshold)

    '''
    #**************SEZIONE GRAFICA CONNESSIONI***************
    #defizione del numero di connessioni per ogni variante all'interno del cluster 
    variant_patient_connection_count=function.variant_conncection_patient(dendro)
    #defizione del numero di connessioni per ogni paziente all'interno del cluster
    patient_variant_connection_count=function.patient_connection_variant(dendro)
    graph=function.add_size_node(graph,variant_patient_connection_count)
    # CREAZIONE DI UN FILE "CONNECTION_VARIANT" IN CUI SONO INDICATE IL NUMERO DI VARIANTI COMUNI TRA I VARI PAZIENTI DI UN CLUSTER
    function.file_connection_variant(map_cluster,map_patients,path_save)
    # CREAZIONE DI UN FILE "CONCCECTION_PATIENT" IN CUI SONO INDICATI IL NUMERO DI PAZIENTI COMUNI TRA LE VARIE VARIANTI DI UN CLUSTER
    function.file_connection_patient(map_cluster,map_variants,path_save)
    #plott delle connessioni lato varianti:
    data_connection=pd.read_csv("./tcga_filtred/output/connection_patient.csv",sep="\t")
    #for i in range(len(map_cluster.keys())):
      # function.plot_distance_comutated_cluster_variants(dendro,i,data_connection,path_save)'''


if __name__=="__main__":
    main()

CARICAMENTO DATI

In [3]:
import json
#import rpy2.robjects as robjects
import  graph_construction as function
#import enrichment_image as enrichment

import os

#CARICAMENTO FILE CONFIGURAZIONE
with open("config.json") as f:
    data=json.load(f)

#Definizione pathway
path_data_mutational=data["Paths"]["data_mutational"]
path_clinical_sample=data["Paths"]["data_clinical_sample"]
path_clinical_patient=data["Paths"]["data_clinical_patient"]
column_mutation_name=data["Mutation"]["column_mutation_name"]
gene_interest=data["Mutation"]["gene_of_interest"]
path_save=data["Paths"]["output_folder"]
column_gene=data["Mutation"]["column_gene_name"]
column_hgvsp=data["Mutation"]["column_hgvsp_short"]
column_hgvsc=data["Mutation"]["column_hgvsc"]
column_variant_classification=data["Mutation"]["column_variant_classification"]
column_chromosome=data["Mutation"]["column_chromosome"]
column_start=data["Mutation"]["column_start"]
column_end=data["Mutation"]["column_end"]
sample_name=data["Clinical_data"]["column_sample_name"]
patient_name=data["Clinical_data"]["column_patient_name"]
vaf=data["Mutation"]["vaf"]
vaf_score=data["Mutation"]["vaf_score"]
column_vaf=data["Mutation"]["vaf_column"]
adjusted=data["Enrichment"]["adjusted"]
threshold=data["Enrichment"]["threshold"]

In [4]:
column_chromosome

'Chromosome'

In [2]:
#caricamento dei dataframes
data_mutational,data_clinical_sample,data_clinical_patient=function.read_file(path_data_mutational,path_clinical_sample,path_clinical_patient)

DIZIONARIO PAZIENTI-DIZIONARIO VARIANTI-COSTRUZIONE GRAFO

In [3]:
#creazione delle mappe pazienti e varianti + creazione del grafo
if column_mutation_name=="":
    #print("ciao")
    data_mutational=function.adding_category_mutation(data_mutational,column_gene,column_hgvsp,column_variant_classification,column_hgvsc,column_chromosome,column_start,column_end)

map_patients,map_variants,map_consequence=function.create_maps(data_mutational,data,column_mutation_name)

graph=function.graph_creation(map_patients,map_variants)


HGVSp_Short Variant_Classification None Chromosome Start_Position End_Position
Sostituzione_Amminoacidica


In [4]:
#filtraggio della VAF
if vaf and column_vaf!="":
    data_mutational=data_mutational[data_mutational[column_vaf]>=vaf_score]
elif vaf and column_vaf=="":
    data_mutational['t_AF']=data_mutational.apply(function.calculated_vaf, axis=1)
    print(len(data_mutational))
    data_mutational=data_mutational[data_mutational['t_AF']>=vaf_score]
    print(len(data_mutational))

2953
2929


In [5]:
#plot tipi di mutazione geni di interesse
if len(gene_interest)!=0:
    function.plot_mutation_gene(map_consequence,gene_interest)

CLUSTERIZZAZIONE

In [6]:
#clusterizzazione
seed=function.selected_seed(graph)

In [7]:
dendro=function.leiden_clustering(graph,seed)

numero di clusters: 114 Modularità: 0.9641596490665582


In [None]:
#gestione parte grafica del grafo (aggiunta colori + file per cytoscape)
graph=function.adding_graph_color(graph,dendro)
if not os.path.exists(path_save):
    os.makedirs(path_save)

DIZIONARIO CLUSTER

In [None]:
#creazione della mappa cluster e attribuzione del cluster i pazienti e alle varianti
map_cluster=function.map_cluster_creation(graph,dendro)

map_patients,map_variants=function.adding_cluster_to_map(map_cluster,map_patients,map_variants)

graph=function.cluster_noded_attributes(graph,map_patients,map_variants)

#function.write_graph_to_cytoscape(graph,path_save)

In [None]:
#plotnetwork singoli cluster
#for i in range(len(map_cluster.keys())):
   # function.plot_cluster_as_graph(graph,i,path_save)
    

INFORMAZIONI CLINICHE

In [None]:
#aggiunta delle informazioni cliniche alla mappa dei pazienti
if len(data_clinical_sample)!=0:
    map_patients=function.enriched_sample_data(data_clinical_sample,map_cluster,map_patients,sample_name,patient_name)
if len(data_clinical_patient)!=0:
    map_patients=function.enriched_patient_data(data_clinical_patient,map_patients,patient_name)

In [None]:
graph=function.adding_clinical_info_graph(graph,map_patients)

In [None]:
function.save_graph_to_file(graph, path_save)

FILE DI SINTESI

In [None]:
#creazione file in cui riassumere le informazioni nei diversi cluster
function.summary_info(path_save,map_cluster,map_patients,patient_name)
function.numerosity_info(path_save,map_cluster)

GENI

In [None]:
#creazione di una mappa con il numero di mutazioni per ogni gene + creazione di due mappe con i valori assoluti e percentuali di distribuzione delle mutazioni, per ciascun gene,
#nei diversi cluster
gene_total_count=function.count_gene(graph)
map_cluster_gene_abs,map_cluster_gene_percent=function.count_gene_abs_percent(map_cluster,gene_total_count,path_save)

#salvataggio delle percentuali di distribuzione delle mutazioni dei diversi geni nei cluster
function.genes_single_cluster(map_cluster,path_save)

function.genes_count_mutation_single_cluster(map_cluster_gene_abs,path_save)

In [None]:
function.creation_cluster_clinical_data(map_patients,path_save)

In [None]:
 #function.couple_centroid_element(dendro,map_cluster,path_save)
function.centroids_cluster(dendro,path_save)

In [None]:
function.degree_variant_cluster(map_cluster,graph,path_save)

In [None]:
#defizione del numero di connessioni per ogni variante all'interno del cluster 
variant_patient_connection_count=function.variant_conncection_patient(dendro)
#defizione del numero di connessioni per ogni paziente all'interno del cluster
patient_variant_connection_count=function.patient_connection_variant(dendro)

In [None]:
graph=function.add_size_node(graph,variant_patient_connection_count)

In [None]:
function.plot_graph(graph,path_save,"all")

In [None]:
# CREAZIONE DI UN FILE "CONNECTION_VARIANT" IN CUI SONO INDICATE IL NUMERO DI VARIANTI COMUNI TRA I VARI PAZIENTI DI UN CLUSTER
function.file_connection_variant(map_cluster,map_patients,path_save)

# CREAZIONE DI UN FILE "CONCCECTION_PATIENT" IN CUI SONO INDICATI IL NUMERO DI PAZIENTI COMUNI TRA LE VARIE VARIANTI DI UN CLUSTER
function.file_connection_patient(map_cluster,map_variants,path_save)

In [None]:
#import pandas as pd
#data_connection=pd.read_csv("./pdac/output/connection_patient.csv",sep="\t")

#for i in range(len(map_cluster.keys())):
    #function.plot_distance_comutated_cluster_variants(dendro,i,data_connection,path_save)

Arricchimento

In [None]:
import rpy2.robjects as robjects
robjects.r('chooseCRANmirror(ind=46)')
robjects.r.source("./enrichment.r")

In [None]:
import enrichment_image as enrichment
path_output=f'{data["Paths"]["output_folder"]}/Arricchimento_all_genes'

for folder in os.listdir(path_output):
    full_path=os.path.join(path_output,folder)
    if os.path.isdir(full_path):
        for f in os.listdir(full_path):
           # print(full_path)
            if os.path.isfile(os.path.join(full_path,f)):
                type_go=f.split("_")[0]
                cluster=f.split("_")[1].split(".")[0]
                if f.startswith("kegg"):
                    enrichment.plot_term_kegg(full_path,f"{full_path}/{f}",type_go,cluster,adjusted,threshold)
                elif f.startswith("phen"):
                    enrichment.plot_term_pheno(full_path,f"{full_path}/{f}",type_go,cluster,adjusted,threshold)
                
                elif f.startswith("wiki"):
                    enrichment.plot_term_wiki(full_path,f"{full_path}/{f}",type_go,cluster,adjusted,threshold)
                
                else:
                    print(f"{full_path}/{f}")
                    enrichment.plot_term_go(full_path,f"{full_path}/{f}",type_go,cluster,adjusted,threshold)
