# BPES

Literature search for genetic variants related to [blepharophimosis, ptosis, epicanthus inversus syndrome (BPES)](https://en.wikipedia.org/wiki/Blepharophimosis,_ptosis,_epicanthus_inversus_syndrome).

In [1]:
import math
import networkx as nx
import pandas as pd
import re
#from pyvis.network import Network

Load the Pubtator results and select the variants that were found, together with their associated genes.

In [2]:
pubtator_file = "pubtator_results_bpes_syndrome.txt"
unique_mutations = []
mutations = []
genes = {}
with open(pubtator_file, "r") as fh:
    for line in fh:
        line = line.split("\t")
        if len(line) > 1:
            pmid = line[0].strip()
            concept = line[4].strip()
            if concept == "Gene":
                gene_name = line[3].strip()
                gene_id = line[5].strip()
                genes[gene_id] = gene_name
            if concept == "DNAMutation" or concept == "ProteinMutation":
                mutation = line[3].strip()
                if mutation not in unique_mutations:
                    mutation_annotation = line[5]
                    corresponding_gene = re.search("CorrespondingGene:(\d+)", mutation_annotation)
                    corresponding_gene_id = corresponding_gene.group(1)
                    corresponding_gene_name = genes[corresponding_gene_id]
                    mutations.append([pmid, concept, mutation, corresponding_gene_name])
                    unique_mutations.append(mutation)
        else:
            continue
mutations_df = pd.DataFrame(mutations, columns=["PMID", "type", "mutation", "gene"])

In [3]:
mutations_df.head()

Unnamed: 0,PMID,type,mutation,gene
0,22248822,ProteinMutation,p.C134W,FOXL2
1,22312189,ProteinMutation,p.Ser217Cys,FOXL2
2,22336067,DNAMutation,c. 672_701dup30,FOXL2
3,22336067,ProteinMutation,p. Ala224_Ala234dup,FOXL2
4,22336067,DNAMutation,c.655C > T,FOXL2


In [4]:
mutations_df["gene"] = mutations_df["gene"].replace(["forkhead transcription factor FOXL2"], "FOXL2")
mutation_genes = list(mutations_df["gene"].unique())
mutation_genes

['FOXL2', 'BMP15', 'NR2F2', 'OSR2', 'StAR', 'ITGB5', 'POF', 'SEPT9']

Check if any of these genes mutated in BPES match the genes that came out of the long-read sequencing variant analysis.

Go through the Pubtator search results again and build a network of the genes, mutations, and diseases that were found.

In [9]:
pubtator_network = Network()
pubtator_network.repulsion()
unique_mutations = []
unique_publications = []
diseases = {}
genes = {}
with open(pubtator_file, "r") as fh:
    for line in fh:
        line = line.split("\t")
        if len(line) > 1:
            pmid = line[0].strip()
            if pmid not in unique_publications:
                unique_publications.append(pmid)
                pubtator_network.add_node(pmid, label=pmid, color="#f48484")
            concept = line[4].strip()
            if concept == "Gene":
                gene_id = line[5].strip()
                if gene_id not in genes:
                    gene_name = line[3].strip()
                    genes[gene_id] = gene_name
                    pubtator_network.add_node(gene_id, label=gene_name, color="#6096b4", 
                                              title=gene_name)
                    pubtator_network.add_edge(pmid, gene_id)
            elif concept == "DNAMutation" or concept == "ProteinMutation":
                mutation = line[3].strip()
                if mutation not in unique_mutations:
                    mutation_annotation = line[5]
                    corresponding_gene = re.search("CorrespondingGene:(\d+)", mutation_annotation)
                    corresponding_gene_id = corresponding_gene.group(1)
                    unique_mutations.append(mutation)
                    pubtator_network.add_node(mutation, label=mutation, color="#93bfcf", 
                                              title=mutation)
                    pubtator_network.add_edge(pmid, mutation)
                    pubtator_network.add_edge(mutation, corresponding_gene_id)
            elif concept == "Disease":
                disease_id = line[5].strip()
                if disease_id != "":
                    disease_name = line[3].strip()
                    diseases[disease_id] = disease_name
                    pubtator_network.add_node(disease_id, label=disease_name, 
                                              color="#e8d286", title=disease_name)
                    pubtator_network.add_edge(pmid, disease_id)
        else:
            continue

NameError: name 'Network' is not defined

In [10]:
pubtator_network.toggle_physics(True)
pubtator_network.show("pubtator_graph.html")

NameError: name 'pubtator_network' is not defined