In [266]:
import matplotlib.pyplot as plt 
import numpy as np
from Bio import Entrez
from bioservices import UniProt 
import networkx as nx 


In [267]:
def parse_interactome(file_path):
    """Load a tab-separated interactome edge list into an adjacency mapping.

    The first line of the file is treated as a header and skipped. Every
    remaining non-blank line must provide at least three tab-separated
    columns: tail node, head node and edge weight. Any further columns are
    ignored.

    Args:
        file_path: Path to the tab-separated edge-list file.

    Returns:
        dict mapping each tail node to a list of ``(head, edge_weight)``
        tuples in file order, with ``edge_weight`` converted to ``float``.
        Nodes that only ever appear in the head column get no key.

    Raises:
        ValueError: If a data line has fewer than three columns or its
            weight column cannot be converted to ``float``.
    """
    interactome_data = {}
    with open(file_path, 'r') as file:
        # Skip the header line; the default keeps an empty file from raising StopIteration.
        next(file, None)
        for line_number, line in enumerate(file, start=2):
            record = line.strip()
            if not record:
                # Blank lines (e.g. a trailing newline at end of file) carry no edge.
                continue
            fields = record.split('\t')
            if len(fields) < 3:
                raise ValueError(
                    f"{file_path}, line {line_number}: expected at least 3 "
                    f"tab-separated columns, got {len(fields)}"
                )
            # Only the first three columns are used; strip() above may already
            # have removed an empty trailing column.
            tail, head, edge_weight = fields[:3]
            interactome_data.setdefault(tail, []).append((head, float(edge_weight)))
    return interactome_data

In [268]:
def list_connected_proteins(file_path, protein):
    """Write the neighbours of ``protein`` to ``<protein>_connected_proteins.txt``.

    The interactome at ``file_path`` is parsed with ``parse_interactome`` and
    the entries stored under ``protein`` are reported. The output file starts
    with a ``Degree of <protein>: <count>`` line, followed by one
    ``<neighbour>\\t<weight>`` line per entry. A protein with no entry
    produces a file containing only the degree line with a count of 0.

    Args:
        file_path: Path to the interactome edge-list file.
        protein: Identifier to look up in the parsed interactome.
    """
    neighbours = parse_interactome(file_path).get(protein, [])

    # Header first: the degree is simply the number of recorded connections.
    report_lines = [f"Degree of {protein}: {len(neighbours)}\n"]
    report_lines.extend(f"{partner}\t{score}\n" for partner, score in neighbours)

    with open(f"{protein}_connected_proteins.txt", 'w') as output_file:
        output_file.writelines(report_lines)

In [269]:
def get_gene_name_from_uniprot(uniprot_ids):
    """Fetch the FASTA header name for one or more protein accessions via Entrez.

    For each accession the FASTA record is requested from the Entrez
    ``protein`` database. The name is taken from the third ``|``-separated
    field of the record, truncated at the first space (e.g. ``A4_HUMAN`` for
    ``P05067``).
    NOTE(review): this looks like the UniProt entry name rather than a gene
    symbol -- confirm this is what callers expect.

    Args:
        uniprot_ids: A single accession string, or an iterable of accession
            strings.

    Returns:
        dict mapping each requested accession to the extracted name.

    Raises:
        IndexError: If a returned record has fewer than three
            ``|``-separated fields.
    """
    # A bare string would otherwise be iterated character by character,
    # issuing one bogus request per letter of the accession.
    if isinstance(uniprot_ids, str):
        uniprot_ids = [uniprot_ids]

    Entrez.email = "asmaa.abdelhamid02@eng-st.cu.edu.eg"
    gene_names = {}
    for uniprot_id in uniprot_ids:
        handle = Entrez.efetch(db="protein", id=uniprot_id, rettype="fasta", retmode="text")
        try:
            fasta_record = handle.read()
        finally:
            # Release the connection even if reading the response fails.
            handle.close()
        # Split the record on "|" and take the third field (index 2).
        header_fields = fasta_record.split("|")
        if len(header_fields) < 3:
            raise IndexError(
                f"Unexpected FASTA header for {uniprot_id!r}: "
                f"{fasta_record[:80]!r}"
            )
        # Keep only the leading token; the rest of the field is the description.
        gene_names[uniprot_id] = header_fields[2].split(" ")[0]
    return gene_names




In [270]:

# Main code: export the neighbours of the selected protein(s) and print the
# name Entrez reports for each of them.

file_path = "PathLinker_2018_human-ppi-weighted-cap0_75.txt"
UniProt_ID = "P05067"
# UniProt_ID = ["P05067", "Q8TBF4"]
# Parsed at top level as well; list_connected_proteins() re-reads the file itself.
interactome_data = parse_interactome(file_path)

# UniProt_ID may be a single accession or a list of them. Normalise to a list
# so the lookup below never iterates over the characters of a bare string.
uniprot_ids = [UniProt_ID] if isinstance(UniProt_ID, str) else list(UniProt_ID)

# One "<protein>_connected_proteins.txt" report per requested accession.
for protein_id in uniprot_ids:
    list_connected_proteins(file_path, protein_id)

gene_names = get_gene_name_from_uniprot(uniprot_ids)
for uniprot_id, gene_name in gene_names.items():
    print(f"Gene name for {uniprot_id}: {gene_name}")


Gene name for P05067: A4_HUMAN
Gene name for Q8TBF4: ZCRB1_HUMAN
