In [1]:
import numpy as np
import pandas as pd

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df = pd.read_csv('/content/drive/MyDrive/PPG/Projekt/BIOGRID-ALL-4.4.243.tab3.txt', sep='\t', dtype={1: str, 2: str, 18: str})

In [32]:
df.shape

(2732721, 31)

In [4]:
columns_to_remove = ['Ontology Term IDs', 'Ontology Term Names', 'Ontology Term Categories', 'Ontology Term Qualifier IDs', 'Ontology Term Qualifier Names', 'Ontology Term Types']
df = df.drop(columns=columns_to_remove)

df = df.dropna()

df.head()

Unnamed: 0,#BioGRID Interaction ID,Entrez Gene Interactor A,Entrez Gene Interactor B,BioGRID ID Interactor A,BioGRID ID Interactor B,Systematic Name Interactor A,Systematic Name Interactor B,Official Symbol Interactor A,Official Symbol Interactor B,Synonyms Interactor A,...,Tags,Source Database,SWISS-PROT Accessions Interactor A,TREMBL Accessions Interactor A,REFSEQ Accessions Interactor A,SWISS-PROT Accessions Interactor B,TREMBL Accessions Interactor B,REFSEQ Accessions Interactor B,Organism Name Interactor A,Organism Name Interactor B
0,103,6416,2318,112315,108607,-,-,MAP2K4,FLNC,JNKK|JNKK1|MAPKK4|MEK4|MKK4|PRKMK4|SAPKK-1|SAP...,...,-,BIOGRID,P45985,-,NP_003001|NP_001268364,Q14315,Q59H94,NP_001120959|NP_001449,Homo sapiens,Homo sapiens
1,117,84665,88,124185,106603,-,-,MYPN,ACTN2,CMD1DD|CMH22|MYOP|RCM4,...,-,BIOGRID,Q86TC9,A0A087WX60,NP_001243197|NP_001243196|NP_115967,P35609,Q59FD9|F6THM6,NP_001094|NP_001265272|NP_001265273,Homo sapiens,Homo sapiens
2,183,90,2339,106605,108625,-,-,ACVR1,FNTA,ACTRI|ACVR1A|ACVRLK2|ALK2|FOP|SKR1|TSRI,...,-,BIOGRID,Q04771,D3DPA4,NP_001104537|NP_001096,P49354,-,NP_002018,Homo sapiens,Homo sapiens
3,278,2624,5371,108894,111384,-,-,GATA2,PML,DCML|IMD21|MONOMAC|NFE1B,...,-,BIOGRID,P23769,-,NP_001139134|NP_116027|NP_001139133,P29590,-,NP_150250|NP_150253|NP_150252|NP_150247|NP_150...,Homo sapiens,Homo sapiens
4,418,6118,6774,112038,112651,RP4-547C9.3,-,RPA2,STAT3,REPA2|RP-A p32|RP-A p34|RPA32,...,-,BIOGRID,P15927,B4DUL2,NP_001342057|NP_002937|NP_001284487|NP_0013420...,P40763,-,NP_644805|NP_003141|NP_001356447|NP_001356443|...,Homo sapiens,Homo sapiens


In [33]:
filtered_df = df[(df['Organism Name Interactor A'].str.contains('Human Immunodeficiency Virus 1')) | (df['Organism Name Interactor B'].str.contains('Human Immunodeficiency Virus 1'))]

In [34]:
filtered_df.shape

(2555, 31)

In [35]:
import networkx as nx

In [36]:
import networkx as nx

hiv_human_interactions = filtered_df[
    (
        (filtered_df['Organism Name Interactor A'] == 'Human Immunodeficiency Virus 1') &
        (filtered_df['Organism Name Interactor B'] == 'Homo sapiens')
    ) | (
        (filtered_df['Organism Name Interactor A'] == 'Homo sapiens') &
        (filtered_df['Organism Name Interactor B'] == 'Human Immunodeficiency Virus 1')
    )
]

if hiv_human_interactions.empty:
    print("Error: The dataframe for HIV-Human interactions is empty. Cannot create graph.")
else:
    graph = nx.from_pandas_edgelist(
        hiv_human_interactions,
        'Official Symbol Interactor A',
        'Official Symbol Interactor B',
        create_using=nx.Graph()
    )

In [37]:
graph.remove_nodes_from(list(nx.isolates(graph)))

In [38]:
graph.number_of_nodes()

1430

In [39]:
import requests
import json

DGRAPH_URL = "https://nameless-brook-700075.eu-central-1.aws.cloud.dgraph.io"
API_KEY = DGRAPH

HEADERS = {
    "Content-Type": "application/json",
    "X-Auth-Token": API_KEY
}

schema = """
name: string @index(exact, trigram) .
interacts_with: [uid] @reverse .
type Protein {
  name
  interacts_with
}
"""

res = requests.post(f"{DGRAPH_URL}/alter", json={"schema": schema}, headers=HEADERS)
print("Schema status:", res.status_code, res.text)

Schema status: 200 {"data":{"code":"Success","message":"Done"}}


In [41]:
uid_map = {node: f"_:{node}" for node in graph.nodes()}

nodes = [{"uid": uid_map[n], "name": n, "dgraph.type": "Protein"} for n in graph.nodes()]

edges = []
for src, dst in graph.edges():
    edges.append({
        "uid": uid_map[src],
        "interacts_with": [{"uid": uid_map[dst]}]
    })

mutation_data = {"set": nodes + edges}
print("Number of nodes:", len(nodes), "Number of relations:", len(edges))

res = requests.post(f"{DGRAPH_URL}/mutate?commitNow=true", json=mutation_data, headers=HEADERS)
print("Loading data:", res.status_code, res.text)

Number of nodes: 1430 Number of relations: 1708
Loading data: 200 {"data":{"code":"Success","message":"Done","queries":null,"uids":{"AATF":"0x1a53c19b12","ABCB10":"0x1a53c19f76","ABCD3":"0x1a53c19ea9","ABCE1":"0x1a53c19af9","ABCF1":"0x1a53c19d6c","ACACA":"0x1a53c19a8f","ACACB":"0x1a53c19a89","ACBD3":"0x1a53c19aa2","ACBD5":"0x1a53c19f47","ACIN1":"0x1a53c19cfc","ACOT8":"0x1a53c19e0b","ACSL3":"0x1a53c19c67","ACTA1":"0x1a53c19e06","ACTB":"0x1a53c19b3b","ACTG1":"0x1a53c19bf6","ACTL6A":"0x1a53c19e75","ADAR":"0x1a53c19bbe","ADD1":"0x1a53c19ce0","AFF1":"0x1a53c19e02","AFF4":"0x1a53c19ad3","AGFG1":"0x1a53c19fc6","AGO2":"0x1a53c19b8a","AGPS":"0x1a53c19fbd","AHCTF1":"0x1a53c19bfb","AHCY":"0x1a53c19d85","AHCYL1":"0x1a53c19bf3","AHCYL2":"0x1a53c19d36","AHNAK":"0x1a53c19d03","AICDA":"0x1a53c19c42","AIMP1":"0x1a53c19b2d","AIMP2":"0x1a53c19e4e","AKAP11":"0x1a53c19b36","AKAP8L":"0x1a53c19d5e","AKAP9":"0x1a53c19aac","AKT2":"0x1a53c19efb","ALDOA":"0x1a53c19d30","ALMS1":"0x1a53c19ba7","AMBRA1":"0x1a53c19d

In [42]:
query = """
{
  tat(func: eq(name, "tat")) {
    name
    interacts_with {
      name
    }
  }
}
"""

res = requests.post(f"{DGRAPH_URL}/query", data=json.dumps({"query": query}), headers=HEADERS)
print("Result", json.dumps(res.json(), indent=2))

Result {
  "data": {
    "tat": [
      {
        "name": "tat",
        "interacts_with": [
          {
            "name": "PURA"
          },
          {
            "name": "TAF15"
          },
          {
            "name": "NCAPD2"
          },
          {
            "name": "PSMA3"
          },
          {
            "name": "WDR5"
          },
          {
            "name": "TRRAP"
          },
          {
            "name": "SQSTM1"
          },
          {
            "name": "MEAF6"
          },
          {
            "name": "ERCC3"
          },
          {
            "name": "MBD3"
          },
          {
            "name": "BAZ1B"
          },
          {
            "name": "CDK9"
          },
          {
            "name": "SRRM2"
          },
          {
            "name": "TRIM32"
          },
          {
            "name": "SMARCA2"
          },
          {
            "name": "LUC7L2"
          },
          {
            "name": "MYH9"
          },
     