In [None]:
import requests
import json
import os

In [2]:
def load_genes_from_file(filepath):
    """Load gene IDs from a text/TSV file, one ID per line.

    Only the first tab-separated field of each line is used. Surrounding
    whitespace (including the line terminator) is stripped from the ID, and
    lines whose first field is empty are skipped.

    Args:
        filepath: Path of the file to read.

    Returns:
        List of gene ID strings in file order.
    """
    genes = []
    with open(filepath, "r") as file:
        for line in file:
            # Without the strip, a line that has no tab keeps its trailing
            # newline and the ID is returned as e.g. "GeneA\n".
            gene = line.split("\t", 1)[0].strip()
            if gene:
                genes.append(gene)
    return genes


In [4]:
def routine(input, output, annot, species=10090, fdr_threshold=0.05, timeout=300):
    """Run a STRING enrichment query and save the significant terms of one category.

    Gene IDs are read from ``input`` with ``load_genes_from_file`` and posted
    to the STRING ``enrichment`` endpoint. Rows of the JSON answer whose
    ``category`` equals ``annot`` and whose ``fdr`` is below ``fdr_threshold``
    are written to ``output`` as tab-separated text under a header line.

    Args:
        input: Path of the file holding the gene IDs (first column is used).
        output: Path of the TSV file to write; its folder is created if missing.
        annot: Value matched against each row's ``category``
            (the notebook uses "Process" and "Function").
        species: Taxon identifier sent as the ``species`` parameter.
        fdr_threshold: Rows are kept only when their FDR is strictly below this.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``requests`` response object of the enrichment call.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the response body is not a JSON list of rows.
    """
    string_api_url = "https://version-12-0.string-db.org/api"
    output_format = "json"
    method = "enrichment"

    request_url = "/".join([string_api_url, output_format, method])

    # Strip here as well, so stray line terminators never reach the request.
    my_genes = [gene.strip() for gene in load_genes_from_file(input)]
    my_genes = [gene for gene in my_genes if gene]

    params = {
        # A real carriage return is used as the separator (same as in
        # map_to_string_db); requests form-encodes it, whereas a literal
        # "%0d" string would be encoded a second time.
        "identifiers": "\r".join(my_genes),
        "species": species,  # NCBI/STRING taxon identifier
        "caller_identity": "gressm@natur.cuni.cz",  # identifies the caller to STRING
    }

    response = requests.post(request_url, data=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of reporting success for a bad answer.
    response.raise_for_status()

    data = json.loads(response.text)
    if not isinstance(data, list):
        raise ValueError(
            f"Unexpected STRING enrichment response for {input!r}: {data!r}"
        )
    # Note: this is the number of rows returned for all categories,
    # before the category/FDR filter below is applied.
    print("Success!!!", len(data))

    out_dir = os.path.dirname(output)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output, "w", encoding="utf-8") as f:
        # The header is always written, even when no row passes the filter.
        f.write("ID\tFDR\tNumber_in_List\tNumber_in_Reference\tDescription\tProteins\n")

        for row in data:
            fdr = float(row["fdr"])
            if row["category"] != annot or not fdr < fdr_threshold:
                continue

            f.write("\t".join([
                str(row["term"]),
                f"{fdr:.2e}",
                str(row["number_of_genes"]),
                str(row["number_of_genes_in_background"]),
                f'"{row["description"]}"',  # quoted so embedded spaces stay in one field
                ",".join(row["preferredNames"]),
            ]) + "\n")
    return response

In [None]:
from typing import List, Optional

def map_to_string_db(
    input_file: str,
    output_file: Optional[str] = None,
    species: int = 10090,  # Mouse by default
    limit: int = 1,
    echo_query: int = 1,
    caller_identity: str = "Unknown",
    timeout: float = 300.0,
) -> None:
    """
    Map the gene names of a file to STRING identifiers and save the mapping.

    The gene names are read with ``load_genes_from_file`` and posted to the
    STRING ``get_string_ids`` endpoint (``tsv-no-header`` format). Each usable
    answer line becomes one output line of the form
    ``<line index>\\tInput:\\t<input id>\\tSTRING:\\t<STRING id>``.

    Args:
        input_file: Path to input TSV file with gene names
        output_file: Path to save results; when None nothing is written
            (the function returns None either way)
        species: NCBI taxon ID (10090 for mouse)
        limit: Max results per identifier
        echo_query: Include input identifiers in output
        caller_identity: Application identifier
        timeout: Seconds to wait for the HTTP request

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # STRING API configuration
    string_api_url = "https://version-12-0.string-db.org/api"
    output_format = "tsv-no-header"
    method = "get_string_ids"

    # Strip defensively so line terminators never end up inside an identifier.
    genes = [gene.strip() for gene in load_genes_from_file(input_file)]
    genes = [gene for gene in genes if gene]

    # Prepare parameters
    params = {
        "identifiers": "\r".join(genes),
        "species": species,
        "limit": limit,
        "echo_query": echo_query,
        "caller_identity": caller_identity
    }

    # Construct and make request
    request_url = "/".join([string_api_url, output_format, method])
    results = requests.post(request_url, data=params, timeout=timeout)
    results.raise_for_status()

    # NOTE(review): column layout taken from the STRING API documentation —
    # with echo_query the line starts with (queryItem, queryIndex, stringId),
    # without it the line starts with (queryIndex, stringId). Confirm against
    # the pinned API version.
    echo_enabled = bool(int(echo_query))

    # Process results
    output_lines = []
    for i, line in enumerate(results.text.strip().split("\n")):
        if not line.strip():
            continue
        parts = line.split("\t")

        if echo_enabled:
            if len(parts) < 3:
                continue
            input_identifier, string_identifier = parts[0], parts[2]
        else:
            if len(parts) < 2:
                continue
            string_identifier = parts[1]
            # No echoed name: recover it from the submitted list via queryIndex,
            # falling back to the raw index text if that is not possible.
            try:
                input_identifier = genes[int(parts[0])]
            except (ValueError, IndexError):
                input_identifier = parts[0]

        output_lines.append(
            f"{i}\tInput:\t{input_identifier}\tSTRING:\t{string_identifier}"
        )

    # Write to file if specified
    if output_file:
        out_dir = os.path.dirname(output_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(output_file, 'w', encoding="utf-8") as f:
            # One terminated line per mapping; an empty result gives an empty file.
            f.writelines(entry + "\n" for entry in output_lines)

## View 1 & 2 

In [None]:
# Root folder with one sub-folder per view. The author's local path stays the
# default; set the ONTOLOGY_WORK_DIR environment variable to run elsewhere.
work_dir = os.environ.get(
    "ONTOLOGY_WORK_DIR",
    r"C:\Users\misog\Documents\CUNI\sem06\bc_data\Ontology2",
)
views = ["view1-padj", "view2-lfc2-p05"]
suffixes = ["OE-female", "OE-male", "VNO-female", "VNO-male"]

# Category labels handed to routine(), which keeps only rows of that category.
annot_BP = "Process"
annot_MF = "Function"

for view in views:
    # Make sure the per-view STRING output folder exists before any result
    # file is written into it.
    os.makedirs(os.path.join(work_dir, view, "STRING"), exist_ok=True)

    for suff in suffixes:
        file = view + '_' + suff
        in_file = os.path.join(work_dir, view, file + '.tsv')

        out_BP_file = os.path.join(work_dir, view, "STRING", file + '.BP.out.tsv')
        out_MF_file = os.path.join(work_dir, view, "STRING", file + '.MF.out.tsv')

        print("Processing:", os.path.basename(in_file), " for BP ...")
        routine(in_file, out_BP_file, annot_BP)
        print("Processing:", os.path.basename(in_file), " for MF ...")
        routine(in_file, out_MF_file, annot_MF)

        # Also store the input-name -> STRING-identifier mapping for this file.
        out_map = os.path.join(work_dir, view, "STRING", file + '.map.out.tsv')
        map_to_string_db(in_file, out_map)

Processing: view1-padj_OE-female.tsv  for BP ...
Success!!! 0
Processing: view1-padj_OE-female.tsv  for MF ...
Success!!! 0
Processing: view1-padj_OE-male.tsv  for BP ...
Success!!! 103
Processing: view1-padj_OE-male.tsv  for MF ...
Success!!! 103
Processing: view1-padj_VNO-female.tsv  for BP ...
Success!!! 219
Processing: view1-padj_VNO-female.tsv  for MF ...
Success!!! 219
Processing: view1-padj_VNO-male.tsv  for BP ...
Success!!! 102
Processing: view1-padj_VNO-male.tsv  for MF ...
Success!!! 102


## View 3

In [None]:
import os
# Root folder with one sub-folder per view. The author's local path stays the
# default; set the ONTOLOGY_WORK_DIR environment variable to run elsewhere.
work_dir = os.environ.get(
    "ONTOLOGY_WORK_DIR",
    r"C:\Users\misog\Documents\CUNI\sem06\bc_data\Ontology2",
)
view = "view3-mean-h100"
suffixes = ["OE", "VNO"]

# Category labels handed to routine(), which keeps only rows of that category.
annot_BP = "Process"
annot_MF = "Function"

# Make sure the STRING output folder of this view exists before any result
# file is written into it.
os.makedirs(os.path.join(work_dir, view, "STRING"), exist_ok=True)

for suff in suffixes:
    file = view + '_' + suff
    in_file = os.path.join(work_dir, view, file + '.tsv')

    out_BP_file = os.path.join(work_dir, view, "STRING", file + '.BP.out.tsv')
    out_MF_file = os.path.join(work_dir, view, "STRING", file + '.MF.out.tsv')

    print("Processing:", os.path.basename(in_file), " for BP ...")
    routine(in_file, out_BP_file, annot_BP)
    print("Processing:", os.path.basename(in_file), " for MF ...")
    routine(in_file, out_MF_file, annot_MF)

    # Also store the input-name -> STRING-identifier mapping for this file.
    out_map = os.path.join(work_dir, view, "STRING", file + '.map.out.tsv')
    map_to_string_db(in_file, out_map)


Processing: view3-mean-h100_OE.tsv  for BP ...
Success!!! 289
Processing: view3-mean-h100_OE.tsv  for MF ...
Success!!! 289
Processing: view3-mean-h100_VNO.tsv  for BP ...
Success!!! 327
Processing: view3-mean-h100_VNO.tsv  for MF ...
Success!!! 327
