In [10]:
import os
import json
import requests


# GraphQL endpoint that get_l2s2_valid_genes() posts its query to.
# NOTE(review): this is plain HTTP — confirm whether the service should be
# reached over https instead.
url = "http://l2s2.maayanlab.cloud/graphql"


def get_l2s2_valid_genes(genes: list[str], timeout: float = 120.0):
    """Return the gene symbols that the L2S2 GraphQL service resolves.

    Posts a ``GenesQuery`` (``geneMap2``) request to the module-level ``url``
    and collects ``geneInfo.symbol`` from every returned node whose
    ``geneInfo`` is not null.

    Args:
        genes: Gene identifiers to look up. Any iterable of strings is
            accepted; it is materialized into a list before being sent.
        timeout: Seconds ``requests`` waits for the server before giving up,
            so a stalled connection cannot hang the caller forever.

    Returns:
        A list of symbols, in the order the service returned the nodes.
        Empty input yields ``[]`` without contacting the service.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        RuntimeError: If the response body carries no ``geneMap2`` data
            (for example when the GraphQL layer reports ``errors``).
    """
    genes = list(genes)
    if not genes:
        # Nothing to validate; skip the network round trip.
        return []

    payload = {
        "query": """query GenesQuery($genes: [String]!) {
        geneMap2(genes: $genes) {
            nodes {
                gene
                geneInfo {
                    symbol
                    }
                }
            }
        }""",
        "variables": {"genes": genes},
        "operationName": "GenesQuery",
    }

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
    }

    response = requests.post(
        url, data=json.dumps(payload), headers=headers, timeout=timeout
    )
    response.raise_for_status()
    res = response.json()

    # A GraphQL server can answer 200 OK while reporting a failure in the
    # body; surface that explicitly instead of failing on a missing key.
    data = res.get("data")
    if not data or data.get("geneMap2") is None:
        raise RuntimeError(f"L2S2 GraphQL query failed: {res.get('errors')}")

    return [
        node["geneInfo"]["symbol"]
        for node in data["geneMap2"]["nodes"]
        if node["geneInfo"] is not None
    ]


# Collect the genes listed in every .txt file under 01_updown, de-duplicate
# them, and ask L2S2 which of them it resolves.
updown_dir = "../data/processed/01_updown/"

unique_genes = set()
for fn in os.listdir(updown_dir):
    if not fn.endswith(".txt"):
        continue
    with open(os.path.join(updown_dir, fn)) as f:
        stripped = (line.strip() for line in f)
        # Skip blank lines so an empty string is never sent as a gene.
        unique_genes.update(gene for gene in stripped if gene)

# Sorted so the query payload is identical from run to run (set iteration
# order for strings is not stable across interpreter sessions).
all_genes = sorted(unique_genes)
valid_genes = get_l2s2_valid_genes(all_genes)



In [16]:
import csv

# Write every validated gene to all.txt, one gene per row.
# On a first run the output folder may not exist yet, so create it.
os.makedirs("../data/processed/02_l2s2_queries/", exist_ok=True)

# newline="" leaves line endings to the csv module; without it, Windows text
# mode expands the writer's "\r\n" terminator to "\r\r\n".
with open("../data/processed/02_l2s2_queries/all.txt", "w", newline="") as f:
    csv.writer(f).writerows([gene] for gene in valid_genes)


# For each *_up.txt / *_dw.txt list in 01_updown, keep only the genes present
# in valid_genes and write the filtered list, in its original order, under
# the same file name in 02_l2s2_queries.
valid_lookup = set(valid_genes)  # O(1) membership instead of a list scan per gene

for fn in os.listdir("../data/processed/01_updown/"):
    # Both directions are handled identically, so one pass covers them;
    # str.endswith accepts a tuple of suffixes.
    if not fn.endswith(("_up.txt", "_dw.txt")):
        continue
    with open(os.path.join("../data/processed/01_updown/", fn)) as f:
        genes = [line.strip() for line in f]
    valid_subset = [gene for gene in genes if gene in valid_lookup]
    # newline="" leaves line endings to the csv module (avoids "\r\r\n" on
    # Windows).
    with open(
        os.path.join("../data/processed/02_l2s2_queries/", fn), "w", newline=""
    ) as out_f:
        csv.writer(out_f).writerows([gene] for gene in valid_subset)

In [None]:
# Build the union of all filtered "up" lists and of all filtered "down" lists
# found in 02_l2s2_queries, and write each union next to them.
queries_dir = "../data/processed/02_l2s2_queries/"

# The two files written below also end in "_up.txt" / "_dw.txt". Skip them as
# inputs so that re-running this cell does not feed a previous aggregate
# (possibly containing stale genes) back into the new one.
aggregate_names = {"all_up.txt", "all_dw.txt"}

up_genes = set()
dw_genes = set()
for fn in os.listdir(queries_dir):
    if fn in aggregate_names:
        continue
    if fn.endswith("_up.txt"):
        target = up_genes
    elif fn.endswith("_dw.txt"):
        target = dw_genes
    else:
        continue
    with open(os.path.join(queries_dir, fn)) as f:
        stripped = (line.strip() for line in f)
        # Ignore blank rows so "" never ends up in the union.
        target.update(gene for gene in stripped if gene)

# Sorted so the output files are reproducible from run to run.
all_up = sorted(up_genes)
all_dw = sorted(dw_genes)

for out_name, gene_list in (("all_up.txt", all_up), ("all_dw.txt", all_dw)):
    # newline="" leaves line endings to the csv module (avoids "\r\r\n" on
    # Windows).
    with open(os.path.join(queries_dir, out_name), "w", newline="") as f:
        csv.writer(f).writerows([gene] for gene in gene_list)