In [None]:
import os
import json
import gzip
import shutil
from pathlib import Path
import subprocess
from tqdm import tqdm
from collections import defaultdict

# Move up to the top directory of the repository, identified by its LICENSE file

print(os.getcwd())

while not Path("LICENSE").exists():
    current_dir = Path.cwd()
    # At the filesystem root the parent is the directory itself, so another
    # chdir("../") would make no progress: fail loudly instead of looping forever.
    if current_dir.parent == current_dir:
        raise FileNotFoundError(
            "Could not find a LICENSE file in the working directory or any of its parents."
        )
    os.chdir("../")

print(os.getcwd())

In [None]:
# ===================================
# Deployment configuration
#   is_test = True  -> quick trial build (about 1 min)
#   is_test = False -> full main deployment (about 30 min)
# ===================================

is_test = True

# Everything generated by this notebook is written below this directory
output_dir = Path("test-tsumugi" if is_test else "TSUMUGI")
output_dir.mkdir(parents=True, exist_ok=True)

if not is_test:
    # Main build: take every phenotype / gene symbol listed in the data directory.
    # A missing list file yields an empty list, which is rejected further down.
    mp_terms_file = Path("data", "overlap", "available_mp_terms.txt")
    gene_symbols_file = Path("data", "overlap", "available_gene_symbols.txt")

    selected_phenotypes = []
    if mp_terms_file.exists():
        selected_phenotypes = mp_terms_file.read_text().splitlines()

    gene_symbols = []
    if gene_symbols_file.exists():
        gene_symbols = gene_symbols_file.read_text().splitlines()
else:
    # Test build: a small fixed sample of phenotypes and gene symbols
    selected_phenotypes = [
        "edema",
        "male infertility",
        "increased fasting circulating glucose level",
        "preweaning lethality, complete penetrance",
        "increased blood urea nitrogen level",
        "increased circulating glycerol level",
        "convulsive seizures",
    ]
    gene_symbols = [
        "Rab10",
        "Ints8",
        "Trappc11",
        "Zfp39",
        "Kcnma1",
        "Plekha8",
        "Dstn",
    ]

# Both lists must be non-empty for the rest of the notebook to make sense
if not (selected_phenotypes and gene_symbols):
    raise ValueError(
        "No phenotypes or gene symbols selected. Please check the data directory."
    )

print("Mode:", "TEST" if is_test else "MAIN")
print("Output dir:", output_dir)
print("Phenotypes:", len(selected_phenotypes))
print("Genes:", len(gene_symbols))

# 新しいファイルに置換


In [None]:
# Remove the previously generated "data" and "app" trees so the build starts clean
for target_dir in (output_dir / subdir for subdir in ("data", "app")):
    # is_dir() is False for a missing path, so no separate existence check is needed
    if target_dir.is_dir():
        shutil.rmtree(target_dir)

In [None]:
# Create the directory skeleton of the deployment tree
for section, leaf in [
    ("data", "phenotype"),
    ("data", "genesymbol"),
    ("app", "phenotype"),
    ("app", "genesymbol"),
    ("app", "genelist"),
]:
    (output_dir / section / leaf).mkdir(parents=True, exist_ok=True)

if is_test:
    # Copy the CSS / JS used by the top page into the test tree
    for asset in ["css", "js"]:
        src = Path("TSUMUGI") / asset
        dst = output_dir / asset
        if src.exists():
            shutil.copytree(src, dst, dirs_exist_ok=True)

# Copy the CSS / JS used by the app pages
template_dir = Path("TSUMUGI") / "template"
for asset in ["css", "js"]:
    src = template_dir / asset
    dst = output_dir / "app" / asset
    if src.exists():
        # dirs_exist_ok keeps this cell re-runnable: without it a second run
        # raises FileExistsError because the destination already exists.
        shutil.copytree(src, dst, dirs_exist_ok=True)

## JSONのコピー

In [None]:
# Locations of the gzipped network JSON files
src_phenotype_dir = Path("data/network/mp_term_name")
src_gene_dir = Path("data/network/gene_symbol")

# Matching locations inside the deployment tree
dst_phenotype_dir = output_dir / "data" / "phenotype"
dst_gene_dir = output_dir / "data" / "genesymbol"

# Make sure both destinations exist (no error if they already do)
for dst_dir in (dst_phenotype_dir, dst_gene_dir):
    dst_dir.mkdir(parents=True, exist_ok=True)

if not is_test:
    # Main build: copy every phenotype file ...
    for file in tqdm(
        src_phenotype_dir.glob("*.json.gz"), desc="Copying phenotype files"
    ):
        shutil.copy(file, dst_phenotype_dir / file.name)

    # ... and every gene symbol file
    for file in tqdm(src_gene_dir.glob("*.json.gz"), desc="Copying gene symbol files"):
        shutil.copy(file, dst_gene_dir / file.name)
else:
    # Test build: copy only the selected phenotypes.
    # File names use underscores where the phenotype name has spaces.
    for pheno in selected_phenotypes:
        src_file = src_phenotype_dir / (pheno.replace(" ", "_") + ".json.gz")
        if src_file.exists():
            shutil.copy(src_file, dst_phenotype_dir / src_file.name)

    # Test build: copy only the selected gene symbols
    for gene in gene_symbols:
        src_file = src_gene_dir / (gene + ".json.gz")
        if src_file.exists():
            shutil.copy(src_file, dst_gene_dir / src_file.name)

# Count what is now present in each destination directory
phenotype_count = sum(1 for _ in dst_phenotype_dir.glob("*.json.gz"))
gene_count = sum(1 for _ in dst_gene_dir.glob("*.json.gz"))
print(f"Copied {phenotype_count} phenotype files to {dst_phenotype_dir}")
print(f"Copied {gene_count} gene symbol files to {dst_gene_dir}")

# An empty destination means the source directories are missing or wrong
if 0 in (phenotype_count, gene_count):
    raise ValueError(
        "No phenotype or gene symbol files copied. Please check the source directories."
    )

# 6 min

## index.html の描画に必要なファイルをコピー


In [None]:
data_dir = output_dir / "data"

# Source path -> destination path for each file that index.html needs
file_map = {
    "data/overlap/available_mp_terms.json": data_dir / "available_mp_terms.json",
    "data/overlap/available_mp_terms.txt": data_dir / "available_mp_terms.txt",
    "data/overlap/available_gene_symbols.txt": data_dir / "available_gene_symbols.txt",
    "data/annotation/symbol_mgiid.json": data_dir / "marker_symbol_accession_id.json",
}

# Copy the files in the order they are listed above
for src in file_map:
    shutil.copy(src, file_map[src])

## index.html を生成


In [None]:
# Template to read and page to produce
template_path = Path("TSUMUGI") / "template" / "template_index.html"
output_path = output_dir / "index.html"

# Keep every template line except those tagged with "REMOVE_THIS_LINE"
with template_path.open("r", encoding="utf-8") as infile:
    kept_lines = [line for line in infile if "REMOVE_THIS_LINE" not in line]

with output_path.open("w", encoding="utf-8") as outfile:
    outfile.writelines(kept_lines)

# appのHTMLとJSを生成


### binaryの表現型を抽出

In [None]:
# Collect the phenotypes whose network is "binary": every node that carries a
# node_color value has the value 1.
# sorted() makes the order of the output files reproducible across runs.
path_files = sorted(Path("TSUMUGI", "data", "phenotype").glob("*.json.gz"))
binary_phenotypes = []
for path_file in path_files:
    # Drop the extension completely. Replacing it with " " (as before) left a
    # trailing space on every name written to binary_phenotypes.txt.
    mp_term_name = path_file.name.replace(".json.gz", "")
    with gzip.open(path_file, "rt", encoding="utf-8") as f:
        json_data: list[dict] = json.load(f)

    # Distinct node_color values found in this network (elements without one are ignored)
    node_colors = {
        data["data"]["node_color"] for data in json_data if "node_color" in data["data"]
    }
    if node_colors == {1}:
        binary_phenotypes.append(mp_term_name)

print(len(binary_phenotypes))
if len(binary_phenotypes) == 0:
    raise ValueError("No binary phenotypes found. Please check the data directory.")

print(binary_phenotypes[:3])

# One phenotype name per line, written both to the annotation data and to the site data
_ = Path("data", "annotation", "binary_phenotypes.txt").write_text(
    "\n".join(binary_phenotypes)
)
_ = Path("TSUMUGI", "data", "binary_phenotypes.txt").write_text(
    "\n".join(binary_phenotypes)
)

## Phenotype

In [None]:
# ========== ファイル処理ユーティリティ ==========


def read_file(filepath):
    """Return the entire content of ``filepath`` decoded as UTF-8 text."""
    return Path(filepath).read_text(encoding="utf-8")


def write_file(filepath, content):
    """Write ``content`` to ``filepath`` as UTF-8 text, replacing any existing file."""
    # write_text returns the character count; discard it so the function still returns None
    Path(filepath).write_text(content, encoding="utf-8")


# ========== データ収集系関数 ==========


def get_target_phenotypes(dir_path, target_phenotypes: list[str] | None = None):
    targets = []
    for file in Path(dir_path).glob("*.csv"):
        name = file.stem
        if target_phenotypes is None:
            targets.append(name)
        else:
            if name.startswith(target_phenotypes):
                targets.append(name)
    return targets


def get_impc_url(mp_term_name_space, tsv_path):
    """Look up the URL recorded for a phenotype name in a tab-separated file.

    Args:
        mp_term_name_space: Phenotype name to match against the first column.
        tsv_path: Path of a UTF-8 TSV file whose first column is the name and
            whose second column is the URL.

    Returns:
        The second column of the first matching row, or ``""`` when no row
        matches or the matching row has no second column.
    """
    with open(tsv_path, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split("\t")
            if parts[0] == mp_term_name_space:
                # strip() also removes a trailing tab, so a row with an empty
                # URL has a single field; return "" instead of raising IndexError.
                return parts[1] if len(parts) > 1 else ""
    return ""


def is_binary(mp_term, binary_file_path):
    """Return True when ``mp_term`` equals a whitespace-stripped line of ``binary_file_path``."""
    with open(binary_file_path) as handle:
        for raw_line in handle:
            if raw_line.strip() == mp_term:
                return True
    return False


# ========== テンプレート系関数 ==========


def replace_placeholder(template, placeholder, insert_text):
    """Return ``template`` with every occurrence of ``placeholder`` replaced by ``insert_text``."""
    filled = template.replace(placeholder, insert_text)
    return filled


def inject_html(template_path, insert_path, placeholder, output_path):
    """Fill ``placeholder`` in one file with the content of another and save the result.

    Reads ``template_path`` and ``insert_path``, replaces every occurrence of
    ``placeholder`` in the template with the insert file's content, and writes
    the outcome to ``output_path``.
    """
    write_file(
        output_path,
        read_file(template_path).replace(placeholder, read_file(insert_path)),
    )


def generate_simple_html(template_path, output_path, replacements):
    """Apply a mapping of placeholder -> text to a template file and save the result.

    The replacements are applied one after another in the mapping's iteration
    order, each to the output of the previous one, and the final text is
    written to ``output_path``.
    """
    rendered = read_file(template_path)
    for placeholder in replacements:
        rendered = rendered.replace(placeholder, replacements[placeholder])
    write_file(output_path, rendered)


# ========== HTML生成系関数 ==========


def generate_all_html(mp_term, mp_term_name_space, impc_url, mode, output_dir):
    """Build the app page of one phenotype and write it to
    ``{output_dir}/app/phenotype/{mp_term}.html``.

    The page is assembled in memory from the fragments under
    ``TSUMUGI/template/template-app-html``; no scratch files are written, so
    the function does not depend on a ``/tmp`` directory and concurrent runs
    cannot overwrite each other's intermediates.

    Args:
        mp_term: Phenotype name used for file names (spaces already replaced).
        mp_term_name_space: Phenotype name as displayed (with spaces).
        impc_url: Link target for the page header.
        mode: ``"non-binary-phenotype"`` inserts the ``{part}-phenotype.html``
            fragments at ``XXX_PHENOTYPE_SEVERITY``; any other value removes
            that placeholder.
        output_dir: Root of the deployment tree.
    """
    template_dir = "TSUMUGI/template/template-app-html"

    # body-container and cy-container: fill (or blank out) the severity placeholder
    containers = {}
    for part in ["body-container", "cy-container"]:
        part_template = read_file(f"{template_dir}/{part}.html")
        if mode == "non-binary-phenotype":
            severity_html = read_file(f"{template_dir}/{part}-phenotype.html")
        else:
            severity_html = ""
        containers[part] = part_template.replace("XXX_PHENOTYPE_SEVERITY", severity_html)

    # head.html: page title and the name of the page's JavaScript file
    head_html = (
        read_file(f"{template_dir}/head.html")
        .replace("XXX_TITLE", mp_term_name_space)
        .replace("XXX_JS_FILE_NAME", mp_term)
    )

    # header.html: the title is a link to impc_url
    header_insert = f"<a href='{impc_url}' target='_blank'>{mp_term_name_space}</a>"
    header_html = read_file(f"{template_dir}/header.html").replace(
        "XXX_TITLE", header_insert
    )

    # template_app.html -> finished page
    final_html = (
        read_file(f"{template_dir}/template_app.html")
        .replace("XXX_HEAD", head_html)
        .replace("XXX_H1", header_html)
        .replace("XXX_BODY_CONTAINER", containers["body-container"])
        .replace("XXX_CY_CONTAINER", containers["cy-container"])
    )

    write_file(f"{output_dir}/app/phenotype/{mp_term}.html", final_html)


# ========== JavaScript生成関数 ==========


def generate_javascript(mp_term, mp_term_name_space, mode, output_dir):
    """Build the app script of one phenotype and write it to
    ``{output_dir}/app/phenotype/{mp_term}.js``.

    The script is assembled in memory from the fragments under
    ``TSUMUGI/template/template-app-js``; no scratch files are written, so the
    function does not depend on a ``/tmp`` directory and concurrent runs
    cannot overwrite each other's intermediates.

    Args:
        mp_term: Phenotype name used for file names (spaces already replaced).
        mp_term_name_space: Phenotype name as displayed (with spaces).
        mode: ``"non-binary-phenotype"`` inserts the node min/max and node
            colour fragments; any other value is handled as a binary phenotype
            (those placeholders are blanked and the filter lines tagged
            ``REMOVE_THIS_LINE_IF_BINARY_PHENOTYPE`` are dropped).
        output_dir: Root of the deployment tree.
    """
    template_dir = "TSUMUGI/template/template-app-js"

    filter_js = read_file(f"{template_dir}/filterByNodeColorAndEdgeSize_phenotype.js")
    template = read_file(f"{template_dir}/template_app.js")

    if mode == "non-binary-phenotype":
        node_min_max = read_file(f"{template_dir}/nodeMinMax.js")
        init = read_file(f"{template_dir}/node_color_initialization.js")
        update = read_file(f"{template_dir}/node_color_update.js")
        template = (
            template.replace("XXX_NODE_MIN_MAX", node_min_max)
            .replace("XXX_NODE_COLOR_INITIALIZATION", init)
            .replace("XXX_NODE_COLOR_UPDATE", update)
        )
    else:
        # Binary phenotype: drop the tagged filter lines and the colour hooks
        filter_js = "\n".join(
            line
            for line in filter_js.splitlines()
            if "REMOVE_THIS_LINE_IF_BINARY_PHENOTYPE" not in line
        )
        template = template.replace("XXX_NODE_COLOR_INITIALIZATION", "").replace(
            "XXX_NODE_COLOR_UPDATE", ""
        )

    # Final script. XXX_NODE_MIN_MAX is only still present in the binary case,
    # where it is blanked here.
    final_js = (
        template.replace("XXX_NODE_MIN_MAX", "")
        .replace("XXX_FILTER_BY_NODE_COLOR_AND_EDGE_SIZE", filter_js)
        .replace(
            "XXX_EDGE_MIN_MAX",
            "const edgeMin = Math.min(...edgeSizes); const edgeMax = Math.max(...edgeSizes);",
        )
        .replace(
            "XXX_ELEMENTS", f"loadJSONGz('../../data/phenotype/{mp_term}.json.gz')"
        )
        .replace("XXX_PHENOTYPE", mp_term_name_space)
        .replace("XXX_NAME", mp_term)
    )

    write_file(f"{output_dir}/app/phenotype/{mp_term}.js", final_js)

In [None]:
# ========== メイン処理 ==========

for mp_term_name_space in tqdm(selected_phenotypes, desc="Processing phenotypes"):
    # File names use underscores where the phenotype name has spaces
    mp_term = mp_term_name_space.replace(" ", "_")
    if is_test:
        print(f"Processing: {mp_term_name_space}")

    # Skip phenotypes without a network file, as the gene symbol loop does,
    # instead of aborting the whole build with FileNotFoundError.
    phenotype_src = Path(f"data/network/mp_term_name/{mp_term}.json.gz")
    if not phenotype_src.exists():
        print(f"Skipping {mp_term_name_space}: {phenotype_src} not found")
        continue

    impc_url = get_impc_url(
        mp_term_name_space, "data/annotation/mptermname_phenotypeurl.tsv"
    )
    mode = (
        "binary_phenotype"
        if is_binary(mp_term, "data/annotation/binary_phenotypes.txt")
        else "non-binary-phenotype"
    )

    # Copy the network data
    shutil.copy(
        phenotype_src,
        f"{output_dir}/data/phenotype/{mp_term}.json.gz",
    )

    # Generate the HTML page and its JavaScript
    generate_all_html(mp_term, mp_term_name_space, impc_url, mode, output_dir)
    generate_javascript(mp_term, mp_term_name_space, mode, output_dir)

# 1 min

## Gene Symbol

In [None]:
# ========== Gene Symbol用 HTML生成関数 ==========
def generate_gene_html(gene_symbol, impc_url, output_dir):
    """Build the app page of one gene symbol and write it to
    ``{output_dir}/app/genesymbol/{gene_symbol}.html``.

    The page is assembled in memory from the fragments under
    ``TSUMUGI/template/template-app-html``; no scratch files are written, so
    the function does not depend on a ``/tmp`` directory and concurrent runs
    cannot overwrite each other's intermediates.

    Args:
        gene_symbol: Gene symbol used as page title and file name.
        impc_url: Link target for the page header.
        output_dir: Root of the deployment tree.
    """
    template_dir = "TSUMUGI/template/template-app-html"

    # body-container and cy-container: gene pages have no severity section
    body_container, cy_container = (
        read_file(f"{template_dir}/{part}.html").replace("XXX_PHENOTYPE_SEVERITY", "")
        for part in ["body-container", "cy-container"]
    )

    # head.html: page title and the name of the page's JavaScript file
    head_html = (
        read_file(f"{template_dir}/head.html")
        .replace("XXX_TITLE", gene_symbol)
        .replace("XXX_JS_FILE_NAME", gene_symbol)
    )

    # header.html: the title is a link to impc_url
    header_insert = f"<a href='{impc_url}' target='_blank'>{gene_symbol}</a>"
    header_html = read_file(f"{template_dir}/header.html").replace(
        "XXX_TITLE", header_insert
    )

    # template_app.html -> finished page
    final_html = (
        read_file(f"{template_dir}/template_app.html")
        .replace("XXX_HEAD", head_html)
        .replace("XXX_H1", header_html)
        .replace("XXX_BODY_CONTAINER", body_container)
        .replace("XXX_CY_CONTAINER", cy_container)
    )

    write_file(f"{output_dir}/app/genesymbol/{gene_symbol}.html", final_html)


# ========== JavaScript生成関数 ==========
def generate_gene_javascript(gene_symbol, output_dir):
    """Build the app script of one gene symbol and write it to
    ``{output_dir}/app/genesymbol/{gene_symbol}.js``.

    The script is assembled in memory from the fragments under
    ``TSUMUGI/template/template-app-js``; no scratch files are written, so the
    function does not depend on a ``/tmp`` directory and concurrent runs
    cannot overwrite each other's intermediates.

    Args:
        gene_symbol: Gene symbol used for the data file name and ``XXX_NAME``.
        output_dir: Root of the deployment tree.
    """
    template_dir = "TSUMUGI/template/template-app-js"

    # Base template without the lines that carry the node colour hooks
    template = "\n".join(
        line
        for line in read_file(f"{template_dir}/template_app.js").splitlines()
        if "XXX_NODE_COLOR_INITIALIZATION" not in line
        and "XXX_NODE_COLOR_UPDATE" not in line
    )

    # Code fragments to embed in the script
    insert_filterByNodeColorAndEdgeSize = read_file(
        f"{template_dir}/filterByNodeColorAndEdgeSize_genesymbol.js"
    )
    insert_edgeMinMax = read_file(f"{template_dir}/edgeMinMax_for_genesymbol.js")

    # XXX_ELEMENTS is replaced with text that itself contains XXX_NAME, which
    # the last replacement then fills with the gene symbol.
    final_js = (
        template.replace(
            "XXX_FILTER_BY_NODE_COLOR_AND_EDGE_SIZE",
            insert_filterByNodeColorAndEdgeSize,
        )
        .replace("XXX_NODE_MIN_MAX", "")
        .replace("XXX_EDGE_MIN_MAX", insert_edgeMinMax)
        .replace("XXX_ELEMENTS", "loadJSONGz('../../data/genesymbol/XXX_NAME.json.gz')")
        .replace("XXX_PHENOTYPE", "")
        .replace("XXX_NAME", gene_symbol)
    )

    write_file(f"{output_dir}/app/genesymbol/{gene_symbol}.js", final_js)


# ========== MGI ID 取得関数 ==========
def get_mgi_id(gene_symbol, tsv_path="data/annotation/symbol_mgiid.tsv"):
    """Look up the ID recorded for a gene symbol in a tab-separated file.

    Args:
        gene_symbol: Gene symbol to match against the first column.
        tsv_path: Path of a UTF-8 TSV file whose first column is the symbol
            and whose second column is the ID.

    Returns:
        The second column of the first matching row, or ``""`` when no row
        matches or the matching row has no second column.
    """
    with open(tsv_path, encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split("\t")
            if parts[0] == gene_symbol:
                # strip() also removes a trailing tab, so a row with an empty
                # ID has a single field; return "" instead of raising IndexError.
                return parts[1] if len(parts) > 1 else ""
    return ""

In [None]:
# ========== メイン処理 ==========

for gene_symbol in tqdm(gene_symbols, desc="Processing gene symbols"):
    if is_test:
        print(f"Processing: {gene_symbol}")

    # Gene symbols without a network file are skipped entirely
    src = f"data/network/gene_symbol/{gene_symbol}.json.gz"
    if Path(src).exists():
        # Copy the network data, creating the destination directory if needed
        dst = f"{output_dir}/data/genesymbol/{gene_symbol}.json.gz"
        Path(dst).parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(src, dst)

        # Build the link target from the gene's MGI ID
        mgi_id = get_mgi_id(gene_symbol)
        impc_url = f"https://www.mousephenotype.org/data/genes/{mgi_id}"

        # Generate the HTML page and its JavaScript
        generate_gene_html(gene_symbol, impc_url, output_dir)
        generate_gene_javascript(gene_symbol, output_dir)

# 15 min

## Gene List

In [None]:
# ========== Gene List用 HTML生成関数 ==========
def generate_genelist_html(output_dir):
    """Build the gene-list app page and write it to
    ``{output_dir}/app/genelist/network_genelist.html``.

    The page is assembled in memory from the fragments under
    ``TSUMUGI/template/template-app-html``; no scratch files are written, so
    the function does not depend on a ``/tmp`` directory and concurrent runs
    cannot overwrite each other's intermediates.

    Args:
        output_dir: Root of the deployment tree.
    """
    template_dir = "TSUMUGI/template/template-app-html"

    # body-container and cy-container: the gene-list page has no severity section
    body_container, cy_container = (
        read_file(f"{template_dir}/{part}.html").replace("XXX_PHENOTYPE_SEVERITY", "")
        for part in ["body-container", "cy-container"]
    )

    # head.html: page title and the name of the page's JavaScript file
    head_html = (
        read_file(f"{template_dir}/head.html")
        .replace("XXX_TITLE", "Gene List")
        .replace("XXX_JS_FILE_NAME", "network_genelist")
    )

    # header.html: plain-text title (no link)
    header_insert = "gene list"
    header_html = read_file(f"{template_dir}/header.html").replace(
        "XXX_TITLE", header_insert
    )

    # template_app.html -> finished page
    final_html = (
        read_file(f"{template_dir}/template_app.html")
        .replace("XXX_HEAD", head_html)
        .replace("XXX_H1", header_html)
        .replace("XXX_BODY_CONTAINER", body_container)
        .replace("XXX_CY_CONTAINER", cy_container)
    )

    write_file(f"{output_dir}/app/genelist/network_genelist.html", final_html)


# ========== JavaScript生成関数 ==========
def generate_genelist_javascript(output_dir):
    """Build the gene-list app script and write it to
    ``{output_dir}/app/genelist/network_genelist.js``.

    The script is assembled in memory from the fragments under
    ``TSUMUGI/template/template-app-js``; no scratch files are written, so the
    function does not depend on a ``/tmp`` directory and concurrent runs
    cannot overwrite each other's intermediates.

    Args:
        output_dir: Root of the deployment tree.
    """
    template_dir = "TSUMUGI/template/template-app-js"

    # Base template without the lines that carry the node colour hooks
    template = "\n".join(
        line
        for line in read_file(f"{template_dir}/template_app.js").splitlines()
        if "XXX_NODE_COLOR_INITIALIZATION" not in line
        and "XXX_NODE_COLOR_UPDATE" not in line
    )

    # Code fragment to embed in the script
    insert = read_file(f"{template_dir}/filterByNodeColorAndEdgeSize_genelist.js")

    # The elements come from the browser's localStorage instead of a data file
    final_js = (
        template.replace("XXX_FILTER_BY_NODE_COLOR_AND_EDGE_SIZE", insert)
        .replace("XXX_NODE_MIN_MAX", "")
        .replace(
            "XXX_EDGE_MIN_MAX",
            "const edgeMin = Math.min(...edgeSizes); const edgeMax = Math.max(...edgeSizes);",
        )
        .replace("XXX_ELEMENTS", "JSON.parse(localStorage.getItem('elements'))")
        .replace("XXX_PHENOTYPE", "")
        .replace("XXX_NAME", "geneList")
    )

    write_file(f"{output_dir}/app/genelist/network_genelist.js", final_js)

In [None]:
# ========== Main processing ==========
# Generate the gene-list HTML page and its JavaScript under output_dir
generate_genelist_html(output_dir)
generate_genelist_javascript(output_dir)

In [None]:
# Files / directories to format. Paths that were never created are left out:
# the top-level "js" directory, for instance, is only copied when its source exists.
targets = [
    path
    for path in (output_dir / "index.html", output_dir / "js", output_dir / "app")
    if path.exists()
]

# Prettier command line
prettier_cmd = [
    "prettier",
    "--write",
    *[str(p) for p in targets],
    "--print-width",
    "120",
    "--prose-wrap",
    "never",
    "--tab-width",
    "4",
]

# Run the command. Standard output stays suppressed, but standard error is
# captured so that a failure can be diagnosed from the notebook output.
try:
    subprocess.run(
        prettier_cmd,
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True,
    )
except FileNotFoundError:
    print("❌ Prettier is not installed or not found in PATH.")
except subprocess.CalledProcessError as error:
    print("❌ Prettier failed to format one or more files.")
    if error.stderr:
        print(error.stderr.strip())

In [None]:
%%bash
# Print system information and the current date/time; abort on the first failing command
set -e
uname -a  # OS name
date +"%Y/%m/%d %H:%M:%S"  # Last update