In [None]:
# (Optional) Mount Google Drive at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# ▼ 1) Mount Google Drive so the dataset paths below are reachable
from google.colab import drive
drive.mount('/content/drive')

# ▼ 2) Location of the release ZIP and of the extraction target (a persistent path on Drive)
BASE_DIR   = "/content/drive/MyDrive/DILAB/OK/DI_LAB/MARS_Datathon/Datasets/SNOMED_CT_datasets"
INTL_ZIP   = f"{BASE_DIR}/SnomedCT_InternationalRF2_PRODUCTION_20251001T120000Z.zip"

# Folder on Drive where the archive is extracted and kept. Change it if another location is preferred.
EXTRACT_DIR = f"{BASE_DIR}/SNOMED_International_2025-10"

import os, glob, subprocess

# ▼ 3) Extract the archive (skipped when extracted files are already present)
os.makedirs(EXTRACT_DIR, exist_ok=True)

# Any *.txt file under a Snapshot/Terminology folder counts as "already extracted".
# NOTE(review): an earlier unzip that was interrupted part-way also satisfies this
# check, so an incomplete tree would be skipped too; re-extract manually if in doubt.
already_has_files = bool(glob.glob(EXTRACT_DIR + "/**/Snapshot/Terminology/*.txt", recursive=True))
if not already_has_files:
    # To re-extract from scratch, uncomment the next two lines:
    # subprocess.run(["rm","-rf", EXTRACT_DIR], check=False)
    # os.makedirs(EXTRACT_DIR, exist_ok=True)

    # Fail early and name the offending path; otherwise a missing ZIP only surfaces
    # as a CalledProcessError from `unzip` with no hint about which file was expected.
    if not os.path.isfile(INTL_ZIP):
        raise FileNotFoundError(f"SNOMED CT release ZIP not found: {INTL_ZIP}")

    print("Unzipping to Drive (this is persistent) ...")
    # check=True turns a non-zero unzip exit status into an exception.
    subprocess.run(["unzip", "-q", INTL_ZIP, "-d", EXTRACT_DIR], check=True)
else:
    print("Found existing extracted files in Drive. Skipping unzip.")

# ▼ 4) Auto-discover the key paths and bind them to variables
#      (these variables can be copied into other notebooks as well)
def find_one(patterns):
    """Return a single path for the first glob pattern that matches anything.

    Patterns are tried in the order given, each with ``recursive=True`` so
    that ``**`` may span nested directories. As soon as one pattern yields at
    least one hit, the remaining patterns are not tried.

    Args:
        patterns: An iterable of glob pattern strings, or a single pattern
            string (treated as a one-element list).

    Returns:
        The lexicographically greatest matching path of the first pattern
        that matches, or ``None`` when no pattern matches (including when
        ``patterns`` is empty).
    """
    # A bare string would otherwise be iterated character by character, and a
    # leading "/" would then glob the filesystem root and be returned.
    if isinstance(patterns, str):
        patterns = [patterns]

    for pattern in patterns:
        hits = glob.glob(pattern, recursive=True)
        if hits:
            # Equivalent to sorting and taking the last element.
            # NOTE(review): presumably meant to prefer the newest date-stamped
            # name when several releases coexist; confirm.
            return max(hits)
    return None

# Local import so this block stays self-contained; used for the diagnostic listing below.
from itertools import islice

# Locate the Snapshot directories anywhere under the extraction root.
INTL_TERM = find_one([EXTRACT_DIR + "/**/Snapshot/Terminology"])
INTL_LANG = find_one([EXTRACT_DIR + "/**/Snapshot/Refset/Language"])

# Only search for the files when their parent directory was found. Otherwise the
# f-strings would embed the literal text "None" and the patterns would search a
# relative "None/..." path under the current working directory.
DESC_FILE = (
    find_one([f"{INTL_TERM}/sct2_Description_Snapshot*.txt", f"{INTL_TERM}/**/sct2_Description_Snapshot*.txt"])
    if INTL_TERM else None
)
LANGREF_FILE = (
    find_one([f"{INTL_LANG}/der2_cRefset_LanguageSnapshot*.txt", f"{INTL_LANG}/**/der2_cRefset_LanguageSnapshot*.txt"])
    if INTL_LANG else None
)

print("=== Persistent Paths on Drive ===")
print("EXTRACT_DIR :", EXTRACT_DIR)
print("Terminology :", INTL_TERM)
print("Language    :", INTL_LANG)
print("DESC file   :", DESC_FILE)
print("LANG file   :", LANGREF_FILE)

# ▼ 5) Quick sanity check that the paths were resolved (optional)
if not DESC_FILE or not LANGREF_FILE:
    print("\n[WARN] Snapshot 파일이 탐지되지 않았어요. 아래를 참고해 패턴을 조정하세요:")
    # Show the first 50 entries under the extraction root as a hint for adjusting
    # the patterns. iglob + islice stops after 50 instead of listing the whole tree.
    for p in islice(glob.iglob(EXTRACT_DIR + "/**/*", recursive=True), 50):
        print(" -", p)


Mounted at /content/drive
Unzipping to Drive (this is persistent) ...
=== Persistent Paths on Drive ===
EXTRACT_DIR : /content/drive/MyDrive/DILAB/OK/DI_LAB/MARS_Datathon/Datasets/SNOMED_CT_datasets/SNOMED_International_2025-10
Terminology : /content/drive/MyDrive/DILAB/OK/DI_LAB/MARS_Datathon/Datasets/SNOMED_CT_datasets/SNOMED_International_2025-10/SnomedCT_InternationalRF2_PRODUCTION_20251001T120000Z/Snapshot/Terminology
Language    : /content/drive/MyDrive/DILAB/OK/DI_LAB/MARS_Datathon/Datasets/SNOMED_CT_datasets/SNOMED_International_2025-10/SnomedCT_InternationalRF2_PRODUCTION_20251001T120000Z/Snapshot/Refset/Language
DESC file   : /content/drive/MyDrive/DILAB/OK/DI_LAB/MARS_Datathon/Datasets/SNOMED_CT_datasets/SNOMED_International_2025-10/SnomedCT_InternationalRF2_PRODUCTION_20251001T120000Z/Snapshot/Terminology/sct2_Description_Snapshot-en_INT_20251001.txt
LANG file   : /content/drive/MyDrive/DILAB/OK/DI_LAB/MARS_Datathon/Datasets/SNOMED_CT_datasets/SNOMED_International_2025-10/S

In [2]:
!pip -q install duckdb