In [12]:
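# --- RCSB PDB mmCIF bulk downloader (keyed on the first 4 characters of each entry) ---
# 1) Paste the list into RAW_IDS, one entry per line; only the first 4 alphanumeric
#    characters of a line are used (e.g. "1g2c" from "1g2cF")
# 2) Run the cell -> cif_files/ folder + cif_files.zip + summary.csv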

import os, io, gzip, time, csv, zipfile, shutil, textwrap, re
from pathlib import Path
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError

RAW_IDS = """
1g2cF
5ec5P
1uxmK
7ahlE
5aoeB
1ovaA
3gmhL
3m1bF
3j7wB
2lqwA
5ejbC
2frhA
5hmgB
4j3oF
4hddA
1miqB
4nc9C
3j97M
2naoF
5c1vA
4zt0C
5jytA
2lejA
3jv6A
5k5gA
4uv2D
1wyyB
5fhcJ
4wsgC
1nqdA
5fluE
3qy2A
2ougC
1qomB
4rr2D
2gedB
5keqF
4y0mJ
2c1uC
4zrbC
4qhfA
4aanA
1x0gA
4ae0A
1mnmC
2nntA
4jphB
3ifaA
3k2sA
4fu4C
1h38D
5b3zA
1xjtA
3hdeA
4b3oB
4twaA
3ejhA
1k0nA
1xntA
2axzA
4gqcC
4o0pA
4dxtA
4rwnA
2hdmA
3vo9B
2p3vA
3zwgN
3ewsB
3tp2A
3njqA
4rmbA
2ce7C
4phqA
3t1pA
3j9cA
2nxqB
5l35D
5i2mA
5f3kA
4qdsA
5jzhA
4pyiA
5ineA
1mbyA
2a73B
2k0qA
3uyiA
4a5wB
1rkpA
1ceeB
3o44A
1repC
3kuyA
2n0aD
4m4rA
5c6bF
3zxgB
2namA
4yhdG
5ly6B
1jtiB
2vfxL
3lowA
3j7vG
2bzyB
1wp8C
1fzpD
1htmB
2jmrA
2lepA
1qs8B
4n9wA
1xtgB
1iytA
5c1vB
4cmqB
2qkeE
2lv1A
1zk9A
2kb8A
4q79F
5wrgA
1eboE
1svfC
1nqjB
2uy7D
1qb3A
2lclA
1nocA
3l9qB
1nrjB
1dzlA
4xwsD
2c1vB
4zrbH
4qhhA
4aalA
1x0gD
4ow6B
1mnmD
2mwfA
5hk5H
5et5A
2a01C
4g0dZ
1qlnA
5bmyA
1xjuB
3hdfA
3meeA
4ydqB
3m7pA
1rk4B
3lqcA
2grmB
4gqcB
4o01D
4dxrA
4rwqB
2n54B
3vpaD
2p3vD
4tsyD
3g0hA
5lj3M
2pbkB
4rmbB
3kdsG
2wcdX
1kctA
3q8fA
1jfkA
5l35G
5i2sA
5f5rB
2qqjA
5jztG
4pyjA
3mkoA
4yypA
3l5nB
2lelA
3v0tA
3t5oA
2h44A
2k42A
1xezA
2z9oB
5c3iF
2kkwA
4w50B
""".strip()

# --- Yardımcılar ---
def normalize_first4(token: str) -> str:
    """Return the first run of four ASCII alphanumerics in ``token``, lower-cased.

    Surrounding whitespace is stripped before searching. An empty string is
    returned when ``token`` contains no four consecutive alphanumeric
    characters.
    """
    found = re.search(r'([0-9A-Za-z]{4})', token.strip())
    if found is None:
        return ""
    return found.group(1).lower()

def fetch_cif(pdb4: str, out_path: Path, pause=0.15) -> tuple[bool,str]:
    """Download one mmCIF entry from RCSB into ``out_path``.

    The plain ``.cif`` URL is tried first. If that attempt fails for any
    network/HTTP reason (or returns an empty body), the ``.cif.gz`` URL is
    tried and its payload is gunzipped before being written.

    Each payload is read fully into memory and only then written, so a failed
    or interrupted transfer never leaves an empty or truncated file at
    ``out_path``.

    Args:
        pdb4: PDB ID; it is upper-cased to build the download URLs.
        out_path: Destination file for the downloaded mmCIF bytes.
        pause: Seconds to sleep after a successful download.

    Returns:
        ``(True, "cif")`` or ``(True, "cif.gz→cif")`` on success, otherwise
        ``(False, "NOT_FOUND (<err1> / <err2>)")`` where each error is the HTTP
        status code when the exception carries one, else the exception text.
    """
    # IncompleteRead (truncated body) derives from HTTPException, not OSError.
    from http.client import HTTPException

    base = pdb4.upper()
    # (url, status label reported on success, whether the payload is gzip-compressed)
    attempts = [
        (f"https://files.rcsb.org/download/{base}.cif", "cif", False),
        (f"https://files.rcsb.org/download/{base}.cif.gz", "cif.gz→cif", True),
    ]
    errors = []
    for url, label, gzipped in attempts:
        try:
            req = Request(url, headers={"User-Agent":"Mozilla/5.0"})
            with urlopen(req, timeout=30) as r:
                data = r.read()
        except (OSError, HTTPException) as exc:
            # HTTPError and URLError are OSError subclasses, and so are the
            # socket timeouts / connection resets that can occur mid-read.
            errors.append(getattr(exc, "code", exc))
            continue

        if gzipped:
            try:
                data = gzip.decompress(data)
            except OSError:
                # Some servers may return the body already decompressed;
                # in that case the bytes are kept as received.
                pass

        if not data:
            # An empty body is not a usable structure file.
            errors.append("empty response")
            continue

        # The output file is touched only once a complete payload is in hand.
        with open(out_path, "wb") as f:
            f.write(data)
        time.sleep(pause)
        return True, label

    return False, "NOT_FOUND ({})".format(" / ".join(str(e) for e in errors))

# --- Process input ---
# Keep the non-blank lines, reduce each to its 4-character ID and drop lines
# that yield no ID; the download list is the sorted set of what remains.
tokens = [t for t in RAW_IDS.splitlines() if t.strip()]
pdb4_list = [p for p in map(normalize_first4, tokens) if p]
unique_ids = sorted(set(pdb4_list))

print(f"Girdi satırı: {len(tokens)} | Geçerli ilk4: {len(pdb4_list)} | Tekil PDB ID: {len(unique_ids)}")
if len(unique_ids) < len(pdb4_list):
    print(f"(Bilgi) Yinelenenler atıldı: {len(pdb4_list)-len(unique_ids)} adet")

# --- Output folder ---
# Start from an empty folder: the zip step below archives every *.cif found
# here, so leftovers from an earlier run would otherwise be included.
root = Path("./cif_files")
if root.exists():
    shutil.rmtree(root)
root.mkdir(parents=True, exist_ok=True)

summary_rows = []
ok, fail = 0, 0

# --- Download loop ---
for i, pdb4 in enumerate(unique_ids, 1):
    out_path = root / f"{pdb4.upper()}.cif"
    try:
        success, status = fetch_cif(pdb4, out_path)
    except OSError as exc:
        # An I/O error that escapes the downloader (e.g. a socket timeout while
        # reading the body) is recorded for this ID instead of aborting the
        # whole batch before the summary and zip are written.
        success, status = False, f"ERROR ({exc})"
    if success:
        ok += 1
        print(f"[{i:03d}/{len(unique_ids)}] {pdb4.upper()}  ✓  ({status})")
        summary_rows.append((pdb4.upper(), "OK", status, str(out_path)))
    else:
        fail += 1
        # Remove any partial file so a failed ID never ends up in the zip.
        out_path.unlink(missing_ok=True)
        print(f"[{i:03d}/{len(unique_ids)}] {pdb4.upper()}  ✗  {status}")
        summary_rows.append((pdb4.upper(), "FAIL", status, ""))

# --- Summary CSV ---
# Explicit UTF-8: the status text contains "→", which the platform default
# encoding cannot represent on every system.
csv_path = Path("summary.csv")
with open(csv_path, "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["pdb4", "result", "detail", "path"])
    w.writerows(summary_rows)

# --- Zip ---
zip_path = Path("cif_files.zip")
if zip_path.exists():
    zip_path.unlink()
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
    # structure files, in sorted order so the archive layout is reproducible
    for p in sorted(root.glob("*.cif")):
        z.write(p, arcname=p.name)
    # summary table
    z.write(csv_path, arcname=csv_path.name)

print("\n--- BİTTİ ---")
print(f"Başarılı: {ok} | Başarısız: {fail}")
print(f"Zip: {zip_path.resolve()}")
print(f"Özet: {csv_path.resolve()}")



Girdi satırı: 192 | Geçerli ilk4: 192 | Tekil PDB ID: 184
(Bilgi) Yinelenenler atıldı: 8 adet
[001/184] 1CEE  ✓  (cif)
[002/184] 1DZL  ✓  (cif)
[003/184] 1EBO  ✓  (cif)
[004/184] 1FZP  ✓  (cif)
[005/184] 1G2C  ✓  (cif)
[006/184] 1H38  ✓  (cif)
[007/184] 1HTM  ✓  (cif)
[008/184] 1IYT  ✓  (cif)
[009/184] 1JFK  ✓  (cif)
[010/184] 1JTI  ✓  (cif)
[011/184] 1K0N  ✓  (cif)
[012/184] 1KCT  ✓  (cif)
[013/184] 1MBY  ✓  (cif)
[014/184] 1MIQ  ✓  (cif)
[015/184] 1MNM  ✓  (cif)
[016/184] 1NOC  ✓  (cif)
[017/184] 1NQD  ✓  (cif)
[018/184] 1NQJ  ✓  (cif)
[019/184] 1NRJ  ✓  (cif)
[020/184] 1OVA  ✓  (cif)
[021/184] 1QB3  ✓  (cif)
[022/184] 1QLN  ✓  (cif)
[023/184] 1QOM  ✓  (cif)
[024/184] 1QS8  ✓  (cif)
[025/184] 1REP  ✓  (cif)
[026/184] 1RK4  ✓  (cif)
[027/184] 1RKP  ✓  (cif)
[028/184] 1SVF  ✓  (cif)
[029/184] 1UXM  ✓  (cif)
[030/184] 1WP8  ✓  (cif)
[031/184] 1WYY  ✓  (cif)
[032/184] 1X0G  ✓  (cif)
[033/184] 1XEZ  ✓  (cif)
[034/184] 1XJT  ✓  (cif)
[035/184] 1XJU  ✓  (cif)
[036/184] 1XNT  ✓  (cif)
[037/1

In [13]:
from google.colab import files

# Trigger a browser download for each artifact, one file at a time:
# first the zip holding all CIF files, then the summary table.
for artifact in ("cif_files.zip", "summary.csv"):
    files.download(artifact)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [14]:
# Install DSSP, then show which binary name is on PATH and its version.
# Only the leading "!" is IPython syntax; the rest of the line goes to the
# shell verbatim, so the fallback after "||" must be a plain command
# ("!which" would be looked up as a command literally named "!which").
!apt-get -y update && apt-get -y install dssp
!which mkdssp || which dssp
!mkdssp -V || dssp -V


Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:2 https://cli.github.com/packages stable InRelease
Hit:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 http://security.ubuntu.com/ubuntu jammy-security InRelease
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:12 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1,582 kB]
Fetched 1,837 kB in 3s (534 kB/s)
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' a

In [15]:
# --- DSSP ÜRETİMİ (sağlam) ---
import shutil, subprocess, os
from pathlib import Path

# Output folder for the generated .dssp files; reused if it already exists.
dssp_dir = Path("dssp_files")
os.makedirs(dssp_dir, exist_ok=True)

def _resolve_mkdssp_path():
    """Locate the DSSP executable on PATH.

    The binary is named "mkdssp" on some systems and "dssp" on others, so both
    names are tried, in that order.

    Returns:
        The path reported by ``shutil.which`` for the first name that resolves.

    Raises:
        FileNotFoundError: If neither name is found. The message contains the
            notebook command that installs DSSP.
    """
    path = shutil.which("mkdssp") or shutil.which("dssp")
    if not path:
        raise FileNotFoundError(
            "DSSP bulunamadı. Lütfen önce şu hücreyi çalıştırın:\n"
            # Only the leading "!" is IPython syntax; a second "!" after "&&"
            # would be passed to the shell literally and break the command.
            "!apt-get -y update && apt-get -y install dssp"
        )
    return path

# Resolved once, when this cell runs, so a missing DSSP install raises
# FileNotFoundError here rather than inside every per-file call.
MKDSSP_BIN = _resolve_mkdssp_path()

def run_dssp_for_cif(cif_path: Path, out_dir: Path, min_bytes: int = 1000,
                     timeout: float = 120) -> tuple[bool, str]:
    """Run mkdssp on one mmCIF file and write ``<stem>.dssp`` into ``out_dir``.

    An output file counts as valid only when it is larger than ``min_bytes``.
    If a valid output already exists the tool is not run. Otherwise the default
    invocation is tried first and, if that does not yield a valid file, a
    second run with ``--output-format dssp-extended`` follows.

    Args:
        cif_path: Input structure file; its stem names the output file.
        out_dir: Directory for the ``.dssp`` file (not created here).
        min_bytes: Size in bytes an output must exceed to be accepted.
        timeout: Per-invocation limit in seconds passed to ``subprocess.run``.

    Returns:
        ``(True, "skip_exists")``, ``(True, "ok")`` or ``(True, "ok (extended)")``
        on success. On failure ``(False, detail)``, where ``detail`` is the last
        non-blank stderr line of the first run that produced one,
        ``"unknown error"`` when neither run wrote anything to stderr, or a
        description of the exception that was raised. After a failure any
        output file left at the target path is removed.
    """
    cif_path = Path(cif_path)
    dssp_path = Path(out_dir) / f"{cif_path.stem}.dssp"

    def _has_valid_output() -> bool:
        # Existence alone is not enough: a tiny file is treated as a failed run.
        return dssp_path.exists() and dssp_path.stat().st_size > min_bytes

    def _last_stderr_line(text) -> str:
        # Last non-blank line, or "" when stderr is empty or whitespace only.
        lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
        return lines[-1] if lines else ""

    if _has_valid_output():
        return True, "skip_exists"

    # (status label on success, extra command-line arguments)
    # NOTE(review): whether the installed mkdssp accepts the "dssp-extended"
    # format name depends on its version — confirm against `mkdssp --help`.
    attempts = (
        ("ok", []),
        ("ok (extended)", ["--output-format", "dssp-extended"]),
    )
    try:
        stderr_tails = []
        for label, extra_args in attempts:
            res = subprocess.run(
                [MKDSSP_BIN, *extra_args, "-i", str(cif_path), "-o", str(dssp_path)],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout
            )
            if res.returncode == 0 and _has_valid_output():
                return True, label
            stderr_tails.append(_last_stderr_line(res.stderr))
        failure = next((tail for tail in stderr_tails if tail), "unknown error")
    except FileNotFoundError:
        failure = "mkdssp not found (install dssp)"
    except Exception as e:
        # Includes subprocess.TimeoutExpired; reported per file so that one
        # bad structure does not stop the caller's batch.
        failure = f"exception: {e}"

    # Drop undersized/partial output so it is neither archived later nor
    # mistaken for a result.
    try:
        dssp_path.unlink(missing_ok=True)
    except OSError:
        pass  # best-effort cleanup; the failure itself is still reported
    return False, failure


In [16]:
# --- Run DSSP over every downloaded .cif file ---
import csv

cif_list = sorted(Path("./cif_files").glob("*.cif"))
dssp_ok, dssp_fail = 0, 0
log_rows = []

for i, cif_file in enumerate(cif_list, 1):
    okflag, msg = run_dssp_for_cif(cif_file, dssp_dir)
    if not okflag:
        # failure: count it, report it, log it, move on to the next file
        dssp_fail += 1
        print(f"[{i:03d}/{len(cif_list)}] {cif_file.stem} ✗ {msg}")
        log_rows.append((cif_file.stem, "FAIL", msg))
        continue
    dssp_ok += 1
    print(f"[{i:03d}/{len(cif_list)}] {cif_file.stem} ✓ {msg}")
    log_rows.append((cif_file.stem, "OK", msg))

# Per-file outcome table
dssp_csv = Path("dssp_summary.csv")
with dssp_csv.open("w", newline="") as f:
    w = csv.writer(f)
    w.writerow(["pdb_id", "result", "detail"])
    w.writerows(log_rows)

print("\n--- DSSP TAMAMLANDI ---")
print(f"Toplam CIF: {len(cif_list)} | Başarılı: {dssp_ok} | Başarısız: {dssp_fail}")
print(f"DSSP klasörü: {dssp_dir.resolve()}")
print(f"Özet: {dssp_csv.resolve()}")


[001/184] 1CEE ✓ skip_exists
[002/184] 1DZL ✓ skip_exists
[003/184] 1EBO ✓ skip_exists
[004/184] 1FZP ✓ skip_exists
[005/184] 1G2C ✓ ok
[006/184] 1H38 ✓ skip_exists
[007/184] 1HTM ✓ skip_exists
[008/184] 1IYT ✓ skip_exists
[009/184] 1JFK ✓ skip_exists
[010/184] 1JTI ✓ skip_exists
[011/184] 1K0N ✓ skip_exists
[012/184] 1KCT ✓ skip_exists
[013/184] 1MBY ✓ skip_exists
[014/184] 1MIQ ✓ skip_exists
[015/184] 1MNM ✓ skip_exists
[016/184] 1NOC ✓ skip_exists
[017/184] 1NQD ✓ skip_exists
[018/184] 1NQJ ✓ skip_exists
[019/184] 1NRJ ✓ skip_exists
[020/184] 1OVA ✓ skip_exists
[021/184] 1QB3 ✓ skip_exists
[022/184] 1QLN ✓ skip_exists
[023/184] 1QOM ✓ skip_exists
[024/184] 1QS8 ✓ skip_exists
[025/184] 1REP ✓ skip_exists
[026/184] 1RK4 ✓ skip_exists
[027/184] 1RKP ✓ skip_exists
[028/184] 1SVF ✓ skip_exists
[029/184] 1UXM ✓ skip_exists
[030/184] 1WP8 ✓ skip_exists
[031/184] 1WYY ✓ skip_exists
[032/184] 1X0G ✓ skip_exists
[033/184] 1XEZ ✓ skip_exists
[034/184] 1XJT ✓ skip_exists
[035/184] 1XJU ✓ skip_e

In [17]:
import shutil
# Pack the contents of dssp_files/ into all_dssp.zip in the working directory.
shutil.make_archive(base_name="all_dssp", format="zip", root_dir="dssp_files")
from google.colab import files
# Hand the finished archive to the browser as a download.
files.download("all_dssp.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>