In [None]:
# === Canonical TNFSF family (human) — fetch from UniProt and build NFSF_FAMILY ===
# Requirements: internet on your machine + Python stdlib (urllib). No extra installs needed.

import sys, ssl, urllib.request, textwrap, re

# Display label -> UniProtKB accession for every sequence this cell fetches.
# The labels become the keys of NFSF_FAMILY and the row labels of the voucher
# printout; the accessions are substituted into the UniProt REST URL.
ACCESSIONS = {
    "TNFSF15_TL1A":  "O95150",  # TL1A
    "TNFSF10_TRAIL": "P50591",  # TRAIL
    "TNFSF14_LIGHT": "O43557",  # LIGHT
    "TNFSF12_TWEAK": "O43508",  # TWEAK
    "TNFSF2_TNF":    "P01375",  # TNF-α
    "TNFSF1_LTA":    "P01374",  # Lymphotoxin-α
}

def fetch_uniprot_fasta(acc):
    """Download the FASTA entry for a UniProtKB accession and return its sequence.

    Only the FIRST record of the response is used: reading stops at the next
    ``>`` header, so a response carrying several records (e.g. isoforms) can
    never be silently concatenated into one chimeric sequence.

    Parameters
    ----------
    acc : str
        UniProtKB accession, e.g. ``"P01375"``.

    Returns
    -------
    str
        Sequence lines of the first FASTA record joined into one string
        (no header, no line breaks, surrounding whitespace stripped per line).

    Raises
    ------
    ValueError
        If the response is empty or does not start with a FASTA header line.
        (Raised explicitly rather than via ``assert`` so the check survives
        ``python -O``.)
    Exception
        Whatever ``urllib.request.urlopen`` / ``bytes.decode`` raise on
        network, HTTP, TLS or decoding failures is propagated unchanged.
    """
    # UniProt REST (new): https://rest.uniprot.org/uniprotkb/{accession}.fasta
    url = f"https://rest.uniprot.org/uniprotkb/{acc}.fasta"
    ctx = ssl.create_default_context()
    with urllib.request.urlopen(url, context=ctx, timeout=30) as r:
        data = r.read().decode("utf-8")

    # Drop blank lines and surrounding whitespace before parsing.
    lines = [ln.strip() for ln in data.splitlines() if ln.strip()]
    if not lines or not lines[0].startswith(">"):
        raise ValueError(f"FASTA header missing for {acc}")

    # Collect sequence lines of the first record only; a second header ends it.
    seq_lines = []
    for ln in lines[1:]:
        if ln.startswith(">"):
            break
        seq_lines.append(ln)
    return "".join(seq_lines)

# The 20 standard amino-acid one-letter codes accepted by qc().
AA20 = set("ACDEFGHIKLMNPQRSTVWY")
def qc(seq):
    """Run basic sanity checks on a protein sequence and count N-X-S/T motifs.

    Parameters
    ----------
    seq : str
        Sequence in one-letter code.

    Returns
    -------
    (issues, nxs) : (list[str], int)
        ``issues`` holds a ``"non-20AA:[...]"`` entry listing (sorted) every
        character outside AA20, and a ``"whitespace"`` entry when any
        whitespace character is present; it is empty for a clean sequence.
        ``nxs`` is the number of positions where ``N`` is followed by any
        character other than ``P`` and then ``S`` or ``T``.
    """
    issues = []
    bad = sorted(set(seq) - AA20)
    if bad:
        issues.append(f"non-20AA:{bad}")
    if re.search(r"\s", seq):
        issues.append("whitespace")
    # Lookahead keeps the match one character wide so overlapping motifs are
    # each counted (e.g. "NNTS" holds two: NNT and NTS). A plain N[^P][ST]
    # pattern consumes three characters per hit and would report only one.
    nxs = sum(1 for _ in re.finditer(r"N(?=[^P][ST])", seq))
    return issues, nxs

# Fetch each accession, QC it, and keep both the sequence (NFSF_FAMILY) and a
# compact summary row (report_rows) for the voucher table printed below.
NFSF_FAMILY = dict()
report_rows = list()
for label, acc in ACCESSIONS.items():
    try:
        seq = fetch_uniprot_fasta(acc)
    except Exception as e:
        # Say which entry failed, then let the original exception propagate.
        print(f"[ERROR] Fetch {label} ({acc}): {e}")
        raise

    issues, nxs = qc(seq)
    if issues:
        # QC problems are reported but do not stop the sequence being stored.
        print(f"[WARN] {label}: {';'.join(issues)}")

    NFSF_FAMILY[label] = seq

    # Compact voucher for your report
    report_rows.append(dict(zip(
        ("label", "accession", "length", "NXS/T_motifs", "head10", "tail10"),
        (label, acc, len(seq), nxs, seq[:10], seq[-10:]),
    )))

# Pretty-print a short voucher table you can tuck into REPORT.md
print("=== Canonical TNFSF (human) — voucher ===")
for r in report_rows:
    print(
        f"{r['label']:15s}  {r['accession']:7s}  len={r['length']:4d}  NXS/T={r['NXS/T_motifs']}",
        f"  head={r['head10']}.. tail=..{r['tail10']}",
        sep="",
    )

# NFSF_FAMILY is now canonical and ready for the 6-mer / local alignment checks.



In [None]:
# Cross-reactivity analysis using the canonical TNFSF sequences in NFSF_FAMILY.
#
# Prerequisites, checked up front so a fresh "Restart & Run All" fails with an
# actionable message instead of a bare NameError part-way through:
#   - NFSF_FAMILY          : built by the UniProt fetch cell
#   - CDRS, xreact_scores  : defined in the k-mer overlap cell, which in this
#                            notebook is placed AFTER this one
# NOTE: that k-mer overlap cell also executes `FAMILY = {}`, so re-run this
# cell after it; otherwise FAMILY is left empty again.
_missing = [_n for _n in ("NFSF_FAMILY", "CDRS", "xreact_scores") if _n not in globals()]
if _missing:
    raise NameError(
        "Cross-reactivity analysis needs " + ", ".join(_missing)
        + " to be defined; run the cell(s) that define them before this one."
    )

print("=== Cross-reactivity analysis ===")
# xreact_scores reads the module-level FAMILY, so point it at the canonical
# UniProt sequences.
FAMILY = NFSF_FAMILY

# Compute and print top-3 overlaps for each clone
for clone in sorted(CDRS.keys()):
    scores = xreact_scores(clone)
    top3 = scores[:3]
    print(f"{clone}: Top3 = {top3}")
print("\nNote: Overlaps with canonical sequences; wet X-reactivity panel retained for TRAIL, LIGHT, TWEAK, TNF, LTA.")


In [None]:
def kmer_set(seq,k=6):
    """Return the set of distinct length-``k`` substrings of ``seq``.

    Yields an empty set when ``seq`` is shorter than ``k`` (no window fits).
    """
    windows = set()
    for start in range(len(seq) - k + 1):
        windows.add(seq[start:start + k])
    return windows

def paratope_concat(Hs, Ls):
    """Concatenate five CDR entries in the fixed order H3, H2, L3, H1, L1.

    ``Hs`` must provide keys "H1", "H2", "H3"; ``Ls`` must provide "L1", "L3".
    An "L2" entry in ``Ls`` is not used.
    """
    heavy3, heavy2, light3 = Hs["H3"], Hs["H2"], Ls["L3"]
    heavy1, light1 = Hs["H1"], Ls["L1"]
    return heavy3 + heavy2 + light3 + heavy1 + light1

# CDR strings per clone: clone id -> {"H1","H2","H3","L1","L2","L3"} -> sequence.
# xreact_scores() and the hotspot scan look clones up here by id and pass the
# H* / L* entries to paratope_concat(), which uses every entry except L2.
CDRS = {
 "Fab01":{"H1":"SGYSMHIN","H2":"ITYDGGDSNYNPGLKD","H3":"CARGYGNGDWYFDYFDY","L1":"SNYGTSY","L2":"DAS","L3":"QQYNNWPT"},
 "Fab02":{"H1":"SSYAMHIN","H2":"ISFDGGDTNYNPALKD","H3":"CARDFYGGDWYFDYFDY","L1":"SNYGMSY","L2":"DSS","L3":"QQYDSWPT"},
 "Fab03":{"H1":"SSYAMHIN","H2":"ISYDGGDTNYNPSLKD","H3":"CARGYGNGDWYFDYFDY","L1":"SSYGMSY","L2":"DAS","L3":"QQYDSWPT"},
 "Fab04":{"H1":"SSYAMHIN","H2":"ISYDGGDANYNPNLKD","H3":"CARGYGSGDWYFDYFDY","L1":"SNYGTSY","L2":"DAS","L3":"QQYDSWPT"},
 "Fab05":{"H1":"SSYAMHIN","H2":"ISYDGGDANYNPNLKD","H3":"CARGYGSGDWYFDYFDY","L1":"SNYGMSY","L2":"DAS","L3":"QQYDSWPT"},
 "Fab06":{"H1":"SGYSMHIN","H2":"ISYDGGDANYNPNLKD","H3":"CARGLYGSDWYFDYFDY","L1":"SNYGTSY","L2":"DAS","L3":"QQYNNYPT"},
 "Fab07":{"H1":"GSYAMYIN","H2":"ISYDGGDTNYNPSLKD","H3":"CARDFYGGDWYFDYFDY","L1":"SNYGTSY","L2":"DSS","L3":"QQYDSWPT"},
 "Fab08":{"H1":"SGYSMHIN","H2":"ISYDGGDTNYNPSLKD","H3":"CARDFYGGDWYFDYFDY","L1":"SNYGTSY","L2":"DAS","L3":"QQYNNWPT"},
 "Fab09":{"H1":"SSYGLHIN","H2":"ISYDGGDANYNPNLKD","H3":"CARGYSSGDWYFDYFDY","L1":"SSYGMSY","L2":"DAS","L3":"QQYDSWPT"},
 "Fab10":{"H1":"GSYAMYIN","H2":"ISYDGGDTNYNPSLKD","H3":"CARDFYGGDWYFDYFDY","L1":"SSYGMSY","L2":"DSS","L3":"QQYNTWPT"},
 "Fab11":{"H1":"SGYSMHIN","H2":"ISYDGGDANYNPNLKD","H3":"CARGYSSGDWYFDYFDY","L1":"SNYGTSY","L2":"DAS","L3":"QQYNNWPT"},
 "Fab12":{"H1":"SGYSMHIN","H2":"ISYDGGDANYNPNLKD","H3":"CARDFYGGDWYFDYFDY","L1":"SSYGMSY","L2":"DAS","L3":"QQYNNWPT"}
}

# Targets scored by xreact_scores(): name -> sequence. Declared empty here.
# NOTE(review): running this cell resets FAMILY to {} even if another cell has
# already assigned NFSF_FAMILY to it; re-run that assignment afterwards.
FAMILY = {
  # Paste TNFSF sequences here to evaluate overlaps.
}

def xreact_scores(clone):
    """Rank every FAMILY entry by shared 6-mers with one clone's paratope.

    Looks ``clone`` up in the module-level CDRS, builds its concatenated
    paratope with paratope_concat(), and counts, for each (name, sequence) in
    the module-level FAMILY, how many distinct 6-mers the two have in common.

    Returns a list of ``(name, overlap_count)`` tuples sorted by count,
    highest first; entries with equal counts keep FAMILY's iteration order.
    The list is empty when FAMILY is empty. Raises KeyError for an unknown
    clone id.
    """
    heavy = {key: CDRS[clone][key] for key in ("H1", "H2", "H3")}
    light = {key: CDRS[clone][key] for key in ("L1", "L2", "L3")}
    paratope_kmers = kmer_set(paratope_concat(heavy, light), 6)
    hits = [
        (name, len(paratope_kmers & kmer_set(target, 6)))
        for name, target in FAMILY.items()
    ]
    # list.sort is stable, so ties stay in FAMILY order even with reverse=True.
    hits.sort(key=lambda pair: pair[1], reverse=True)
    return hits

# Example (FAMILY empty by default): with no FAMILY entries the scoring loop in
# xreact_scores has nothing to iterate over, so this prints an empty list, [].
print(xreact_scores("Fab06"))


In [None]:
# Local alignment hotspot scan (12–15 aa windows) vs TNFSF family
# Prints best identity % and positions; flags >=40% identity

def best_local_identity(query, target, wmin=12, wmax=15):
    """Find the ungapped window pair with the highest fractional identity.

    Every window of width ``wmin``..``wmax`` (inclusive) in ``query`` is
    compared position-by-position with every same-width window in ``target``;
    identity is the number of matching positions divided by the width.

    Returns ``(identity, (width, query_start, target_start))`` for the best
    pair. Only a strictly higher identity replaces the current best, so ties
    resolve to the first pair met (smallest width, then query start, then
    target start). If no window pair scores above zero, or no window fits,
    the result is ``(0.0, ('', -1, -1))``.
    """
    top_score = 0.0
    top_hit = ('', -1, -1)
    q_len = len(query)
    t_len = len(target)
    for width in range(wmin, wmax + 1):
        for q_pos in range(q_len - width + 1):
            q_window = query[q_pos:q_pos + width]
            for t_pos in range(t_len - width + 1):
                t_window = target[t_pos:t_pos + width]
                matches = 0
                for q_res, t_res in zip(q_window, t_window):
                    if q_res == t_res:
                        matches += 1
                score = matches / width
                if score > top_score:
                    top_score = score
                    top_hit = (width, q_pos, t_pos)
    return (top_score, top_hit)

# Scan every clone's concatenated paratope against each family sequence and
# print the best ungapped window, marking hits at or above 40% identity.
print("=== Local alignment hotspot scan ===")
for clone in sorted(CDRS):
    Hs = {k: CDRS[clone][k] for k in ("H1", "H2", "H3")}
    Ls = {k: CDRS[clone][k] for k in ("L1", "L2", "L3")}
    p = paratope_concat(Hs, Ls)
    # Use FAMILY when it has entries; otherwise fall back to NFSF_FAMILY.
    for fam, seq in (FAMILY or NFSF_FAMILY).items():
        ident, (w, i, j) = best_local_identity(p, seq)
        if ident >= 0.40:
            flag = "AMBER"
        else:
            flag = "-"
        print(f"{clone} vs {fam}: best {ident*100:.1f}% over {w}aa at p[{i}] vs fam[{j}] {flag}")
