In [None]:
import numpy as np
import pandas as pd
import requests
import io
import ast

In [None]:
# Load the input table; replace the path with your own CSV.
# Later cells read UniProt accessions from this frame's "uniprot_id" column.
df = pd.read_csv("ZincFinger_Classical_RBD.csv")

In [None]:
def fetch_go_terms_with_ids(uniprot_id, timeout=30):
    """Fetch the GO annotations of one UniProtKB entry, grouped by aspect.

    Requests the entry's JSON from the UniProt REST API and collects every
    cross-reference whose database is ``"GO"``. Term keys keep the aspect
    prefix exactly as it appears in the ``GoTerm`` property (``"F:..."``,
    ``"P:..."`` or ``"C:..."``).

    Args:
        uniprot_id: UniProtKB accession to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict: ``{"F": {...}, "P": {...}, "C": {...}}`` where each inner dict
        maps the prefixed GO term to ``{"GO_ID": ..., "Evidence": ...}``.
        All three inner dicts are empty when the request fails, times out,
        answers with a non-200 status, or the body is not valid JSON.
    """
    go_terms = {"F": {}, "P": {}, "C": {}}
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.json"

    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return go_terms
        data = response.json()
    except (requests.RequestException, ValueError):
        # Best-effort lookup: transport and decoding failures count as a miss,
        # just like a non-200 status.
        return go_terms

    for ref in data.get("uniProtKBCrossReferences", []):
        if ref.get("database") != "GO":
            continue
        props = {p.get("key"): p.get("value") for p in ref.get("properties", [])}
        term = props.get("GoTerm") or ""
        aspect = term[:1]
        # Terms with an unexpected prefix are skipped instead of raising KeyError.
        if len(term) > 2 and term[1] == ":" and aspect in go_terms:
            go_terms[aspect][term] = {
                "GO_ID": ref.get("id", ""),
                "Evidence": props.get("GoEvidenceType", ""),
            }

    return go_terms

# Demo input: the UniProt accessions to look up
uniprot_ids = ["P47974"]

# One summary row per accession; each GO aspect is flattened into a
# "; "-separated string of its (prefixed) term names
records = []
for uid in uniprot_ids:
    go = fetch_go_terms_with_ids(uid)
    records.append({
        "UniProt_ID": uid,
        **{
            column: "; ".join(go[aspect])
            for column, aspect in (
                ("Molecular Function", "F"),
                ("Biological Process", "P"),
                ("Cellular Component", "C"),
            )
        },
    })

new_df = pd.DataFrame(records)
print(new_df)


In [None]:
def fetch_go_terms_cleaned(uniprot_id, timeout=30):
    """Fetch the GO annotations of one UniProtKB entry with bare term labels.

    Requests the entry's JSON from the UniProt REST API and collects every
    cross-reference whose database is ``"GO"``. The leading ``"F:"``,
    ``"P:"`` or ``"C:"`` of the ``GoTerm`` property selects the aspect and is
    stripped from the label used as key.

    Args:
        uniprot_id: UniProtKB accession to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict: ``{"F": {...}, "P": {...}, "C": {...}}`` where each inner dict
        maps the un-prefixed GO label to ``{"GO_ID": ..., "Evidence": ...}``.
        All three inner dicts are empty when the request fails, times out,
        answers with a non-200 status, or the body is not valid JSON.
    """
    go_terms = {"F": {}, "P": {}, "C": {}}
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.json"

    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return go_terms
        data = response.json()
    except (requests.RequestException, ValueError):
        # Best-effort lookup: transport and decoding failures count as a miss,
        # just like a non-200 status.
        return go_terms

    for ref in data.get("uniProtKBCrossReferences", []):
        if ref.get("database") != "GO":
            continue
        props = {p.get("key"): p.get("value") for p in ref.get("properties", [])}
        raw_term = props.get("GoTerm") or ""
        aspect = raw_term[:1]
        # Terms with an unexpected prefix are skipped instead of raising KeyError.
        if len(raw_term) > 2 and raw_term[1] == ":" and aspect in go_terms:
            label = raw_term[2:]  # drop the "<aspect>:" prefix
            go_terms[aspect][label] = {
                "GO_ID": ref.get("id", ""),
                "Evidence": props.get("GoEvidenceType", ""),
            }

    return go_terms

In [None]:
# Demo input: the UniProt accessions to look up
uniprot_ids = ["P47974"]

# One row per accession; each aspect column keeps the full
# {label: {"GO_ID", "Evidence"}} dict returned by the fetch helper
records = []
for uid in uniprot_ids:
    go = fetch_go_terms_cleaned(uid)
    records.append(dict(zip(
        ("UniProt_ID", "Molecular Function", "Biological Process", "Cellular Component"),
        (uid, go["F"], go["P"], go["C"]),
    )))

results = pd.DataFrame(records)


In [None]:
def enrich_with_go_terms(df, id_column="uniprot_id"):
    """Build a GO-annotation table for every accession in ``df``.

    Calls ``fetch_go_terms_cleaned`` once per row, in row order, so the
    result has exactly one row per input row.

    Args:
        df: DataFrame holding the accessions to look up.
        id_column: Name of the column in ``df`` that holds the accessions.

    Returns:
        pandas.DataFrame: columns ``"uniprot_id"``, ``"Molecular Function"``,
        ``"Biological Process"`` and ``"Cellular Component"``; the three
        aspect columns hold the dicts returned by ``fetch_go_terms_cleaned``.

    Raises:
        KeyError: if ``id_column`` is not a column of ``df``.
    """
    records = []
    # Iterate the column itself: iterrows() builds a Series per row, which
    # can coerce value dtypes and is slower for no benefit here.
    for uid in df[id_column]:
        go = fetch_go_terms_cleaned(uid)
        records.append({
            "uniprot_id": uid,
            "Molecular Function": go["F"],
            "Biological Process": go["P"],
            "Cellular Component": go["C"],
        })
    return pd.DataFrame(records)

In [None]:
# Look up GO annotations for every row of `df`. This issues one UniProt
# REST request per row, so it can take a while on large tables.
df_go = enrich_with_go_terms(df)

In [None]:
# Save the GO table without the row index.
# NOTE(review): the three aspect columns hold dicts, which are presumably
# written as their string representation — confirm before reading the file back.
df_go.to_csv("GO_Terms.csv",index=False)

In [None]:
def fetch_cd_code_cleaned(uniprot_id, timeout=30):
    """Fetch the CD-CODE cross-references of one UniProtKB entry.

    Requests the entry's JSON from the UniProt REST API and keeps every
    cross-reference whose database is ``"CD-CODE"`` and that carries a
    non-empty ``EntryName`` property.

    Args:
        uniprot_id: UniProtKB accession to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list[dict]: one ``{"EntryName": ..., "CD-CODE ID": ...}`` dict per
        matching cross-reference, in the order they appear in the entry.
        The list is empty when there are none, or when the request fails,
        times out, answers with a non-200 status, or is not valid JSON.
    """
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.json"

    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return []
        data = response.json()
    except (requests.RequestException, ValueError):
        # Best-effort lookup: transport and decoding failures count as a miss,
        # just like a non-200 status.
        return []

    cd_code_terms = []
    for ref in data.get("uniProtKBCrossReferences", []):
        if ref.get("database") != "CD-CODE":
            continue
        props = {p.get("key"): p.get("value") for p in ref.get("properties", [])}
        entry_name = props.get("EntryName", "")
        # Cross-references without an entry name are not reported.
        if entry_name:
            cd_code_terms.append({
                "EntryName": entry_name,
                "CD-CODE ID": ref.get("id", ""),
            })

    return cd_code_terms


In [None]:
def fetch_cd_code_for_dataframe(df, id_column="uniprot_id"):
    """Collect CD-CODE annotations for every accession in ``df[id_column]``.

    Each accession is passed to ``fetch_cd_code_cleaned``. An accession with
    annotations contributes one output row per annotation; an accession
    without any contributes a single row whose ``"EntryName"`` and
    ``"CD-CODE ID"`` are ``None``.

    Args:
        df: DataFrame holding the accessions to look up.
        id_column: Name of the column in ``df`` that holds the accessions.

    Returns:
        pandas.DataFrame: rows with the keys ``"UniProtID"``, ``"EntryName"``
        and ``"CD-CODE ID"``, in input order.
    """
    # Stand-in annotation used when an accession has no CD-CODE hits.
    no_hit = [{"EntryName": None, "CD-CODE ID": None}]

    rows = []
    for accession in df[id_column]:
        hits = fetch_cd_code_cleaned(accession) or no_hit
        rows.extend(
            {
                "UniProtID": accession,
                "EntryName": hit["EntryName"],
                "CD-CODE ID": hit["CD-CODE ID"],
            }
            for hit in hits
        )

    return pd.DataFrame(rows)

In [None]:
# Collect CD-CODE cross-references for every accession in df["uniprot_id"]
# (the default id_column). Accessions without any annotation still get one
# row, with EntryName and CD-CODE ID set to None.
cd_codes = fetch_cd_code_for_dataframe(df)

In [None]:
# Keep only the rows that carry a CD-CODE annotation (EntryName present)
filtered_df = cd_codes.dropna(subset=["EntryName"])

# Number of distinct accessions among the annotated rows
unique_count = filtered_df["UniProtID"].nunique()

print(f"Unique UniProt IDs with valid EntryName: {unique_count}")

In [None]:
# Save the full CD-CODE table (including the placeholder rows for accessions
# without annotations) without the row index.
cd_codes.to_csv("CD_Codes.csv",index=False)