In [None]:
import pandas as pd
import pubchempy as pcp
from IPython.display import display
import os
import re
import requests

%run analyse.ipynb # Import analysis and visualization functions


# Output layout used by this script:
#   results/                 CSV exports
#   results/images/          structure images
#   results/plots/           plots
#   results/plots/radars/    radar plots
RESULTS_DIR = "results"
PLOT_DIR = os.path.join(RESULTS_DIR, "plots")
IMG_DIR = os.path.join(RESULTS_DIR, "images")
RADAR_DIR = os.path.join(PLOT_DIR, "radars")

# Create every output folder up front so later writes never hit a missing directory.
for folder in [RESULTS_DIR, IMG_DIR, PLOT_DIR, RADAR_DIR]:
    os.makedirs(folder, exist_ok=True)


def is_quit(q: str) -> bool:
    """Return True when *q* is the quit command.

    The value is converted to text, stripped of surrounding whitespace and
    lower-cased before being compared with "q" and "quit".
    """
    command = str(q).strip().lower()
    return command == "q" or command == "quit"
    

def detect_input_type(s: str) -> str:
    """Guess which PubChem namespace an identifier belongs to.

    The classification is a cheap textual heuristic, checked in this order:

    * empty / whitespace-only        -> "name"
    * starts with "InChI="           -> "inchi"
    * consists of digits only        -> "cid"
    * contains "=", "#", "/" or "\\" -> "smiles"
    * anything else                  -> "name"

    A SMILES string without any of those symbols (for example "CCO") is
    therefore reported as "name".

    Non-string values are accepted as well: they are converted to text first,
    and an integral float such as ``2244.0`` is treated like the integer
    ``2244`` so that it is recognised as a CID.

    Returns:
        One of "name", "inchi", "cid" or "smiles".
    """
    # Values coming from a DataFrame may be numbers rather than text; calling
    # .strip() on them directly would raise AttributeError.
    if isinstance(s, float) and s.is_integer():
        s = int(s)
    s = str(s).strip()

    if not s:
        return "name"
    if s.startswith("InChI="):
        return "inchi"
    if re.fullmatch(r"\d+", s):
        return "cid"
    # "=" already covers the "(=O)" / "=0" patterns, so they need no own entry.
    if any(tok in s for tok in ("=", "#", "/", "\\")):
        return "smiles"
    return "name"


# Property names requested from PubChem for every compound (used by get_props).
PUBCHEM_PROPS = [
    # formula and masses
    "MolecularFormula",
    "MolecularWeight",
    "ExactMass",
    # structure identifiers
    "CanonicalSMILES",
    "IsomericSMILES",
    "InChI",
    "InChIKey",
    "IUPACName",
    # computed descriptors
    "XLogP",
    "TPSA",
    "HBondDonorCount",
    "HBondAcceptorCount",
    "RotatableBondCount",
    "HeavyAtomCount",
    "Charge",
]


def get_cid_by(ns: str, value: str):
    """Resolve *value* to a PubChem CID.

    Args:
        ns: Namespace of *value*. "cid" means the value already is a CID and
            is only converted to ``int`` (via ``float``, so "2244.0" works);
            any other namespace is passed to ``pcp.get_compounds``.
        value: The identifier to resolve.

    Returns:
        The CID of the first match, or None when nothing matched or any
        exception occurred during conversion or lookup.
    """
    try:
        if ns != "cid":
            matches = pcp.get_compounds(value, namespace=ns)
            if not matches:
                return None
            return matches[0].cid
        # Already a CID: no lookup needed, just normalise the number.
        return int(float(value))
    except Exception:
        return None


def get_props(cid: int) -> dict:
    """Fetch the properties listed in PUBCHEM_PROPS for one compound.

    Args:
        cid: PubChem compound identifier.

    Returns:
        A dict that always contains every name from PUBCHEM_PROPS (value None
        when unavailable) plus whatever additional keys the PubChem response
        carried. Lookup failures are swallowed on purpose: the caller gets
        the partially filled dict instead of an exception.
    """
    out = {k: None for k in PUBCHEM_PROPS}
    try:
        props = pcp.get_properties(PUBCHEM_PROPS, cid, namespace="cid")
        if props and isinstance(props, list):
            out.update(props[0])
    except Exception:
        # Best effort: keep the None placeholders and try the fallbacks below.
        pass

    # PubChem's current property table reports SMILES under "SMILES" (with
    # stereo/isotope information) and "ConnectivitySMILES" (without) instead of the
    # legacy "IsomericSMILES" / "CanonicalSMILES" names. Copy them over so
    # callers that read the legacy keys still find a value.
    for legacy_key, current_key in (
        ("IsomericSMILES", "SMILES"),
        ("CanonicalSMILES", "ConnectivitySMILES"),
    ):
        if not out.get(legacy_key) and out.get(current_key):
            out[legacy_key] = out[current_key]

    if not out["CanonicalSMILES"] and not out["IsomericSMILES"]:
        # Last resort: ask the REST endpoint for the plain-text SMILES directly.
        try:
            url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/property/CanonicalSMILES/TXT"
            r = requests.get(url, timeout=10)
            if r.ok:
                out["CanonicalSMILES"] = r.text.strip() or None
        except Exception:
            # Network problems simply leave the SMILES entries empty.
            pass
    return out


def download_png(cid: int):
    """Download the PubChem structure image for *cid* into IMG_DIR.

    Args:
        cid: PubChem compound identifier; a falsy value (None, 0) is rejected.

    Returns:
        The path of the written ``<cid>.png`` file, or None when *cid* is
        falsy, the HTTP response is not OK, or any exception occurs while
        downloading or writing.
    """
    if not cid:
        return None

    target = os.path.join(IMG_DIR, f"{cid}.png")
    endpoint = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/PNG"
    try:
        response = requests.get(endpoint, timeout=10)
        if not response.ok:
            return None
        with open(target, "wb") as image_file:
            image_file.write(response.content)
    except Exception:
        return None
    return target


def fetch_data(val: str, typ: str) -> dict:
    """Look up one compound and return its properties as a flat record.

    Args:
        val: The identifier entered by the user (name, CID, SMILES or InChI).
        typ: The detected kind of *val*; unknown kinds are looked up as names.

    Returns:
        A dict that always has "input", "input_type" and "status". The status
        is "ok" (record also carries the CID, the properties and the PubChem
        URL), "not_found" (no CID could be resolved) or "error" (an exception
        was raised; it is printed, not propagated).
    """
    try:
        namespace = typ if typ in {"cid", "name", "smiles", "inchi"} else "name"

        cid = get_cid_by(namespace, val)
        if not cid:
            # Input that only looked like SMILES gets a second chance as a name.
            cid = get_cid_by("name", val) if typ == "smiles" else None

        if cid is None:
            return {"input": val, "input_type": typ, "status": "not_found"}

        props = get_props(cid)
        # Prefer the isomeric SMILES, fall back to the canonical one.
        smiles = props.get("IsomericSMILES") or props.get("CanonicalSMILES")

        record = {"input": val, "input_type": typ, "status": "ok", "cid": cid}
        record["name"] = props.get("IUPACName")
        record["smiles"] = smiles
        record["formula"] = props.get("MolecularFormula")
        record["mass"] = props.get("MolecularWeight")
        record["exact_mass"] = props.get("ExactMass")
        record["logP"] = props.get("XLogP")
        record["TPSA"] = props.get("TPSA")
        record["H_donors"] = props.get("HBondDonorCount")
        record["H_acceptors"] = props.get("HBondAcceptorCount")
        record["rotatable_bonds"] = props.get("RotatableBondCount")
        record["heavy_atoms"] = props.get("HeavyAtomCount")
        record["charge"] = props.get("Charge")
        record["InChI"] = props.get("InChI")
        record["InChIKey"] = props.get("InChIKey")
        record["pubchem_url"] = f"https://pubchem.ncbi.nlm.nih.gov/compound/{cid}"
        return record
    except Exception as e:
        print("Error:", e)
        return {"input": val, "input_type": typ, "status": "error"}


# ---------- CSV handling ----------
def load_from_csv_df(path: str) -> pd.DataFrame:
    """Load compounds from a CSV file and return a DataFrame with properties.

    A file with exactly one column is treated as a list of identifiers: every
    non-missing cell is looked up with ``fetch_data`` and the resulting
    records form the returned DataFrame. A file with any other number of
    columns is returned exactly as ``pd.read_csv`` parsed it.

    Args:
        path: Location of the CSV file. The first row is read as the header.

    Returns:
        The DataFrame described above, or an empty DataFrame when reading or
        processing fails (the error is printed).
    """
    try:
        df = pd.read_csv(path)
        if df.shape[1] != 1:
            return df

        col = df.columns[0]
        records = []
        for cell in df[col].dropna():
            # read_csv turns a column of CIDs into numbers (floats when the
            # column has blanks); convert back to text so the identifier can
            # be classified and looked up like typed input.
            if isinstance(cell, float) and cell.is_integer():
                cell = int(cell)
            query = str(cell)
            records.append(fetch_data(query, detect_input_type(query)))
        return pd.DataFrame(records)
    except Exception as e:
        print("CSV load error:", e)
        return pd.DataFrame()


# ---------- Manual input ----------
def manual_input_df() -> pd.DataFrame:
    """Interactively collect compounds from the user and look each one up.

    The user is first asked how many compounds to enter; "0", "q" or "quit"
    cancels and yields an empty DataFrame. Anything that is not a positive
    integer triggers a hint and the question is asked again. Each compound is
    then read in turn; entering "q"/"quit" stops early and keeps the rows
    gathered so far.

    Returns:
        A DataFrame with one ``fetch_data`` record per entered compound
        (empty when the user cancelled).
    """
    print("Enter 'q' anytime to cancel")
    while True:
        answer = input("How many compounds? (0/q to exit): ").strip()
        if is_quit(answer) or answer == "0":
            return pd.DataFrame()
        try:
            n = int(answer)
        except ValueError:
            # Not a number at all: treat it like any other invalid count.
            n = 0
        if n > 0:
            break
        # Covers non-numeric input as well as negative numbers and "00".
        print("Enter integer > 0")

    rows = []
    for i in range(n):
        val = input(f"Compound {i+1}: ").strip()
        if is_quit(val):
            break
        rows.append(fetch_data(val, detect_input_type(val)))
    return pd.DataFrame(rows)


# ---------- Menu ----------
def show_menu():
    """Print the main menu (title, separator and the three options)."""
    menu_lines = (
        "\nWelcome to Molecular Property Analyzer",
        "=" * 40,
        "1. Manual input",
        "2. Load from CSV",
        "0. Exit",
    )
    for line in menu_lines:
        print(line)


# ---------- Entry point ----------
def _analyse_and_save(df, filename):
    """Post-process *df*, preview it and write it to RESULTS_DIR/*filename*.

    Does nothing for an empty DataFrame. ``add_rules_and_plots`` is not
    defined in this file; presumably it comes from the ``%run`` of
    analyse.ipynb -- confirm there.
    """
    if df.empty:
        return
    df = add_rules_and_plots(df, PLOT_DIR, RADAR_DIR)
    display(df.head(5))
    df.to_csv(os.path.join(RESULTS_DIR, filename), index=False)
    print(f"Saved to results/{filename}")


def main():
    """Run the interactive menu loop until the user chooses "0".

    Option 1 collects compounds by hand, option 2 loads them from a CSV file
    whose path is asked for; in both cases a non-empty result is analysed,
    previewed and saved as CSV. Any other choice prints a hint and shows the
    menu again.
    """
    while True:
        show_menu()
        choice = input("Choose option (0–2): ").strip()

        if choice == "0":
            print("Bye ;)")
            break

        if choice == "1":
            _analyse_and_save(manual_input_df(), "results_manual.csv")
        elif choice == "2":
            # Paths pasted from a file manager often arrive wrapped in quotes.
            csv_path = input("CSV path (q to exit): ").strip().strip('"')
            if is_quit(csv_path):
                continue
            if not os.path.exists(csv_path):
                print("File not found:(")
                continue
            _analyse_and_save(load_from_csv_df(csv_path), "results_from_csv.csv")
        else:
            print("Wrong option :(")


# Start the menu only when executed directly, not when imported.
if __name__ == "__main__":
    main()