---

# Main Script to Run

---

## Steps

1. Open a paper in Zotero.
2. Copy the paper into a text file named "paper.txt". Save this file in a folder named after the author (e.g., "Doe et al., 2015").
3. Create annotations, extract them into a text file, and save them as "notes.txt"
4. Drag & drop the folders into the "_inbox" folder, in the same parent folder where all the paper folders are stored. The folders in the _inbox folder will be the ones that the script finds and acts on.
5. Run the script.
6. A set of Anki flashcards will be generated

## Complete example

In [None]:
import os, html2text, requests
from dotenv import load_dotenv
from pyzotero import zotero


# Name of the Zotero collection to process. It is passed to run() at the
# bottom of the file and also becomes the last component of PARENT_DECK.
library = "63.001 Neural Engineering and Signal Processing"


# ─── Config ────────────────────────────────────────────────────────────────
# Load environment variables (credentials are read via os.getenv below).
load_dotenv()

# Zotero client built from ZOTERO_USER_ID / ZOTERO_LIBRARY_TYPE / ZOTERO_API_KEY;
# the library type falls back to "user" when the variable is unset.
ZOTERO = zotero.Zotero(
    os.getenv("ZOTERO_USER_ID"),
    os.getenv("ZOTERO_LIBRARY_TYPE", "user"),
    os.getenv("ZOTERO_API_KEY"),
)

# Endpoints the helpers below POST to.
ANKI_URL = "http://127.0.0.1:8765"
OPENAI_URL = "https://api.openai.com/v1/chat/completions"
OPENAI_KEY = os.getenv("OPENAI_API_KEY")

# Every per-paper deck is created underneath this parent deck.
PARENT_DECK = f"CMU.Automated Reviews::{library}"

VERBOSE = True  # ← flip to False to silence output

# Shared HTML → Markdown converter used on note bodies; links are dropped.
H2M = html2text.HTML2Text()
H2M.ignore_links = True

# ─── Helpers ───────────────────────────────────────────────────────────────
def vprint(*msg):
    """Forward *msg* to ``print`` unchanged, but only while VERBOSE is truthy."""
    if not VERBOSE:
        return
    print(*msg)

def all_collections(limit=100):
    """Return every collection from ZOTERO, fetched page by page.

    Pages of at most *limit* entries are requested at increasing offsets;
    a page shorter than *limit* is treated as the last one.
    """
    collected = []
    offset = 0
    while True:
        batch = ZOTERO.collections(limit=limit, start=offset)
        collected += batch
        if len(batch) >= limit:
            # Full page: there may be more, so advance to the next offset.
            offset += limit
            continue
        return collected

def fetch_items(coll_key, limit=100):
    """Return every item of the collection *coll_key*, fetched page by page.

    Pages of at most *limit* entries are requested at increasing offsets;
    a page shorter than *limit* is treated as the last one.
    """
    gathered = []
    offset = 0
    while True:
        batch = ZOTERO.collection_items(coll_key, limit=limit, start=offset)
        gathered += batch
        if len(batch) >= limit:
            # Full page: there may be more, so advance to the next offset.
            offset += limit
            continue
        return gathered

def existing_decks():
    """Return the set of deck names reported by the Anki endpoint.

    Sends a ``deckNames`` action to ANKI_URL and returns its ``result`` list
    as a set. An empty set is returned when the reply carries no usable
    result.
    """
    reply = requests.post(
        ANKI_URL, json={"action": "deckNames", "version": 6}
    ).json()
    # On failure the reply still contains the "result" key, but with a null
    # value, so a .get() default is never used and set(None) would raise
    # TypeError. `or []` covers both the missing and the null case.
    return set(reply.get("result") or [])

def ensure_deck(deck):
    """POST a ``createDeck`` action for *deck* to ANKI_URL.

    The response is not inspected and nothing is returned.
    """
    payload = {"action": "createDeck", "version": 6, "params": {"deck": deck}}
    requests.post(ANKI_URL, json=payload)

def push_card(deck, front, back):
    """POST an ``addNotes`` action that adds one "Basic" note to *deck*.

    *front* and *back* fill the note's Front/Back fields; the note is tagged
    "paper" and "notecard". The response is not inspected.
    """
    note = {
        "deckName": deck,
        "modelName": "Basic",
        "fields": {"Front": front, "Back": back},
        "tags": ["paper", "notecard"],
    }
    request_body = {
        "action": "addNotes",
        "version": 6,
        "params": {"notes": [note]},
    }
    requests.post(ANKI_URL, json=request_body)

def generate_cards(text):
    """Ask the chat-completions endpoint to turn annotations into Q/A cards.

    Args:
        text: The annotation text sent as the user message.

    Returns:
        The content string of the first choice in the response, expected to
        hold "Q:" / "A:" lines as requested by the system prompt.

    Raises:
        RuntimeError: If the response contains no choices (for example an
            error payload); the message includes the error detail returned
            by the API when one is present.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_KEY}",
    }
    body = {
        "model": "gpt-3.5-turbo",
        "temperature": 0.7,
        "messages": [
            {"role": "system", "content": (
                "You are given annotations from a research paper. "
                "Create concise Q/A notecards (≤15). Each card starts with Q:/A: "
                "Include paper reference in the question.")
            },
            {"role": "user", "content": text},
        ],
    }
    reply = requests.post(OPENAI_URL, headers=headers, json=body).json()

    # Error payloads (bad key, rate limit, oversized prompt, ...) have no
    # "choices"; report what the API said instead of failing with KeyError.
    choices = reply.get("choices")
    if not choices:
        error = reply.get("error")
        if isinstance(error, dict):
            detail = error.get("message", error)
        else:
            detail = error or reply
        raise RuntimeError(f"OpenAI request failed: {detail}")

    return choices[0]["message"]["content"]

def parse_cards(txt):
    """Parse "Q:" / "A:" formatted text into a list of (question, answer) pairs.

    A line starting with "Q:" opens a new card and a line starting with "A:"
    sets its answer. Non-blank lines that directly follow a Q:/A: line are
    treated as a continuation and appended (space-separated) to that field,
    so multi-line questions and answers are no longer truncated to their
    first line. A blank line ends the continuation, which keeps separators
    such as numbering between cards out of the previous answer. Cards missing
    either a question or an answer are dropped.

    Args:
        txt: Raw text to parse; ``None`` or an empty string yields ``[]``.

    Returns:
        A list of ``(question, answer)`` tuples in order of appearance.
    """
    if not txt:
        return []

    cards = []
    question, answer = "", ""
    field = None  # field that continuation lines extend: "q", "a" or None

    for raw in txt.splitlines():
        line = raw.strip()
        if not line:
            field = None  # a blank line closes the current field
            continue

        if line.startswith("Q:"):
            # A new question closes the previous card, if it was complete.
            if question and answer:
                cards.append((question, answer))
            question, answer = line[2:].strip(), ""
            field = "q"
        elif line.startswith("A:"):
            answer = line[2:].strip()
            field = "a"
        elif field == "q":
            question = f"{question} {line}".strip()
        elif field == "a":
            answer = f"{answer} {line}".strip()
        # Any other line (preamble, numbering, ...) is ignored.

    if question and answer:
        cards.append((question, answer))
    return cards

# ─── Main ───────────────────────────────────────────────────────────────────
def run(collection_name):
    """Build Anki cards from the annotation notes of one Zotero collection.

    Steps:
      1. Find the collection called *collection_name* and pull its items.
      2. Keep notes whose first 80 characters (as Markdown, lower-cased)
         contain "annotations" and bucket them under their top-level item.
      3. For each bucket whose top-level item is a journal article,
         conference paper or report, make sure a "<author> et al., <year>"
         deck exists under PARENT_DECK, generate cards from the notes and
         push them to that deck.

    Args:
        collection_name: Exact name of the Zotero collection to process.

    Raises:
        ValueError: If no collection with that name exists.
    """
    import re  # local import: only needed for the year extraction below

    vprint(f"Looking for collection: {collection_name}")
    # A default is supplied so a missing collection does not leak
    # StopIteration out of this function.
    coll_key = next(
        (
            c["data"]["key"] for c in all_collections()
            if c["data"]["name"] == collection_name
        ),
        None,
    )
    if coll_key is None:
        raise ValueError(f"No Zotero collection named {collection_name!r}")
    vprint("Collection key:", coll_key)

    items = fetch_items(coll_key)
    vprint(f"Total items pulled: {len(items)}")

    # split items
    notes = [i for i in items if i["data"]["itemType"] == "note"]

    # index every pulled item by key so we can walk parent links fast
    items_by_key = {i["key"]: i for i in items}

    def top_level_key(k):
        """Follow parentItem links until we reach a top-level item."""
        while True:
            itm = items_by_key.get(k)
            parent = itm and itm["data"].get("parentItem")
            if not parent:
                return k          # k is now a top-level item
            k = parent            # climb one level

    annos = {}
    for n in notes:
        md = H2M.handle(n["data"]["note"]).strip()
        head = md.lower()[:80]
        if "annotations" not in head:
            continue

        # Standalone notes carry no parentItem; skip them instead of raising
        # KeyError, since there is no paper to attach their cards to.
        parent_key = n["data"].get("parentItem")
        if not parent_key:
            vprint(f"    • note {n['key']} has no parent item, skipping")
            continue

        top_key = top_level_key(parent_key)
        annos.setdefault(top_key, []).append(md)
        vprint(f"    • note {n['key']} bucketed under TOP {top_key}")

    # troubleshooting: used to make sure the right notes are captured
    for pid, txts in annos.items():
        print(f"PARENT {pid}: {len(txts)} annotation‑notes")
        # If you want to see the first 60 chars of each note:
        for t in txts:
            print("   ↳", repr(t[:60]))

    print(f"[+] Papers with matching notes: {len(annos)}")

    # Only these item types are treated as papers; notes attached to any
    # other item type are reported as orphans below.
    papers_by_id = {
        i["key"]: i for i in items
        if i["data"]["itemType"] in {"journalArticle", "conferencePaper", "report"}
    }
    vprint(f"Papers: {len(papers_by_id)}   Notes: {len(notes)}")

    decks_exist = existing_decks()

    for pid, txts in annos.items():
        paper = papers_by_id.get(pid)
        if not paper:
            vprint("Orphan note, skipping:", pid)
            continue

        # The creator list may be empty, and single-field creators store
        # their name under "name" rather than "lastName".
        creators = paper["data"].get("creators") or []
        first_creator = creators[0] if creators else {}
        author = (
            first_creator.get("lastName")
            or first_creator.get("name")
            or "Unknown"
        )

        # The date field is free-form ("2022-03-01", "March 2022",
        # "3/19/2025"), so look for a standalone 4-digit year instead of
        # slicing the first four characters.
        year_match = re.search(r"(?<!\d)\d{4}(?!\d)", paper["data"].get("date") or "")
        year = year_match.group() if year_match else "n.d."
        deck = f"{PARENT_DECK}::{author} et al., {year}"

        if deck not in decks_exist:
            ensure_deck(deck)
            decks_exist.add(deck)
            vprint("Created deck:", deck)

        notes_block = "\n\n".join(txts)
        vprint(f"Generating cards for {author} {year}  (notes={len(txts)})")
        cards = parse_cards(generate_cards(notes_block))
        vprint(f"  → {len(cards)} cards")

        for q, a in cards:
            push_card(deck, q, a)

# Entry point: process the collection named by the module-level `library`.
if __name__ == "__main__":
    run(library)


Looking for collection: 63.001 Neural Engineering and Signal Processing
Collection key: N9F6KIU3
Total items pulled: 16
    • note 9X4ITAP5 bucketed under TOP WVEM3QEJ
    • note YZE9W56W bucketed under TOP NWRNN2L8
PARENT WVEM3QEJ: 1 annotation‑notes
   ↳ '# Annotations  \n(3/19/2025, 9:57:50 AM)\n\n“One hypothesis sug'
PARENT NWRNN2L8: 1 annotation‑notes
   ↳ '# Annotations  \n(11/3/2024, 7:26:25 PM)\n\n“Main results. The '
[+] Papers with matching notes: 2
Papers: 3   Notes: 5
Created deck: CMU.Automated Reviews::63.001 Neural Engineering and Signal Processing::Graham et al., 2022
Generating cards for Graham 2022  (notes=1)
  → 9 cards
Created deck: CMU.Automated Reviews::63.001 Neural Engineering and Signal Processing::Dalrymple et al., 2021
Generating cards for Dalrymple 2021  (notes=1)
  → 4 cards
