---

# Main Script to Run

---

## Steps

1. Open a Paper on Zotero
2. Copy the paper's text into a file named "paper.txt" and save it in a folder named after the first author and year (e.g., "Doe et al., 2015").
3. Create annotations, extract them into a text file, and save them as "notes.txt"
4. Drag & drop the folders into the "_inbox" folder, in the same parent folder where all the paper folders are stored. The folders in the _inbox folder will be the ones that the script finds and acts on.
5. Run the Script 
6. A set of Anki flashcards will be generated

## Complete example

In [1]:
import io
import os
import tempfile

import html2text
import requests
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from pyzotero import zotero

# Load settings from the environment (a .env file is read first, if present).
load_dotenv()
ZOTERO_API_KEY = os.getenv("ZOTERO_API_KEY")
LIB_ID = os.getenv("ZOTERO_USER_ID")
LIB_TYPE = os.getenv("ZOTERO_LIBRARY_TYPE", "user")
ANKICONNECT_URL = "http://127.0.0.1:8765"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Endpoint used by generate_notecards(); it was referenced but never defined.
OPENAI_URL = "https://api.openai.com/v1/chat/completions"
# NOTE(review): PARENT_DECK and INBOX_PATH are also referenced by the main()
# functions in this file but are not defined anywhere in this section --
# define them before running.

# Initialize Zotero client
zot = zotero.Zotero(LIB_ID, LIB_TYPE, ZOTERO_API_KEY)

def fetch_items(collection_id=None, tag=None, since=None):
    """Return Zotero items, optionally narrowed by collection, tag, or ``since``.

    Only filters that were given a truthy value are forwarded to
    ``zot.items``; with no filters the call is made without arguments.
    """
    candidate_filters = (
        ("collection", collection_id),
        ("tag", tag),
        ("since", since),
    )
    query = {name: value for name, value in candidate_filters if value}
    return zot.items(**query)

def download_pdf(item):
    """Download the first PDF attachment for a Zotero item.

    Args:
        item: A Zotero item dict; only ``item['key']`` is read.

    Returns:
        The raw file content of the first child attachment whose
        ``contentType`` is ``application/pdf``, or ``None`` when the item
        has no such attachment.
    """
    attachments = zot.children(item['key'], itemType='attachment')
    for att in attachments:
        # .get() so an attachment without a contentType field is skipped
        # instead of raising KeyError.
        if att['data'].get('contentType') == 'application/pdf':
            # pyzotero exposes raw attachment content through Zotero.file();
            # there is no dump_attachment method on the client.
            return zot.file(att['key'])
    return None

def extract_text_from_pdf(pdf_bytes):
    """Extract the text of every page of an in-memory PDF.

    Args:
        pdf_bytes: The PDF file content as bytes.

    Returns:
        The text of all pages joined with newlines. A page for which
        ``extract_text()`` returns a falsy value contributes an empty string.
    """
    # Requires the module-level ``import io`` (previously missing, which made
    # this function fail with NameError).
    reader = PdfReader(io.BytesIO(pdf_bytes))
    return "\n".join(page.extract_text() or "" for page in reader.pages)

def extract_notes(item):
    """Render every child note of a Zotero item and join the results.

    Each note's HTML is passed through an ``html2text`` converter with
    ``ignore_links`` enabled; the rendered notes are separated by a blank line.
    """
    child_notes = zot.children(item['key'], itemType='note')
    converter = html2text.HTML2Text()
    converter.ignore_links = True
    rendered = [converter.handle(child['data']['note']) for child in child_notes]
    return "\n\n".join(rendered)

def process_item(item):
    """Export one Zotero item into a folder holding paper.txt and notes.txt.

    The folder is named "<first author> et al., <year>" (e.g.
    "Doe et al., 2023") and is created relative to the current working
    directory if it does not exist.

    Args:
        item: A Zotero item dict with a ``data`` mapping and a ``key``.

    Returns:
        The folder name that was written to.
    """
    data = item['data']

    # Items may have no creators at all; fall back to "Unknown" instead of
    # raising IndexError/KeyError.
    creators = data.get('creators') or [{}]
    first_creator = creators[0]
    # Single-field creators (e.g. institutions) use 'name' rather than 'lastName'.
    auth = first_creator.get('lastName') or first_creator.get('name') or "Unknown"
    year = data.get('date', '')[:4]
    folder = f"{auth} et al., {year}"
    os.makedirs(folder, exist_ok=True)

    # Paper.txt -- only written when a PDF attachment was found.
    pdf = download_pdf(item)
    if pdf:
        paper_txt = extract_text_from_pdf(pdf)
        # Context manager guarantees the handle is flushed and closed.
        with open(os.path.join(folder, "paper.txt"), "w", encoding="utf-8") as paper_file:
            paper_file.write(paper_txt)

    # Notes.txt -- always written, possibly empty.
    notes_txt = extract_notes(item)
    with open(os.path.join(folder, "notes.txt"), "w", encoding="utf-8") as notes_file:
        notes_file.write(notes_txt)

    return folder

# Module-level HTML-to-text converter shared by get_annotation_notes();
# configured with ignore_links enabled.
_md = html2text.HTML2Text()
_md.ignore_links = True

def _starts_with_annotations(text):
    """Return True when *text* begins with "annotations", ignoring leading Markdown markup."""
    # html2text renders headings and emphasis with leading '#', '*' or '_'
    # markers, so strip those (and whitespace) before testing the prefix.
    return text.lstrip("#*_ \t").lower().startswith("annotations")


def get_annotation_notes(item_key):
    """
    Fetch all child “note” items for this Zotero key,
    render HTML→plain text, and keep only those that start with “annotations”.

    The prefix test is case-insensitive and tolerates leading Markdown
    heading/emphasis markers, so a note whose title was an HTML heading
    (rendered as "# Annotations ...") is still recognised.

    Returns:
        A list of the rendered, whitespace-stripped note texts that matched.
    """
    raw_notes = zot.children(item_key, itemType="note")
    plain_notes = [_md.handle(n["data"]["note"]).strip() for n in raw_notes]
    return [n for n in plain_notes if _starts_with_annotations(n)]

def main(collection_path_or_key):
    """Create Anki cards from the annotation notes of a Zotero collection.

    For every item in the collection that has "annotations" notes, the notes
    are sent to OpenAI, the reply is parsed into Q&A pairs, and each pair is
    added to a deck named "<PARENT_DECK>::<author> et al., <year>". Items
    whose deck already existed when the run started are skipped.

    Args:
        collection_path_or_key: A collection name/path understood by
            get_collection_id, or a raw Zotero collection key.
    """
    # 1) find the collection key (if they passed a path).
    # get_collection_id reports "not found" with KeyError; ValueError is kept
    # for backward compatibility. In either case treat the argument as a key.
    try:
        coll_key = get_collection_id(zot, collection_path_or_key)
    except (KeyError, ValueError):
        coll_key = collection_path_or_key

    # 2) fetch every item in that collection.
    # everything() takes the *result* of a query call, not the method itself.
    items = zot.everything(zot.collection_items(coll_key))

    existing_decks = set(get_existing_decks())

    for item in items:
        # 3) pull only the “annotations…” notes
        annos = get_annotation_notes(item["key"])
        if not annos:
            continue

        # 4) build a deck name (e.g. by first author & year); items without
        #    creators fall back to "Unknown" instead of raising.
        creators = item["data"].get("creators") or [{}]
        author = creators[0].get("lastName", "Unknown")
        year = item["data"].get("date", "")[:4] or "n.d."
        deck = f"{PARENT_DECK}::{author} et al., {year}"

        # Check before calling OpenAI so skipped items cost no API request.
        if deck in existing_decks:
            print(f"Deck '{deck}' exists; skipping.")
            continue

        # 5) join and send to OpenAI → parse Q&A
        notes_text = "\n\n".join(annos)
        raw_qa = generate_notecards(notes_text)
        cards = parse_q_and_a(raw_qa)

        # 6) push each card into Anki
        for q, a in cards:
            add_card_to_anki(deck, q, a)

        print(f"Added {len(cards)} cards to '{deck}'.")

# example usage:
# NOTE(review): this call executes as soon as this point of the file is
# reached, i.e. before get_collection_id and the other helpers defined further
# down exist; the NameError recorded at the end of this file is consistent
# with that.
if __name__ == "__main__":
    main("CMU::79 Research Articles::79.001 Electronics")


# ------------------------------------------------------------------

# code

def fetch_all_collections(zot, limit=200):
    """
    Returns a flat list of all collections by paging through Zotero’s API.

    Args:
        zot: Client object exposing ``collections(limit=..., start=...)``.
        limit: how many to request per call (max 100). Larger values are
            clamped to 100 and values below 1 are raised to 1.

    Returns:
        A list with every page's entries, in the order they were returned.
    """
    # Clamp to the documented per-request maximum. Without this, asking for
    # more than a page can hold makes every full page look "short" and the
    # loop stops after the first request.
    page_size = max(1, min(int(limit), 100))

    all_cols = []
    start = 0

    while True:
        page = zot.collections(limit=page_size, start=start)
        if not page:
            break
        all_cols.extend(page)
        start += len(page)
        # A short page means the listing is exhausted.
        if len(page) < page_size:
            break

    return all_cols

def _collection_path(data, by_key):
    """Return the collection names from the outermost ancestor down to *data*."""
    names = [data['name']]
    visited = {data['key']}
    parent_key = data.get('parentCollection')
    # Walk up until there is no parent, the parent is unknown, or a cycle is hit.
    while parent_key and parent_key in by_key and parent_key not in visited:
        visited.add(parent_key)
        parent = by_key[parent_key]
        names.append(parent['name'])
        parent_key = parent.get('parentCollection')
    names.reverse()
    return names


def get_collection_id(zot, target_name):
    """Look up a Zotero collection key by name or by "Parent::Child" path.

    Args:
        zot: Zotero client passed through to fetch_all_collections.
        target_name: Either a collection name, or several names joined with
            "::" describing nested collections (outermost first).

    Returns:
        The key of the first collection whose name equals ``target_name``;
        failing that, the first collection whose ancestor chain ends with
        the given "::"-separated path.

    Raises:
        KeyError: If no collection matches.
    """
    all_cols = fetch_all_collections(zot)

    # Pass 1: exact name match (the original behaviour).
    for coll in all_cols:
        if coll['data']['name'] == target_name:
            return coll['data']['key']

    # Pass 2: interpret "A::B::C" as a nested-collection path.
    if "::" in target_name:
        wanted = target_name.split("::")
        by_key = {coll['data']['key']: coll['data'] for coll in all_cols}
        for coll in all_cols:
            path = _collection_path(coll['data'], by_key)
            if path[-len(wanted):] == wanted:
                return coll['data']['key']

    raise KeyError(f"Collection “{target_name}” not found")



def create_deck(deck_name):
    """
    Send a "createDeck" request for ``deck_name`` to AnkiConnect and
    return the decoded JSON reply.
    """
    request_body = dict(
        action="createDeck",
        version=6,
        params=dict(deck=deck_name),
    )
    reply = requests.post(ANKICONNECT_URL, json=request_body)
    return reply.json()


def add_card_to_anki(deck_name, front, back):
    """
    Call create_deck for ``deck_name``, then post a single "Basic" note with
    the given front/back text to AnkiConnect and return the decoded JSON reply.
    """
    create_deck(deck_name)

    note = {
        "deckName": deck_name,
        "modelName": "Basic",
        "fields": {"Front": front, "Back": back},
        "tags": ["paper", "notecard"],
    }
    request_body = {
        "action": "addNotes",
        "version": 6,
        "params": {"notes": [note]},
    }
    reply = requests.post(ANKICONNECT_URL, json=request_body)
    return reply.json()


def generate_notecards(notes_text):
    """
    Sends the content of notes.txt to the OpenAI Chat Completions endpoint
    and returns the raw text containing Q&A pairs formatted as notecards.

    Args:
        notes_text: The annotation text to turn into notecards.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }
    prompt_messages = [
        {
            "role": "system",
            "content": (
                "You are given annotations from a research paper. "
                "Create concise question-answer pairs as notecards. "
                "Each notecard should start with 'Q:' for the question and 'A:' for the answer. "
                "Include the reference in the question (e.g., (Lastname et al., 2023)). "
                "Generate enough cards to cover the notes, but keep the total no greater than 10-15 cards."
            )
        },
        {"role": "user", "content": notes_text}
    ]
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": prompt_messages,
        "temperature": 0.7
    }
    response = requests.post(OPENAI_URL, headers=headers, json=payload)
    # Surface HTTP failures (bad key, rate limit, ...) with their status
    # instead of an opaque KeyError on "choices" below.
    response.raise_for_status()
    data = response.json()
    return data["choices"][0]["message"]["content"]


def parse_q_and_a(generated_text):
    """
    Turn generated notecard text into a list of (question, answer) tuples.

    Lines are stripped and inspected one by one:
      Q: Question text   -> starts a new question
      A: Answer text     -> sets the answer for the current card
    Any other line is ignored. A card is emitted only once both its
    question and its answer are non-empty.
    """
    pairs = []
    pending_q = ""
    pending_a = ""

    for raw_line in generated_text.splitlines():
        stripped = raw_line.strip()
        marker = stripped[:2]
        remainder = stripped[2:].strip()

        if marker == "Q:":
            # A new question closes the previous card, if it was complete.
            if pending_q and pending_a:
                pairs.append((pending_q, pending_a))
                pending_a = ""
            pending_q = remainder
        elif marker == "A:":
            pending_a = remainder

    # Flush the last card.
    if pending_q and pending_a:
        pairs.append((pending_q, pending_a))
    return pairs


def get_existing_decks():
    """
    Retrieves the list of existing decks from Anki using AnkiConnect.

    Returns:
        The "result" value of the "deckNames" reply, or an empty list when
        that value is missing or null, so callers can always iterate over it
        (e.g. ``set(get_existing_decks())``).
    """
    payload = {"action": "deckNames", "version": 6}
    response = requests.post(ANKICONNECT_URL, json=payload)
    # "result" may be present but null; .get(..., []) alone would return None.
    return response.json().get("result") or []


def main():
    """
    Build Anki decks from the paper folders found directly under INBOX_PATH.

    Each sub-folder maps to a deck named "<PARENT_DECK>::<folder>". Folders
    whose deck already exists, or that lack notes.txt or paper.txt, are
    reported and skipped; otherwise notes.txt is turned into notecards via
    OpenAI and every card is added to the deck.
    """
    if not os.path.exists(INBOX_PATH):
        print(f"Inbox path does not exist: {INBOX_PATH}")
        return

    # Snapshot of the decks Anki already has.
    known_decks = set(get_existing_decks())

    # Collect the sub-folders of the inbox up front.
    subfolders = []
    for entry in os.listdir(INBOX_PATH):
        if os.path.isdir(os.path.join(INBOX_PATH, entry)):
            subfolders.append(entry)

    for folder in subfolders:
        deck_name = f"{PARENT_DECK}::{folder}"
        if deck_name in known_decks:
            print(f"Deck '{deck_name}' already exists. Skipping folder '{folder}'.")
            continue

        base_dir = os.path.join(INBOX_PATH, folder)
        notes_path = os.path.join(base_dir, "notes.txt")
        paper_path = os.path.join(base_dir, "paper.txt")

        # Both files must be present, although only notes.txt is read below.
        if not os.path.exists(notes_path) or not os.path.exists(paper_path):
            print(f"Missing notes.txt or paper.txt in folder: {folder}")
            continue

        with open(notes_path, "r", encoding="utf-8") as handle:
            notes_body = handle.read()

        # OpenAI reply -> list of (question, answer) pairs.
        cards = parse_q_and_a(generate_notecards(notes_body))

        for front, back in cards:
            outcome = add_card_to_anki(deck_name, front, back)
            print(f"Added notecard to deck '{deck_name}': {outcome}")


# Script entry point. At this point the name `main` refers to the
# zero-argument, inbox-folder-based definition directly above, which rebinds
# the earlier main(collection_path_or_key).
if __name__ == "__main__":
    main()

NameError: name 'get_collection_id' is not defined

In [None]:


# Ad-hoc lookup of a collection key by name, using the module-level Zotero client.
get_collection_id(zot,"63.001 Neural Engineering and Signal Processing")
