In [None]:
import os
import sys
import configparser
import datetime as dt
from collections import defaultdict
from datetime import datetime

from pyzotero import zotero

# Timestamp layout used to parse item dates such as "2021-03-04T05:06:07Z".
# The time part is spelled out as %H:%M:%S rather than %X, because %X is the
# locale's own time representation and may not match under non-C locales.
DATE_FMT = "%Y-%m-%dT%H:%M:%SZ"

# Setup flags
Update these variables before running the notebook.

## Delete own attachments 
<a id='del-attach'></a>

Important when merging duplicate items. See explanations in 
[this cell](merge_duplicate_items.ipynb#del-attach).


## Delete Tags 

Zotero creates special tags to mark duplicates (`duplicate-citation-key`).
Set this to `True` to remove them in [this cell](remove_duplicate_attachments.ipynb#tags).

In [None]:
# Flag for the "Delete own attachments" step, which the notes above describe
# as important when merging duplicate items.
# NOTE(review): the code that reads this flag is not visible in this section.
DELETE_OWN_ATTACHMENTS = True

In [None]:
# Flag for removing the special duplicate-marker tags (`duplicate-citation-key`);
# set to True to remove them.
# NOTE(review): the code that reads this flag is not visible in this section.
DELETE_TAGS = False  # @todo: check if this is necessary

# Load config

In [None]:
# Read the Zotero connection settings from the [zotero-config] section of the
# config file located in the current working directory.
config_file = r"config.cfg"
if not os.path.exists(config_file):
    # Stop with a non-zero exit status and put the message on stderr, so a
    # missing config file is reported as a failure rather than a clean exit.
    sys.exit(f"config file {config_file} does not exist!")

configFilePath = config_file
configParser = configparser.RawConfigParser()
configParser.read(configFilePath)
# library_id is stored as text in the file; getint converts it and raises
# ValueError when the value is not an integer.
library_id = configParser.getint("zotero-config", "library_id")
api_key = configParser.get("zotero-config", "api_key")
library_type = configParser.get("zotero-config", "library_type")

# Define functions

In [None]:
def date_added(item):
    """Return the item's ``dateAdded`` value parsed into a ``datetime``.

    The raw value is read from ``item["data"]["dateAdded"]`` and parsed with
    the module-level ``DATE_FMT`` layout.
    """
    raw_timestamp = item["data"]["dateAdded"]
    return datetime.strptime(raw_timestamp, DATE_FMT)

In [None]:
def attachment_is_pdf(_child):
    """Tell whether a child item is an attachment whose content type is PDF.

    Returns ``True`` only when ``_child["data"]["itemType"]`` is
    ``"attachment"`` and ``_child["data"]["contentType"]`` is
    ``"application/pdf"``; the content type is not inspected for
    non-attachment items.
    """
    data = _child["data"]
    if data["itemType"] != "attachment":
        return False
    return data["contentType"] == "application/pdf"

In [None]:
def get_items_with_duplicate_pdf(_items):
    """Find the items that carry more than one PDF attachment.

    For each item, its children are fetched through the module-level ``zot``
    object (not defined in this function) and the ``filename`` of every child
    accepted by ``attachment_is_pdf`` is recorded under the item's key.

    Returns:
        tuple: ``(items, filenames_by_key)`` where ``items`` lists the items
        with more than one recorded PDF filename and ``filenames_by_key`` is
        a ``defaultdict(list)`` holding an entry for every processed key,
        including keys with no PDF.
    """
    flagged_items = []
    filenames_by_key = defaultdict(list)
    for entry in _items:
        item_key = entry["key"]
        children = zot.children(item_key)
        # Indexing the defaultdict registers the key even if no PDF is found.
        pdf_names = filenames_by_key[item_key]
        pdf_names.extend(
            child["data"]["filename"]
            for child in children
            if attachment_is_pdf(child)
        )
        if len(pdf_names) > 1:
            flagged_items.append(entry)

    return flagged_items, filenames_by_key

In [None]:
def retrieve_data():
    """Create a Zotero client and download the library's items.

    The client is built from the module-level ``library_id``,
    ``library_type`` and ``api_key`` values. Progress is reported with two
    printed messages; the second one carries the completion time.

    Returns:
        tuple: ``(zot, lib_items)`` — the ``zotero.Zotero`` client and the
        result of ``zot.everything(zot.top())``.
    """
    print("Retrieving Library...")
    zot = zotero.Zotero(library_id, library_type, api_key)
    lib_items = zot.everything(zot.top())
    finished_at = dt.datetime.now()
    # Zero-pad each field so that 09:05:03 is not printed as 9:5:3.
    print(f"Done at {finished_at:%H:%M:%S}")
    return zot, lib_items

In [None]:
def get_items_by_doi_or_isbn(lib_items):
    """Group library items by their DOI, falling back to their ISBN.

    An identifier counts only when it is non-blank: items whose ``DOI`` entry
    is missing or empty are grouped by ``ISBN`` instead, and items with
    neither identifier are left out. This prevents unrelated items with empty
    identifier fields from being collected under the ``""`` key.
    Surrounding whitespace is stripped from the identifier before it is used
    as the grouping key.

    Args:
        lib_items: iterable of item dicts, each with a ``"data"`` mapping.

    Returns:
        defaultdict(list): identifier -> list of items sharing it, in input
        order.
    """
    items_by_doi_isbn = defaultdict(list)
    for item in lib_items:
        data = item["data"]
        # `or ""` also covers a value of None before stripping.
        doi = (data.get("DOI") or "").strip()
        isbn = (data.get("ISBN") or "").strip()
        identifier = doi or isbn
        if identifier:
            items_by_doi_isbn[identifier].append(item)

    return items_by_doi_isbn

In [1]:
def log_delete(item):
    """Print a "deleting ..." notice with the item's key, item type and title.

    The title line is left blank when ``item["data"]`` has no ``"title"``
    entry.
    """
    data = item["data"]
    title = data.get("title", "")
    # The first line ends with a space and the notice ends with an indented
    # blank line, matching the original layout exactly.
    notice = (
        "deleting ... \n"
        f"    Key: {data['key']}\n"
        f"    ItemType: {data['itemType']}\n"
        f"    Title: {title}\n"
        "    "
    )
    print(notice)