# Get config parameters

## Data needed
We need three pieces of information:
- the **ID of the group** library.  
  Can be found by opening the group’s page: https://www.zotero.org/groups/groupname,   
  and hovering over the group settings link.
- the **API key** from the Zotero [site](https://www.zotero.org/settings/keys/new)
- **library_type** 
  - own Zotero library --> user
  - shared library --> group
  
## Config file

Rename `config_template.cfg` to `config.cfg` and fill in the three values described above.

In [None]:
import configparser

# Load the Zotero connection settings from the local config file.
configFilePath = r'config.cfg'
configParser = configparser.RawConfigParser()
# read() silently skips files it cannot open and returns the list of files it
# actually parsed, so an empty result means the config file is missing.
if not configParser.read(configFilePath):
    raise FileNotFoundError(
        f"Could not read '{configFilePath}'. Rename config_template.cfg to "
        "config.cfg and fill in library_id, api_key and library_type."
    )
# library_id is stored as text in the file; it is converted to an integer here.
library_id = configParser.getint('zotero-config', 'library_id')
api_key = configParser.get('zotero-config', 'api_key')
library_type = configParser.get('zotero-config', 'library_type')

# Find items with duplicate attachments

In [None]:
from pyzotero import zotero
from datetime import datetime
from collections import defaultdict


# Layout of the ``dateAdded`` timestamps parsed below, e.g. "2021-03-04T05:06:07Z".
# The time is spelled out as %H:%M:%S because %X depends on the active locale
# and is not guaranteed to match HH:MM:SS.
DATE_FMT = "%Y-%m-%dT%H:%M:%SZ"


def date_added(item):
    """Return the item's ``data.dateAdded`` value parsed into a naive datetime.

    Raises ``ValueError`` if the timestamp does not match ``DATE_FMT`` and
    ``KeyError`` if the item has no ``data.dateAdded`` entry.
    """
    return datetime.strptime(item['data']['dateAdded'], DATE_FMT)


print("Retrieving Library...")
zot = zotero.Zotero(library_id, library_type, api_key)
# Fetch all top-level items of the library.
lib_items = zot.everything(zot.top())

print("Resolving duplicates...")
items_duplicate_attach = []  # items that report more than one child
duplicate_items = {}  # item type -> list of capitalized titles seen for that type
for item in lib_items:
    data = item['data']
    key = data['key']
    # NOTE(review): assumes some top-level items (e.g. standalone notes) may
    # lack 'numChildren', 'title' or 'creators' -- hence the .get() defaults.
    num_children = item['meta'].get('numChildren', 0)
    if num_children > 1:
        items_duplicate_attach.append(item)

    attachment_link = item['links'].get('attachment')
    if attachment_link:
        # The last path segment of the link is used as the attachment identifier.
        attach = attachment_link['href'].split("/")[-1]
        type_attach = attachment_link['attachmentType']
    else:
        attach = "NO_ATTACHMENT"
        type_attach = "NO_TYPE"

    item_type = data['itemType']
    title = data.get('title', "")
    if title:
        # Untitled items are left out so they are not counted as duplicates
        # of each other.
        duplicate_items.setdefault(item_type, []).append(title.capitalize())

    # Look at THIS item's creators (previously the first library item's
    # creators were reported for every item).
    firstname = "UNKNOWN"
    lastname = "UNKNOWN"
    for creator in data.get('creators', []):  # could be author or editor
        if creator.get('creatorType') == "author":
            firstname = creator.get('firstName', "UNKNOWN")
            # Fall back to a single 'name' field if there is no 'lastName'.
            lastname = creator.get('lastName', creator.get('name', "UNKNOWN"))
            break

    print(f"""
    Key: {key}
    Title: {title}
    Author: {firstname}, {lastname}
    File: {attach} | Type: {type_attach}
    Num Attach: {num_children}
    ----""")

# Titles occurring more than once within the same item type count as duplicates.
for item_type, titles in duplicate_items.items():
    num_duplicates_items = len(titles) - len(set(titles))
    if num_duplicates_items:
        print(f">> Found {num_duplicates_items} duplicate items of type {item_type}")

num_duplicate_attachments = len(items_duplicate_attach)
# TODO: these attachments can be pdf, notes, zip, etc.,
# but we are interested in pdf files only!
print(f">> Found {num_duplicate_attachments} items with multiple attachements: ")

# Check if Trash is empty

In [None]:
# Ask Zotero for the trashed items and remind the user if any are still there.
trashed_items = zot.trash()
if trashed_items:
    print("Trash is not empty. Consider emptying it!")

# Merge duplicates 

This part needs some testing. 

In [None]:
print("Resolving duplicates...")
# Group items by identifier: the DOI when it is set, otherwise the ISBN.
by_doi = defaultdict(list)
for item in lib_items:
    data = item['data']
    identifier = (data.get('DOI') or data.get('ISBN') or "").strip()
    if not identifier:
        # A blank DOI/ISBN must not be used as a grouping key: every item with
        # an empty field would land in the same group and be treated as a
        # duplicate of the others.
        continue
    by_doi[identifier].append(item)

delete_items = []  # items (and replaced children) to remove from the library
update_items = []  # children to be re-parented onto the kept item
for doi, items in by_doi.items():
    print(f"doi/isbn: {doi}, n: {len(items)}")
    if len(items) == 1:
        continue

    # sort by age
    items.sort(key=date_added)
    # keep oldest item
    keep = items[0]
    print(keep['data'])
    # keep latest attachments: walk the duplicates from newest to oldest and
    # take the children of the first one that has any.
    keep_cs = zot.children(keep['key'])
    for newer in reversed(items[1:]):
        cs = zot.children(newer['key'])
        if not cs:
            continue
        for c in cs:
            c['data']['parentItem'] = keep['key']

        update_items.extend(cs)
        # The kept item's own children are replaced by the newer ones.
        delete_items.extend(keep_cs)
        break

    # Everything except the oldest item is scheduled for deletion.
    delete_items.extend(items[1:])

print(f"Create list of items to update: {len(update_items)} items found.")
print(f"Create list of items to delete: {len(delete_items)} items found.")

# Update and delete duplicate items

**WARNING**: This cell is dangerous!


In [None]:
print("Updating library...")
# Updates run before any deletion, so we don't delete parents of items we
# want to keep.
for child in update_items:
    zot.update_item(child)

# now delete: DANGER AREA!
for doomed in delete_items:
    doomed_data = doomed['data']
    # Not every queued item has a title; print an empty one in that case.
    ttt = doomed_data.get('title', "")

    print(f"""deleting ... 
    Key: {doomed_data['key']}
    ItemType: {doomed_data['itemType']}
    Title: {ttt}
    """)

    # Items that link to an attachment are only removed after an explicit "y";
    # any other answer skips the item.
    needs_confirmation = 'attachment' in doomed['links']
    if needs_confirmation and input("item has attachement y[N]?") != "y":
        continue

    zot.delete_item(doomed)

print("done!")
        