# Get config parameters

## Data needed
We need three pieces of information:
- the **ID of the group** library.  
  Can be found by opening the group’s page: https://www.zotero.org/groups/groupname,   
  and hovering over the group settings link.
- the **API key** from the Zotero [site](https://www.zotero.org/settings/keys/new)
- **library_type** 
  - own Zotero library --> user
  - shared library --> group
  
## Config file

Rename `config_template.cfg` to `config.cfg` and fill in the three values described above.

In [None]:
import configparser

# Load the Zotero connection settings from the local config file.
configFilePath = r'config.cfg'
configParser = configparser.RawConfigParser()
# read() silently skips files it cannot open and returns the list of files it
# actually parsed, so check the result to fail with a clear message instead of
# a confusing NoSectionError on the first lookup below.
if not configParser.read(configFilePath):
    raise FileNotFoundError(
        f"Could not read '{configFilePath}'. "
        "Rename config_template.cfg to config.cfg and fill it in."
    )
# The library ID is used as an integer; getint() raises ValueError otherwise.
library_id = configParser.getint('zotero-config', 'library_id')
api_key = configParser.get('zotero-config', 'api_key')
library_type = configParser.get('zotero-config', 'library_type')

# Report items with duplicate attachments

In [None]:
from pyzotero import zotero
from datetime import datetime
from collections import defaultdict
import datetime as dt

# Timestamp taken when this cell starts running.
T = dt.datetime.now()
# Format used to parse an item's `dateAdded` value, e.g. "2020-01-02T03:04:05Z".
# The time part is spelled out as %H:%M:%S instead of %X, because %X means
# "the locale's time representation" and is not guaranteed to be HH:MM:SS.
DATE_FMT = "%Y-%m-%dT%H:%M:%SZ"


def date_added(item):
    """Return the item's ``data.dateAdded`` string parsed into a naive datetime.

    Usable as a sort key for library items.

    Raises:
        KeyError: if the item has no ``data.dateAdded`` entry.
        ValueError: if the value does not match ``DATE_FMT``.
    """
    return datetime.strptime(item['data']['dateAdded'], DATE_FMT)

# Connect to the configured library and download all of its top-level items.
print("Retrieving Library...")
zot = zotero.Zotero(library_id, library_type, api_key)
# everything() keeps requesting further pages until the full result is fetched.
lib_items = zot.everything(zot.top())
# Take the timestamp only now, so "Done at" reports when the download
# finished rather than when the cell started.
T = dt.datetime.now()
print(f"Done at {T.hour}:{T.minute}:{T.second}")

In [None]:
print("Resolving duplicates...")


def _first_author(creators):
    """Format the first creator of an item for the report below.

    Returns "firstName, lastName" for two-field creators, the plain ``name``
    for single-field creators (e.g. institutions), and "NO_AUTHOR" when the
    item has no creators at all.
    """
    if not creators:
        return "NO_AUTHOR"
    first = creators[0]
    if 'name' in first:
        return first['name']
    return f"{first.get('firstName', '')}, {first.get('lastName', '')}"


# Collect every top-level item that has more than one child and print a short
# summary of each item on the way.
items_duplicate_attach = []
for item in lib_items:
    data = item['data']
    # Not every item carries a child count (or a title / creators), so fall
    # back to neutral values instead of aborting the whole scan on a KeyError.
    num_children = item['meta'].get('numChildren', 0)
    if num_children > 1:
        items_duplicate_attach.append(item)

    attachment_link = item['links'].get('attachment')
    if attachment_link is not None:
        # The last path segment of the attachment URL is used as the file id.
        attach = attachment_link['href'].split("/")[-1]
        type_attach = attachment_link.get('attachmentType', "NO_TYPE")
    else:
        attach = "NO_ATTACHMENT"
        type_attach = "NO_TYPE"

    print(f"""
    Key: {data['key']}\n
    Title: {data.get('title', 'NO_TITLE')}\n
    Author: {_first_author(data.get('creators'))}\n
    File: {attach} | Type: {type_attach}\n
    Num Attach: {num_children}\n
    ----""")

num_duplicates = len(items_duplicate_attach)
# TODO: these children can be pdf, notes, zip, etc.,
# but we are interested in pdf files only.
print(f">> Found {num_duplicates} items with multiple attachements: ")

#found_duplicate_attachements = True if num_duplicates else False

## Check if Trash is empty

In [None]:
# Remind the user to empty the trash when it still holds items.
trashed_items = zot.trash()
if trashed_items:
    print("Trash is not empty. Consider emptying it!")

## Report items with multiple attachments

Multiple attachments are ok.  
We are looking for duplicate pdf files.   
This cell is just for reporting.

In [None]:
#lib_items.sort(key=date_added)
# Print each top-level item's key followed by the keys of all its children.
# Reporting only: nothing in the library is modified here.
for parent in lib_items:
    parent_key = parent['key']
    print(f"key  : {parent_key}")
    for kid in zot.children(parent_key):
        print(f"child: {kid['key']}")

    print("---")

## Remove items with duplicate attachments

**WARNING**: This cell is dangerous!

Running this cell deletes duplicate attachments from the library.


In [None]:
print("Updating library...")
print("===========")
# Becomes True as soon as at least one attachment has been deleted.
deleted_attachement = False


def attachment_is_pdf(c):
    """Return True if the child item *c* is an attachment with PDF content.

    Only the argument is inspected; the function does not depend on any
    loop variable of the surrounding cell.

    Args:
        c: a child item dict with a ``data`` mapping that has ``itemType``.
    """
    data = c['data']
    # Non-attachment children (e.g. notes) have no contentType, hence .get().
    return data['itemType'] == "attachment" and data.get('contentType') == 'application/pdf'

# For every item with several children, keep the first PDF attachment and
# delete (or offer to delete) the remaining PDF attachments.
for item in items_duplicate_attach:
    cs = zot.children(item['key'])
    title = item['data'].get('title', 'NO_TITLE')
    files = []
    pdfs = []
    for child in cs:
        if 'filename' in child['data']:  # notes have no filename!
            print(f"[{child['data']['filename']}]: {title}")
            files.append(child['data']['filename'])
        if attachment_is_pdf(child):
            pdfs.append(child)

    print("-----")

    # Always keep the first PDF. Selecting by position among *all* children
    # (cs[1:]) would wipe every PDF copy whenever the first child is a note
    # or another non-PDF child.
    extra_pdfs = pdfs[1:]

    # DANGER AREA!!
    # Items can legitimately hold different files (e.g. supplementary
    # material); only identically named files are treated as duplicates.
    if len(files) > 1 and len(set(files)) == 1:
        print("all files are the same. Proceed deleting ..")
        print(files)
        for child in extra_pdfs:
            print(f"delete {child['data']['filename']}")
            zot.delete_item(child)
            deleted_attachement = True

    else:  # manual mode!
        # Ask only about PDF files; notes, zip files, etc. are never deleted.
        for child in extra_pdfs:
            answer = input(f"delete {child['data']['filename']}. y[N]?")
            # Anything but an explicit "y" (any case, spaces ignored) keeps the file.
            if answer.strip().lower() == "y":
                print(f"deleting {child['data']['filename']}")
                zot.delete_item(child)
                deleted_attachement = True

print("===========")
T = dt.datetime.now()
if deleted_attachement:
    print("Attachments deleted!")
else:
    print("No attachments deleted!")

print(f"Done at {T.hour}:{T.minute}:{T.second}")

## Delete duplicate tag

In [None]:
# Opt-in switch for removing the tag below; it is off by default.
DELETE_TAGS = False  # @todo: check if this is necessary
if DELETE_TAGS:
    # Only touch the tag when the previous cell actually deleted something.
    if deleted_attachement:
        zot.delete_tags('#duplicate-citation-key')