In [1]:
import re

import settings
from pyzotero import zotero

# Client for the personal ('user') Zotero library; the library id and API key
# are read from the local `settings` module rather than hard-coded here.
zot = zotero.Zotero(
    library_id=settings.USER_ID,
    library_type='user',
    api_key=settings.API_KEY,
)

In [2]:
import zotscripts as zs

## Collections

In [3]:
# This defines a dict of tags to be added to each specific collection,
# by collection name.
from tags import coll_tags

In [7]:
# Get a dict of name: key for all collections.
# A bare `zot.collections()` call only returns a single page of results, so it
# is wrapped in `zot.everything()` to follow the pagination links to the end.
colls = zot.everything(zot.collections())
names_keys = zs.coll_names_keys(colls)

In [10]:
# Re-key the per-collection tag lists by collection key instead of by name.
# A name in `coll_tags` that is missing from `names_keys` raises KeyError.
keys_tags = {
    names_keys[coll_name]: tag_list
    for coll_name, tag_list in coll_tags.items()
}
keys_tags

{'4D4HMQMC': ['Biomonitoring'],
 '8WGEBIWX': ['Cheminformatics'],
 'AVJBFRKW': ['Computational Toxicology'],
 'B9X2QJE7': ['Body Burden'],
 'DMZ258CQ': ['Nanomaterials', 'Nanotoxicology'],
 'DNEDW2RZ': ['Nanomaterials'],
 'GKXGPZVD': ['Bioinformatics'],
 'KVDRDNFU': ['Exposure']}

In [None]:
# Dry run: report which tags would be added to each regular item of every
# mapped collection. Nothing is written back to Zotero here.
for key, tags in keys_tags.items():
    # `everything()` follows pagination, so collections holding more than one
    # page of results are covered completely rather than only their first page.
    for item in zot.everything(zot.collection_items(key)):
        # Notes and attachments are skipped.
        if item['data']['itemType'] not in ('note', 'attachment'):
            print('Add tags', tags, 'to', item['data']['title'])

### Automatically add items from one collection (including subcollection items?) to another collection

This doesn't work yet: `Zotero.collection_items` doesn't retrieve subcollection items, only the items filed directly in the given collection.

In [None]:
# Look up the keys of the source and destination collections by name.
# `colls` is the collection list fetched in the cell that builds `names_keys`;
# the previously used `all_colls` is not defined anywhere in this notebook, so
# the cell raised NameError when run on a fresh kernel.
# NOTE(review): assumes `zs.coll_key_from_name` accepts the same collection
# list that `zs.coll_names_keys` does -- confirm against zotscripts.
zotscripts_tag = zs.coll_key_from_name('zotscripts_tag', colls)
print(zotscripts_tag)

chdc_share = zs.coll_key_from_name('Zotero-CHDC export', colls)
print(chdc_share)

In [None]:
# Dry run: list the items that would be copied into the export collection.
# `everything()` follows pagination so the count below reflects every item
# returned for the collection, not just the first page of results.
items = zot.everything(zot.collection_items(zotscripts_tag))
print(len(items))
for item in items:
    try:
        print('Adding item', item['data']['title'])
    except KeyError:
        # Entries lacking a 'data' or 'title' key are deliberately left out of
        # the listing instead of aborting the run.
        pass
    # Disabled while this is only a dry run. pyzotero documents the call as
    # addto_collection(collection, item): the collection key comes first.
    # zot.addto_collection(chdc_share, item)

## Titles

### Try different ways of checking for words that should retain their capitalization

In [2]:
# Candidate patterns for spotting words whose internal capitalization should
# be preserved (CamelCase names, acronyms, chemical formulae). The two
# commented-out entries are earlier attempts kept for reference.
# NOTE: `head` is a mandatory group, so whenever the `(?(head)...)` conditional
# is reached it has already matched; the alternative after `|` is never tried.
cc_rx = [
#     r'([A-Z][a-z0-9]+){2,}',
#     r'(\w+[A-Z][a-z0-9]?)+|([A-Z][a-z0-9]+){2,}',
    r'(?P<head>\w+)(?(head)(?P<hump>[A-Z][a-z0-9]+)|(?P=hump){2,})',
    r'(?P<head>\w+)(?(head)(?P<hump>[A-Z0-9][a-z]*)|(?P=hump){2,})',
    ]

# Sample words, grouped by the kind of capitalization they exercise.
test_words = [
    # Mixed-case names
    'ChemSpider',
    'PubChemRDF',
    'eNanoMapper',
    'iPod',
    'IPython',
    # Acronyms
    'NGOs',
    'CA',
    'IC2',
    # Chemical terms
    'InChI',
    'ZnO',
    'SO3',
    'Br2',
    'H2',
    'Fe2O3',
    'NOx',
    # These should not match
    'Monocapitalized',
    'Mono-Capitalized'
]

# Classify every word by whether all, some, or none of the patterns find a
# match in it; when the patterns disagree, show each pattern's verdict.
for word in test_words:
    hits = [re.search(pattern, word) is not None for pattern in cc_rx]
    if all(hits):
        label = 'ALL'
    elif any(hits):
        label = 'DIFF'
    else:
        label = 'NONE'
    print('{:<40}'.format(label), word)
    if label == 'DIFF':
        for pattern, hit in zip(cc_rx, hits):
            print('{0} {1}'.format(pattern, hit))

ALL                                      ChemSpider
ALL                                      PubChemRDF
ALL                                      eNanoMapper
ALL                                      iPod
ALL                                      IPython
ALL                                      NGOs
DIFF                                     CA
(?P<head>\w+)(?(head)(?P<hump>[A-Z][a-z0-9]+)|(?P=hump){2,}) False
(?P<head>\w+)(?(head)(?P<hump>[A-Z0-9][a-z]*)|(?P=hump){2,}) True
ALL                                      IC2
ALL                                      InChI
DIFF                                     ZnO
(?P<head>\w+)(?(head)(?P<hump>[A-Z][a-z0-9]+)|(?P=hump){2,}) False
(?P<head>\w+)(?(head)(?P<hump>[A-Z0-9][a-z]*)|(?P=hump){2,}) True
ALL                                      SO3
DIFF                                     Br2
(?P<head>\w+)(?(head)(?P<hump>[A-Z][a-z0-9]+)|(?P=hump){2,}) False
(?P<head>\w+)(?(head)(?P<hump>[A-Z0-9][a-z]*)|(?P=hump){2,}) True
DIFF                            

### Try out sentence-casing titles

In [None]:
# Print the sentence-cased form of every title in the dump file.
# Each line is handed to the helper as read, trailing newline included.
with open('all_titles.txt', 'r') as titles_file:
    # `map` is lazy, so conversion and printing still alternate line by line.
    for converted in map(zs.to_sentence_case, titles_file):
        print(converted)

### Try out title separators

In [None]:
# Show how the separator pattern from zotscripts breaks each title apart.
# Lines are split as read, so the final piece keeps its trailing newline.
with open('all_titles.txt', 'r') as titles_file:
    for raw_title in titles_file:
        print(zs.SEPARATORS.split(raw_title))