In [1]:
import re

import settings
from pyzotero import zotero

# Zotero API client shared by the collection cells further down; the library
# id and API key are read from the local `settings` module.
# NOTE(review): 'user' is presumably the library type (personal rather than
# group library) — confirm against the pyzotero documentation.
zot = zotero.Zotero(settings.USER_ID, 'user', settings.API_KEY)

In [None]:
from zotscripts import *

## Titles

### Try different ways of checking for words that should retain their capitalization

In [2]:
# Candidate patterns for spotting words whose inner capitalization has to be
# preserved when a title is sentence-cased.
#
# NOTE: `head` is a mandatory group, so it has always matched by the time the
# conditional `(?(head)...|...)` is evaluated. Only the first alternative (the
# `hump` group) is ever tried; the `(?P=hump){2,}` alternative is never used.
# In practice the two patterns behave like `\w+[A-Z][a-z0-9]+` and
# `\w+[A-Z0-9][a-z]*` respectively.
cc_rx = [
    # Earlier attempts, kept for reference:
#     r'([A-Z][a-z0-9]+){2,}',
#     r'(\w+[A-Z][a-z0-9]?)+|([A-Z][a-z0-9]+){2,}',
    r'(?P<head>\w+)(?(head)(?P<hump>[A-Z][a-z0-9]+)|(?P=hump){2,})',
    r'(?P<head>\w+)(?(head)(?P<hump>[A-Z0-9][a-z]*)|(?P=hump){2,})',
    ]

# Sample vocabulary; the last two entries are negative controls.
test_words = [
    # Camel-cased names
    'ChemSpider',
    'PubChemRDF',
    'eNanoMapper',
    'iPod',
    'IPython',
    # Acronyms
    'NGOs',
    'CA',
    'IC2',
    # Chemical terms
    'InChI',
    'ZnO',
    'SO3',
    'Br2',
    'H2',
    'Fe2O3',
    'NOx',
    # Ordinary capitalization: neither pattern should match these
    'Monocapitalized',
    'Mono-Capitalized'
]

# Report, for each word, whether every pattern matched (ALL), none did (NONE),
# or the patterns disagree (DIFF). For a DIFF, list each pattern's verdict.
for word in test_words:
    hits = [re.search(rx, word) is not None for rx in cc_rx]

    if all(hits):
        verdict = 'ALL'
    elif any(hits):
        verdict = 'DIFF'
    else:
        verdict = 'NONE'

    print('{:<40}'.format(verdict), word)
    if verdict == 'DIFF':
        for rx, hit in zip(cc_rx, hits):
            print('{0} {1}'.format(rx, hit))

ALL                                      ChemSpider
ALL                                      PubChemRDF
ALL                                      eNanoMapper
ALL                                      iPod
ALL                                      IPython
ALL                                      NGOs
DIFF                                     CA
(?P<head>\w+)(?(head)(?P<hump>[A-Z][a-z0-9]+)|(?P=hump){2,}) False
(?P<head>\w+)(?(head)(?P<hump>[A-Z0-9][a-z]*)|(?P=hump){2,}) True
ALL                                      IC2
ALL                                      InChI
DIFF                                     ZnO
(?P<head>\w+)(?(head)(?P<hump>[A-Z][a-z0-9]+)|(?P=hump){2,}) False
(?P<head>\w+)(?(head)(?P<hump>[A-Z0-9][a-z]*)|(?P=hump){2,}) True
ALL                                      SO3
DIFF                                     Br2
(?P<head>\w+)(?(head)(?P<hump>[A-Z][a-z0-9]+)|(?P=hump){2,}) False
(?P<head>\w+)(?(head)(?P<hump>[A-Z0-9][a-z]*)|(?P=hump){2,}) True
DIFF                            

In [None]:
# with open('all_titles.txt', 'r') as fp:
#     for line in fp:
#         for word in line.split():
#             results = [re.search(p, word) for p in cc_rx]
# ...

### Try out sentence-casing titles

In [None]:
# Run every line of the titles dump through to_sentence_case and show the
# result. Each line is passed on exactly as read, trailing newline included.
with open('all_titles.txt') as titles_file:
    for raw_title in titles_file:
        print(to_sentence_case(raw_title))

### Try out title separators

In [None]:
# Show how each line of the titles dump is broken up by SEPARATORS.split().
# NOTE(review): SEPARATORS presumably is a compiled regex supplied by
# zotscripts — confirm there.
with open('all_titles.txt') as titles_file:
    for raw_title in titles_file:
        print(SEPARATORS.split(raw_title))

### Dry run of sentence-casing titles for all items in a collection

In [None]:
# Dry run over the test collection: every item goes through
# item_titles_to_sentence, but nothing is sent back to Zotero.
coll_id = settings.TEST_COLL_ID

item_count = zot.num_collectionitems(coll_id)
print(item_count, 'items in collection.')

items = all_coll_items(zot, coll_id)
for item in items:
    # The returned item is deliberately left unused (no update call here).
    # NOTE(review): item_titles_to_sentence presumably reports its own changes,
    # which would be why nothing extra is printed — confirm in zotscripts.
    new_item = item_titles_to_sentence(item)

### One-off script to update ONLY `shortTitle` and `bookTitle`

In [None]:
# Needed only when the write-back at the bottom of this cell is re-enabled:
# from pyzotero.zotero_errors import PreConditionFailed

coll_id = settings.TEST_COLL_ID

item_count = zot.num_collectionitems(coll_id)
print(item_count, 'items in collection.')

items = all_coll_items(zot, coll_id)
for item in items:
    # Limit the conversion to the two named fields instead of the helper's
    # default key set.
    new_item = item_titles_to_sentence(item, keys=['shortTitle', 'bookTitle'])
    # Write-back is currently disabled; uncomment the lines below (and the
    # import at the top of the cell) to push the changes to Zotero.
#     if new_item:
#         try:
#             zot.update_item(new_item)
#         except PreConditionFailed:
#             print('{:<10}'.format('FAILED'), item['data']['title'])

## Journal article metadata

### One-off script to clear `accessDate` from all journal articles in a collection

In [None]:
# Blank out the `accessDate` field of every journal article in the test
# collection and push each change back to Zotero.
coll_id = settings.TEST_COLL_ID

print(zot.num_collectionitems(coll_id), 'items in collection.')
items = all_coll_items(zot, coll_id)

for item in items:
    data = item['data']
    if data['itemType'] != 'journalArticle':
        continue

    # Skip items whose accessDate is already empty; .get() also tolerates an
    # item whose data carries no accessDate entry at all instead of aborting
    # the whole run with a KeyError.
    if not data.get('accessDate'):
        continue

    data['accessDate'] = ''
    # Send the update first and log afterwards, so that an update that raises
    # is never reported as cleared.
    zot.update_item(item)
    print('Cleared accessDate:', data['key'])