Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit acfcaed
Showing
19 changed files
with
680 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Files shipped in the source distribution in addition to the package code.
include papnt/config.ini
include VERSION
include REQUIREMENTS

# Keep byte-code caches and compiled files out of the distribution.
recursive-exclude * __pycache__
recursive-exclude * *.py[co]

# NOTE(review): recursive-include normally takes "<dir> <pattern> ..."; no
# pattern is visible on this line -- confirm which files under docs/ are meant.
recursive-include docs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Package version, read from the VERSION file.
VERSION := $(shell cat VERSION)

# Build the source distribution inside the "papnt" conda environment, then
# print only the SHA-256 digest of the resulting tarball.
distribute:
	conda run -n papnt python setup.py sdist
	shasum -a 256 dist/papnt-$(VERSION).tar.gz | cut -d ' ' -f 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
bibtexparser >= 1.4.0
click >= 8.0.4
crossrefapi >= 1.5.0
iso4 >= 0.0.2
nltk >= 3.6.7
notion-client >= 2.0.0
pdf2doi >= 1.5
requests
unidecode >= 1.3.6
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.0.1 |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Academic paper management with Notion |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import json | ||
|
||
from bibtexparser import loads | ||
from bibtexparser.bparser import BibTexParser | ||
from iso4 import abbreviate | ||
import nltk | ||
|
||
|
||
def _remove_duplicated_space(dict_: dict): | ||
return {k: v.replace(' ', ' ') for k, v in dict_.items()} | ||
|
||
|
||
class AbbrLister: | ||
def __init__(self, path_bib: str): | ||
with open(path_bib, 'r') as f: | ||
bibtext = f.read() | ||
parser = BibTexParser() | ||
bibdatabase = loads(bibtext, parser).entries_dict | ||
names_journal = [article.get('journal') | ||
for article in bibdatabase.values()] | ||
self.names_journal = sorted(list(set( | ||
[name for name in names_journal if name is not None]))) | ||
|
||
nltk.download('wordnet') | ||
|
||
def listup(self, spec: dict | None=None): | ||
""" | ||
sepc: dict | ||
Can specify abbreviation like... | ||
{'PLOS ONE': 'PLOS ONE'} | ||
Case insensitive. | ||
""" | ||
abbrs = {name: abbreviate(name) for name in self.names_journal} | ||
self.abbrs = _remove_duplicated_space(abbrs) | ||
if spec is None: | ||
return self | ||
specified_abbrs = {name: spec[name.lower()] | ||
for name in self.names_journal | ||
if spec.get(name.lower())} | ||
self.abbrs = self.abbrs | specified_abbrs | ||
return self | ||
|
||
def save(self, save_path: str): | ||
if not hasattr(self, 'abbrs'): | ||
raise ValueError('Use listup() first.') | ||
|
||
with open(save_path, 'w') as f: | ||
json.dump( | ||
{'default': {'container-title': self.abbrs}}, f, indent=2) | ||
|
||
|
||
if __name__ == '__main__':
    # Manual run: ``python abbrlister.py [BIB_PATH [JSON_PATH]]``.
    # Without arguments the original sample paths are used.
    import sys

    path_bib = (sys.argv[1] if len(sys.argv) > 1
                else '/Users/issakuss/Desktop/study14.bib')
    path_json = (sys.argv[2] if len(sys.argv) > 2
                 else path_bib.removesuffix('.bib') + '.json')
    AbbrLister(path_bib).listup().save(path_json)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
from pathlib import Path | ||
|
||
import click | ||
|
||
from .misc import load_config | ||
from .database import Database, DatabaseInfo | ||
from .mainfunc import ( | ||
add_records_from_local_pdfpath, | ||
update_unchecked_records_from_doi, | ||
update_unchecked_records_from_uploadedpdf, | ||
make_bibfile_from_records, make_abbrjson_from_bibpath) | ||
|
||
# Module-wide state read by the commands in this module.  A ``global``
# statement has no effect at module level, so the names are just assigned.
config = load_config(Path(__file__).parent / 'config.ini')
database = Database(DatabaseInfo())
|
||
|
||
def _config_is_ok():
    """Tell whether both database settings in ``config`` are filled in.

    Returns True when ``tokenkey`` and ``database_id`` are both non-empty.
    Otherwise a hint with the location of config.ini is written to stderr
    and False is returned.
    """
    db_section = config['database']
    empties = [len(db_section[key]) == 0
               for key in ('tokenkey', 'database_id')]
    if not any(empties):
        return True
    path_config = Path(__file__).parent / 'config.ini'
    click.echo('Open config.ini and edit database information: '
               f'{path_config}', err=True)
    return False
|
||
|
||
# Alternative kept for reference (adds -h as a help alias):
# @click.group(context_settings=dict(help_option_names=['-h', '--help']))
@click.group(invoke_without_command=True)
@click.pass_context
def main(ctx):
    # Comments are used instead of a docstring so the help text click
    # derives from this function stays unchanged.
    # NOTE(review): the config check is assumed to belong to the
    # "no subcommand" branch -- confirm against the original indentation.
    if ctx.invoked_subcommand is not None:
        return
    click.echo('try `papnt --help` for help')
    if not _config_is_ok():
        return
    path_config = Path(__file__).parent / 'config.ini'
    click.echo('Your config file is in: '
               f'{path_config}')
|
||
|
||
@main.command()
@click.argument('paths')
def paths(paths: str):
    """Add record(s) to database by local path to PDF file"""
    if not _config_is_ok():
        return
    # str.split gives a one-element list when there is no comma, so a single
    # path and a comma-separated list go through the same loop.
    propnames = config['propnames']
    for pdfpath in paths.split(','):
        add_records_from_local_pdfpath(database, propnames, pdfpath)
|
||
|
||
@main.command()
def doi():
    """Fill information in record(s) by DOI"""
    # Nothing is done while the database settings are incomplete.
    if not _config_is_ok():
        return
    update_unchecked_records_from_doi(database, config['propnames'])
|
||
|
||
@main.command()
def pdf():
    """Fill information in record(s) by uploaded PDF file"""
    # Nothing is done while the database settings are incomplete.
    if not _config_is_ok():
        return
    propnames = config['propnames']
    update_unchecked_records_from_uploadedpdf(database, propnames)
|
||
|
||
@main.command()
@click.argument('target')
def makebib(target: str):
    """Make BIB file including reference information from database"""
    if _config_is_ok():
        propnames = config['propnames']
        dir_save_bib = config['misc']['dir_save_bib']
        make_bibfile_from_records(database, target, propnames, dir_save_bib)
        # The abbreviation JSON is built from the BIB file written just above.
        path_bib = f'{dir_save_bib}/{target}.bib'
        make_abbrjson_from_bibpath(path_bib, config['abbr'])
|
||
|
||
if __name__ == '__main__':
    # Run the command group when the module itself is executed.  With no
    # subcommand, main() prints the help hint and checks the config.
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
[database] | ||
tokenkey = '' | ||
database_id = '' | ||
|
||
[propnames] ; Property Names | ||
; bib name = property name | ||
; Check bib names: https://ja.wikipedia.org/wiki/BibTeX | ||
; Note that bib names will be used as lower case | ||
doi = DOI | ||
author = Authors | ||
title = Title | ||
edition = Edition | ||
year = Year | ||
journal = Journal | ||
volume = Volume | ||
pages = Pages | ||
publisher = Publisher | ||
ID = Citekey | ||
ENTRYTYPE = Type | ||
howpublished = HowPublished | ||
|
||
; Other property | ||
output_target = Cite in | ||
pdf = PDF | ||
|
||
[abbr] ; Specification of abbreviation | ||
Full Name = Abbreviated | ||
PLOS ONE = PLOS ONE | ||
|
||
[misc] | ||
; Directory to save bib files | ||
dir_save_bib = '' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Short function words (articles, prepositions, conjunctions) in several
# languages, in lower case and grouped by initial letter.  Each word is
# listed exactly once.
# NOTE(review): the consumer of this tuple is not visible here -- presumably
# these words are skipped when a key or title is built; confirm.
SKIPWORDS = (
    'a', 'ab', 'aboard', 'about', 'above', 'across', 'after', 'against',
    'al', 'along', 'amid', 'among', 'an', 'and', 'anti', 'around', 'as',
    'at',
    'before', 'behind', 'below', 'beneath', 'beside', 'besides', 'between',
    'beyond', 'but', 'by',
    'd', 'da', 'das', 'de', 'del', 'dell', 'dello', 'dei', 'degli', 'della',
    'delle', 'dem', 'den', 'der', 'des', 'despite', 'die', 'do', 'down',
    'du', 'during',
    'ein', 'eine', 'einem', 'einen', 'einer', 'eines', 'el', 'en', 'et',
    'except',
    'for', 'from',
    'gli',
    'i', 'il', 'in', 'inside', 'into', 'is',
    'l', 'la', 'las', 'le', 'les', 'like', 'lo', 'los',
    'near', 'nor',
    'of', 'off', 'on', 'onto', 'or', 'over',
    'past', 'per', 'plus',
    'round',
    'save', 'since', 'so', 'some', 'sur',
    'than', 'the', 'through', 'to', 'toward', 'towards',
    'un', 'una', 'unas', 'under', 'underneath', 'une', 'unlike', 'uno',
    'unos', 'until', 'up', 'upon',
    'versus', 'via', 'von',
    'while', 'with', 'within', 'without',
    'yet',
    'zu', 'zum')
|
||
# Maps a Crossref work type to the BibTeX entry type used for it.
# BibTeX entry types: https://ja.wikipedia.org/wiki/BibTeX
# Crossref work types: https://api.crossref.org/v1/types
CROSSREF_TO_BIB = {
    'journal-article': 'article',
    **dict.fromkeys(('monograph', 'book'), 'book'),
    **dict.fromkeys(
        ('book-section', 'book-track', 'book-part', 'book-chapter'),
        'inbook'),
    'proceedings-article': 'inproceedings',
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from typing import Optional, Dict, List | ||
from pathlib import Path | ||
|
||
from notion_client import Client | ||
|
||
from .misc import load_config | ||
|
||
|
||
class DatabaseInfo:
    """Token and database id read from the ``[database]`` config section."""

    def __init__(self, path_config: Optional[str | Path]=None):
        # Without an explicit path, use the config.ini next to this module.
        if not path_config:
            path_config = Path(__file__).parent / 'config.ini'
        section = load_config(path_config)['database']
        self.tokenkey = section['tokenkey']
        self.database_id = section['database_id']
|
||
|
||
class Database:
    """Wrapper around a ``notion_client.Client`` bound to one database id."""

    def __init__(self, dbinfo: 'DatabaseInfo'):
        """Create the client from ``dbinfo.tokenkey`` and keep the id."""
        self.notion = Client(auth=dbinfo.tokenkey)
        self.database_id = dbinfo.database_id

    def fetch_records(self, filter: Optional[dict]=None, debugmode: bool=False
                      ) -> 'Database':
        """Query the database page by page and store the rows.

        filter: passed as ``filter`` to ``databases.query`` when given.
        debugmode: when True, stop after the first page even if more exist.

        The collected rows are stored in ``self.db_results``.  Returns
        ``self`` so the result can be read as ``fetch_records(...).db_results``.
        """
        records = []
        # Only arguments that carry a value are sent, so that no None is
        # forwarded for ``filter`` or for the cursor of the first page.
        query = {'database_id': self.database_id}
        if filter is not None:
            query['filter'] = filter
        while True:
            page = self.notion.databases.query(**query)
            records += page['results']
            if not page['has_more']:
                break
            if debugmode:
                print('It is debugmode, records were fetched partly.')
                break
            query['start_cursor'] = page['next_cursor']
        self.db_results = records
        return self

    def update(self, page_id: str, prop: Dict):
        """Update the properties of the page ``page_id`` with ``prop``."""
        self.notion.pages.update(page_id=page_id, properties=prop)

    def create(self, prop: Dict):
        """Create a page with properties ``prop`` in this database."""
        self.notion.pages.create(
            parent={'database_id': self.database_id}, properties=prop)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import requests | ||
from pathlib import Path | ||
|
||
from bibtexparser.bwriter import BibTexWriter | ||
from bibtexparser.bibdatabase import BibDatabase | ||
|
||
from .database import Database | ||
from .abbrlister import AbbrLister | ||
from .pdf2doi import pdf_to_doi | ||
from .notionprop import NotionPropMaker | ||
from .prop2entry import notionprop_to_entry | ||
|
||
|
||
# NOTE(review): not referenced by any function visible in this module --
# confirm whether it is still needed.
DEBUGMODE = False
|
||
|
||
def add_records_from_local_pdfpath(
        database: Database, propnames: dict, input_pdfpath: str):
    """Create one database record for the paper in a local PDF file.

    The DOI is extracted with ``pdf_to_doi``, converted to properties with
    ``NotionPropMaker.from_doi`` and the record is created with its ``info``
    checkbox set.

    Raises Exception when no DOI could be extracted from the PDF.
    """
    if (doi := pdf_to_doi(input_pdfpath)) is None:
        raise Exception('DOI was not extracted from PDF.')
    maker = NotionPropMaker()
    prop = maker.from_doi(doi, propnames)
    prop |= {'info': {'checkbox': True}}
    database.create(prop)
|
||
|
||
def _update_record_from_doi(
        database: Database, doi: str, id_record: str, propnames: dict):
    """Fill the record ``id_record`` with the properties built from ``doi``.

    The ``info`` checkbox is set together with the other properties.

    Raises ValueError (chained to the original error) when the update fails.
    """
    prop_maker = NotionPropMaker()
    prop = prop_maker.from_doi(doi, propnames)
    prop |= {'info': {'checkbox': True}}
    try:
        database.update(id_record, prop)
    except Exception as e:
        print(str(e))
        # The title is only used for the message; if it cannot be read, fall
        # back to the DOI instead of masking the real error.
        try:
            name = prop['Name']['title'][0]['text']['content']
        except (KeyError, IndexError, TypeError):
            name = doi
        raise ValueError(f'Error while updating record: {name}') from e
|
||
|
||
def update_unchecked_records_from_doi(database: Database, propnames: dict):
    """Update every record whose ``info`` box is unchecked and has a DOI.

    The DOI property name is taken from ``propnames['doi']`` and falls back
    to ``'DOI'`` when that key is missing.
    """
    propname_doi = propnames.get('doi', 'DOI')
    query_filter = {
        'and': [{'property': 'info', 'checkbox': {'equals': False}},
                {'property': propname_doi,
                 'rich_text': {'is_not_empty': True}}]}
    for record in database.fetch_records(query_filter).db_results:
        doi = record['properties'][propname_doi]['rich_text'][0]['plain_text']
        _update_record_from_doi(database, doi, record['id'], propnames)
|
||
|
||
def update_unchecked_records_from_uploadedpdf(
        database: Database, propnames: dict):
    """Update every unchecked record that has a PDF in its files property.

    The first file of each record is downloaded to a temporary file in the
    current directory, the DOI is extracted from it and the record is
    updated from that DOI.  Records whose first file has no ``'file'``
    entry, or whose PDF yields no DOI, are skipped.
    """
    PATH_TEMP_PDF = Path('you-can-delete-this-file.pdf')
    query_filter = {
        'and': [{'property': 'info', 'checkbox': {'equals': False}},
                {'property': propnames['pdf'],
                 'files': {'is_not_empty': True}}]}
    for record in database.fetch_records(query_filter).db_results:
        fileinfo = record['properties'][propnames['pdf']]['files'][0]
        if 'file' not in fileinfo:
            # Skip instead of aborting the whole batch with a KeyError.
            continue
        pdffile = requests.get(fileinfo['file']['url']).content
        try:
            with PATH_TEMP_PDF.open(mode='wb') as f:
                f.write(pdffile)
            doi = pdf_to_doi(PATH_TEMP_PDF)
        finally:
            # Remove the temporary file even when the extraction raises.
            PATH_TEMP_PDF.unlink(missing_ok=True)
        if doi is None:
            continue
        _update_record_from_doi(database, doi, record['id'], propnames)
|
||
|
||
def make_bibfile_from_records(database: Database, target: str,
                              propnames: dict, dir_save_bib: str):
    """Write ``<dir_save_bib>/<target>.bib`` from the matching records.

    Records are selected when the multi-select property named by
    ``propnames['output_target']`` contains ``target``; each one is turned
    into a BibTeX entry with ``notionprop_to_entry``.
    """
    # Reverse mapping: property name -> bib name.
    propname_to_bibname = {
        propname: bibname for bibname, propname in propnames.items()}
    query_filter = {'property': propnames['output_target'],
                    'multi_select': {'contains': target}}
    records = database.fetch_records(query_filter).db_results

    bib_db = BibDatabase()
    bib_db.entries = [
        notionprop_to_entry(record['properties'], propname_to_bibname)
        for record in records]
    writer = BibTexWriter()
    path_bib = f'{dir_save_bib}/{target}.bib'
    with open(path_bib, 'w') as bibfile:
        bibfile.write(writer.write(bib_db))
|
||
|
||
def make_abbrjson_from_bibpath(input_bibpath: str, special_abbr: dict):
    """Save the journal abbreviations of a BIB file as JSON next to it.

    The output path is ``input_bibpath`` with its trailing ``.bib`` replaced
    by ``.json``.  ``special_abbr`` is passed to ``AbbrLister.listup``.
    """
    # Only the suffix is swapped: str.replace would also rewrite a '.bib'
    # that happens to occur in a directory name.
    output_jsonpath = input_bibpath.removesuffix('.bib') + '.json'
    lister = AbbrLister(input_bibpath)
    lister.listup(special_abbr).save(output_jsonpath)
|
||
|
||
if __name__ == '__main__':
    # Manual end-to-end run of every public function against the configured
    # database.
    from .misc import load_config
    from .database import DatabaseInfo

    config = load_config(Path(__file__).parent / 'config.ini')
    database = Database(DatabaseInfo())
    dir_save_bib = config['misc']['dir_save_bib']

    add_records_from_local_pdfpath(
        database, config['propnames'], 'test/samplepdfs/sample1.pdf')
    update_unchecked_records_from_doi(database, config['propnames'])
    update_unchecked_records_from_uploadedpdf(
        database, config['propnames'])
    make_bibfile_from_records(
        database, 'test', config['propnames'], dir_save_bib)
    # Same '<dir>/<target>.bib' layout that make_bibfile_from_records writes;
    # plain concatenation would drop the separator.
    make_abbrjson_from_bibpath(f'{dir_save_bib}/test.bib', config['abbr'])
Oops, something went wrong.