In [2]:
##import sys
##!{sys.executable} -m pip install nameparser

# Libraries
import bibtexparser
import certifi
import datetime
import json
import numpy as np
import os
import pandas as pd
import pycurl
import re
import requests

from bibtexparser.bparser import BibTexParser
from datetime import date
from habanero import Crossref
from io import BytesIO
from pyzotero import zotero
from serpapi import GoogleSearch
from urllib.parse import urlencode

In [3]:
# Set your API_KEY 
API_KEY = ""
MIN_YEAR = 2020
TERM = "brain oxygen memory"

# Search Parameters 
params = {
  "api_key": API_KEY,
  "device": "desktop",
  "engine": "google_scholar",
  "q": TERM,
  "hl": "en",
  "num": "20",
  "as_ylo": MIN_YEAR
}

# Search
search = GoogleSearch(params)

In [4]:
# Scrape Results, Extract Result Id's
json_data = search.get_raw_json()
data = json.loads(json_data)
df = pd.json_normalize(data['organic_results'])
ris = df['result_id']

https://serpapi.com/search


In [7]:
# TEMP
#print(ris)

for c in ris:
    print(c)

h8qLjPx3QjIJ
IlF9j4SFkqgJ
XzVO87V5VEIJ
LSEqQzeM9N4J
k1j0uahSYIcJ
rz5KOI2tsjkJ
IWx38RIDXnkJ
PR9Z8cF-NQYJ
7i9dNoqxeBcJ
ugfU-qa71lMJ
0BDqIHZdnTQJ
p-SR6kvjJxwJ
gKSLgZo6NB4J
6nbEwVplPlYJ
Ow3Twp63MpkJ
O52qi6OPW0oJ
v3e3QcQzlpgJ
pLybULKFInoJ
aOCCTFh6gTUJ
Xp_89DjMc5kJ


In [456]:
# Get the Citation!
params = {
  "api_key": API_KEY,
  "device": "desktop",
  "engine": "google_scholar_cite",
  "q": ris[0]
}

search = GoogleSearch(params)
citation = search.get_dict()

https://serpapi.com/search


In [457]:
# Get APA Format Citation and Parse
citation['citations'][1]['snippet']

'Lucas, Y., Portier, P. E., Laporte, L., He-Guelton, L., Caelen, O., Granitzer, M., & Calabretto, S. (2020). Towards automated feature engineering for credit card fraud detection using multi-perspective HMMs. Future Generation Computer Systems, 102, 393-402.'

In [458]:
# Cross-reference the Citation with Crossref to Get Bibtext
base = 'https://api.crossref.org/works?query.'
api_url = {'bibliographic':citation['citations'][1]['snippet']}
url = urlencode(api_url)
url = base+url
response = requests.get(url)

In [459]:
# Parse Bibtext from Crossref
jsonResponse = response.json()
jsonResponse = jsonResponse['message']
jsonResponse = jsonResponse['items']
jsonResponse = jsonResponse[0]
jsonResponse['DOI']
curl_str = 'curl -LH "Accept: application/x-bibtex" http://dx.doi.org/' + jsonResponse['DOI']

In [460]:
result = os.popen(curl_str).read()

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100   233  100   233    0     0   1122      0 --:--:-- --:--:-- --:--:--  1122
100   534    0   534    0     0   1384      0 --:--:-- --:--:-- --:--:--  1384


In [461]:
# Write bibtext file
text_file = open("auto_cite.bib", "w")
n = text_file.write(result)
text_file.close()

In [462]:
# Parse bibtext
with open('auto_cite.bib') as bibtex_file:
    parser = BibTexParser()
    parser.customization = bibtexparser.customization.author    
    bib_database = bibtexparser.load(bibtex_file, parser=parser)
bib_dict = bib_database.entries[0]
print(bib_dict)

{'journal': 'Future Generation Computer Systems', 'title': 'Towards automated feature engineering for credit card fraud detection using multi-perspective {HMMs}', 'author': ['Lucas, Yvan', 'Portier, Pierre-Edouard', "Laporte, L{\\'{e}}a", 'He-Guelton, Liyun', 'Caelen, Olivier', 'Granitzer, Michael', 'Calabretto, Sylvie'], 'pages': '393--402', 'volume': '102', 'publisher': 'Elsevier {BV}', 'month': 'jan', 'year': '2020', 'url': 'https://doi.org/10.1016%2Fj.future.2019.08.029', 'doi': '10.1016/j.future.2019.08.029', 'ENTRYTYPE': 'article', 'ID': 'Lucas_2020'}


In [463]:
# Connect to Zotero
zot = zotero.Zotero('7032524', 'user', 'HAk2Mg9hOkkUizABjJEBFWRz')
template = zot.item_template('journalArticle') # Set Template

In [475]:
# Populate Zotero Template with Data
try:
    template['publicationTitle'] = bib_dict['journal']
except:
    pass
template['title'] = bib_dict['title']
template['DOI'] = str(jsonResponse['DOI'])
try:
    template['accessDate'] = str(date.today())
except:
    pass
try:
    template['extra'] = str(bib_database.comments)
except:
    pass
try:
    template['url'] = bib_dict['url']
except:
    pass
try:
    template['volume'] = bib_dict['volume']
except:
    pass
try:
    template['issue'] = bib_dict['number']
except:
    pass
try:
    template['abstractNote'] = df['snippet'][0]
except:
    pass

# Fix Date
mydate = bib_dict['month']+' '+bib_dict['year']
template['date'] = str(datetime.datetime.strptime(mydate, '%b %Y').date())

In [476]:
# Parse Names into Template/Data
num_authors = len(bib_dict['author'])
template['creators'] = []

for a in bib_dict['author']:
    split = bibtexparser.customization.splitname(a, strict_mode=False)
    template['creators'].append({'creatorType': 'author', 'firstName': split['first'][0], 'lastName': split['last'][0]})

print(template)

{'itemType': 'journalArticle', 'title': 'Towards automated feature engineering for credit card fraud detection using multi-perspective {HMMs}', 'creators': [{'creatorType': 'author', 'firstName': 'Yvan', 'lastName': 'Lucas'}, {'creatorType': 'author', 'firstName': 'Pierre-Edouard', 'lastName': 'Portier'}, {'creatorType': 'author', 'firstName': "L{\\'{e}}a", 'lastName': 'Laporte'}, {'creatorType': 'author', 'firstName': 'Liyun', 'lastName': 'He-Guelton'}, {'creatorType': 'author', 'firstName': 'Olivier', 'lastName': 'Caelen'}, {'creatorType': 'author', 'firstName': 'Michael', 'lastName': 'Granitzer'}, {'creatorType': 'author', 'firstName': 'Sylvie', 'lastName': 'Calabretto'}], 'abstractNote': '… In order not to depend on expert knowledge, we favour automated feature engineering in a … The multiple perspective property of our HMM-based automated feature engineering strategy …', 'publicationTitle': 'Future Generation Computer Systems', 'volume': '102', 'issue': '', 'pages': '', 'date': '2

In [477]:
zot.create_items([template])

{'successful': {'0': {'key': 'JFQKKDQ8',
   'version': 311,
   'library': {'type': 'user',
    'id': 7032524,
    'name': 'hackr',
    'links': {'alternate': {'href': 'https://www.zotero.org/hackr',
      'type': 'text/html'}}},
   'links': {'self': {'href': 'https://api.zotero.org/users/7032524/items/JFQKKDQ8',
     'type': 'application/json'},
    'alternate': {'href': 'https://www.zotero.org/hackr/items/JFQKKDQ8',
     'type': 'text/html'}},
   'meta': {'creatorSummary': 'Lucas et al.',
    'parsedDate': '2020-01-01',
    'numChildren': 0},
   'data': {'key': 'JFQKKDQ8',
    'version': 311,
    'itemType': 'journalArticle',
    'title': 'Towards automated feature engineering for credit card fraud detection using multi-perspective {HMMs}',
    'creators': [{'creatorType': 'author',
      'firstName': 'Yvan',
      'lastName': 'Lucas'},
     {'creatorType': 'author',
      'firstName': 'Pierre-Edouard',
      'lastName': 'Portier'},
     {'creatorType': 'author',
      'firstName': "L