In [14]:
from sci_clone import config
import typer 
import pyfiglet
import random
from typing import List, Tuple, Optional
from pathlib import Path
from os import path, getcwd, mkdir
from datetime import datetime
import time
import re
import json
from urllib import request, parse
import configparser

In [61]:
class Requester:
    def __init__(self, config, timeout):
        etiquette = f"{config.__name__ }/{config.__version__} ({config.__url__}; " + \
                f"mailto:{config.__author_email__}) " + \
                f"BasedOn:{config.__name__}/{config.__version__}"
        self.header = {"user-agent": etiquette}
        self.timeout = timeout
    
    # retry decorator
    # Example:
    # @retry(3, 2) or @retry()
    # def test():
    #     pass
    def retry(retry_count, retry_interval):
        """
        retry decorator
        """
        def real_decorator(decor_method):
            def wrapper(*args, **kwargs):
                for count in range(retry_count):
                    try:
                        return_values = decor_method(*args, **kwargs)
                        return return_values
                    except Exception as error:
                        print(f"FATAL: retry: {count + 1} . Function execution failed for {decor_method.__name__}")
                        time.sleep(retry_interval)
                        if count == retry_count-1:
                            raise error
            return wrapper
        return real_decorator
    
    @retry(retry_count=3, retry_interval=2)
    def request(self, url, params="", method="GET"):
        query_string = parse.urlencode(params)
        if method == "GET":
            if params:
                req = request.Request(f"{url}?{query_string}")
            else:
                req = request.Request(url)
        elif method == "POST":
            req = request.Request(url, query_string.encode("UTF-8"))
        for key in self.header.keys():
            req.add_header(key, self.header[key])
        response = request.urlopen(req, timeout=self.timeout)
        return response

In [33]:
class GenList:
    def __init__(self, query, requester):
        self.query = query
        self.requester = requester
        
    def get_query_list(self):
        if re.match("^[0-9]{4}-[0-9]{3}[0-9xX]$", self.query[0]):
            issn = self.query[0]
            if len(query) in (2, 3) and all([re.match("^[\d]{4}$", i) for i in self.query[1:]]):
                year0, year1 = int(self.query[1]), int(self.query[-1])
                if not (1666 < year0 <= year1 <= datetime.now().year):
                    typer.secho('Please ensure valid year.', fg=typer.colors.MAGENTA)
                    raise typer.Exit(code=1)
            else:
                typer.secho('Please follow format: "sci-clone ISSN FROM_YEAR [TO_YEAR]"',
                            fg=typer.colors.MAGENTA)
                raise typer.Exit(code=1)
            container_title, list_dict = self.get_journal_works(issn, year0, year1)
        else:
            query_list = []
            for line in self.query:
                if line.endswith('.txt') or line.endswith('.bib'):
                    query_list += list(self.get_file_list(line))
                else:
                    query_list += [line,]
            container_title = "paper list"
            list_dict = {container_title: query_list}
        return container_title, list_dict
    
    def get_file_list(file_path):
        with open(file_path, 'r') as f:
            file_content = f.read()
        if file_path.endswith('.txt'):
            for line in file_content.split('\n'):
                yield line
        elif file_path.endswith('.bib'):
            items = file_content.lower().strip().split('@')[1:]
            for item in items:
                bibtex = configparser.ConfigParser(allow_no_value=True)
                bibtex.read_string('[item]' + item.rstrip('}\n'))
                bibtex['item']['cate'] = item.split(',')[0].split('{')[0]
                bibtex['item']['citekey'] = item.split(',')[0].split('{')[1]
                for key in bibtex['item']:
                    bibtex['item'][key] = bibtex['item'][key].lstrip('{"').rstrip(',')
                    if bibtex['item'][key].endswith('}') or bibtex['item'][key].endswith('"'):
                        bibtex['item'][key] = bibtex['item'][key][:-1]
                    bibtex['item'][key] = bibtex['item'][key].replace('\n', ' ')
                item_dict = dict(bibtex.items('item'))
                if 'doi' in item_dict:
                    yield item_dict['doi']
                elif 'url' in item_dict:
                    yield item_dict['url']
                elif 'pmid' in item_dict:
                    yield item_dict['pmid']
            
    def get_journal_works(self, issn, year_start, year_end):
        url = f"http://api.crossref.org/journals/{issn}/works"
        cursor = '*'
        results = list()
        while True:
            from_to = f"from-pub-date:{year_start},until-pub-date:{year_end}"
            r = self.requester.request(url, params={"rows": 1000, "cursor": cursor, "filter": from_to})
            r_json = json.loads(r.read())
            total = r_json['message']['total-results']
            cursor = r_json['message']['next-cursor']
            items = r_json['message']['items']
            results += items
            if len(results) < total:
                continue
            else:
                break
        container_title = results[0]['container-title'][0]
        yearly_result = dict()
        for year in range(year_start, year_end+1):
            year_list = list()
            for r in results:
                if r['published']['date-parts'][0][0] == year:
                    if 'DOI' in r:
                        year_list.append(r['DOI'])
                    elif 'URL' in r:
                        year_list.append(r['URL'])
            yearly_result[year] = year_list
        return container_title, yearly_result

In [62]:
class Processing:
    def __init__(self, scihub, requester, query, save_to):
        self.scihub = scihub
        self.requester = requester
        self.query = query
        self.save_to = save_to
        
    def download(self):
        title, list_dict = self.query
        for key,query_list in list_dict.items():
            if title == key:
                label = f"{title}: {len(query_list)}"
                sub_dir = self.save_to
            else:
                label = f"{title}({key}): {len(query_list)}"
                sub_dir = path.join(self.save_to, str(key))
                if not path.exists(sub_dir): mkdir(sub_dir)
            self.walk_the_list(label, query_list, sub_dir)
        
    def walk_the_list(self, label, query_list, sub_dir):
   #     print(version_callback(False))
        undone = list()
        with typer.progressbar(query_list, label=label, show_eta=False, show_percent=False, fill_char="▒", 
                               item_show_func=lambda x: f"{str(query_list.index(x))} | {x}" if x else x) as progress:
            for query in progress:
                item_done = self.get_pdf_scihub(query, sub_dir)
                if not item_done:
                    undone.append(query)
        log = path.join(sub_dir, "missing.log")
        with open(log, 'w') as f:
            if undone:
                f.writelines([f"{i}\n" for i in undone])
                typer.secho(f'missing log: {log}', fg=typer.colors.MAGENTA, bold=True, italic=True)
            else:
                f.write("all done.")
                typer.secho("all done.", fg=typer.colors.GREEN, bold=True, italic=True)
        return undone
    
    def get_pdf_scihub(self, query, sub_dir):
        response = self.requester.request(self.scihub, params={"request": query}, method="POST")
        response_text = response.read().decode()
        if "Sorry, sci-hub has not included this article yet" in response_text:
            file_url, file_name = False, False
        else:
            file_url = re.search("location.href='(.+)'", response_text).group(1).replace("\\", "")
            file_name = re.search("pdf/(.+)\?", file_url).group(1).replace("/", "_")
        if file_name:
            file_path = path.join(sub_dir, file_name)
            if path.exists(file_path):
                return True
            else:
                response = self.requester.request(file_url)
                with open(file_path, 'b+w') as f:
                    f.write(response.read())
                return True
        else:
            return False

In [None]:
requester = Requester(config, )

In [63]:
#query = ["10.1126/science.1248506", "https://www.jstor.org/stable/27854031"]
query_str = ["1360-0540", "2011"]
generator = GenList(query_str)
query = generator.get_query_list()

KeyboardInterrupt: 

In [53]:
scihub = "https://sci-hub.wf/"
requester = generator
save_to = "./"
processing = Processing(scihub, requester, query, save_to)

In [54]:
processing.download()

Journal of Education for Teaching(2011): 56
[32m[1m[3mall done.[0m


In [42]:
query

('Journal of Education for Teaching',
 {2011: ['10.1080/02607476.2011.558265',
   '10.1080/02607476.2011.538267',
   '10.1080/02607476.2011.538278',
   '10.1080/02607476.2011.611010',
   '10.1080/02607476.2011.588020',
   '10.1080/02607476.2011.562013',
   '10.1080/02607476.2011.611229',
   '10.1080/02607476.2011.610982',
   '10.1080/02607476.2011.588029',
   '10.1080/02607476.2011.611234',
   '10.1080/02607476.2011.538273',
   '10.1080/02607476.2011.588018',
   '10.1080/02607476.2011.611009',
   '10.1080/02607476.2011.540909',
   '10.1080/02607476.2011.611233',
   '10.1080/02607476.2011.611021',
   '10.1080/02607476.2011.611017',
   '10.1080/02607476.2011.610988',
   '10.1080/02607476.2011.588027',
   '10.1080/02607476.2011.538268',
   '10.1080/02607476.2011.587981',
   '10.1080/02607476.2011.540929',
   '10.1080/02607476.2011.610986',
   '10.1080/02607476.2011.558273',
   '10.1080/02607476.2011.588030',
   '10.1080/02607476.2011.558287',
   '10.1080/02607476.2011.538274',
   '10.1080

In [127]:
from urllib import request, parse
import json

In [198]:
@retry(retry_count=3, retry_interval=5)
def request_to(url, headers=False, params="", timeout=60, method="GET"):
    if method == "GET":
        query_string = parse.urlencode(params)
#        print((url, query_string))
        if params:
            req = request.Request(f"{url}?{query_string}")
        else:
            req = request.Request(url)
    elif method == "POST":
        req = request.Request(url, query_string.encode("UTF-8"))
    if headers:
        for key in headers.keys():
            req.add_header(key, headers[key])
    response = request.urlopen(req, timeout=timeout)
    response_text = response.read().decode()
    return response_text

In [106]:
journal_title = "Journal_of_Education_for_Teaching"
journal_issn = "1360-0540"

In [199]:
def get_journal_works(issn, year_span):
    url = f"http://api.crossref.org/journals/{issn}/works"
    v_APP = 'Sci-Clone'
    v_APP_Ver = 'v0.4'
    v_APP_URL = 'https://github.com/f10w3r/sci-clone'
    v_APP_Email = 'lifuminster@gmail.com'
    v_API = "innerFunction"
    v_API_Ver = "v1.5"
    etiquette = f"{v_APP}/{v_APP_Ver} ({v_APP_URL}; mailto:{v_APP_Email}) BasedOn: {v_API}/{v_API_Ver}"
    
    cursor = '*'
    results = list()
    while True:
        r = request_to(url, headers = {"user-agent": etiquette},
                       params={"rows": 1000, 
                               "cursor": cursor, 
                               "filter": f"from-pub-date:{year_span[0]},until-pub-date:{year_span[1]}"})
        r_json = json.loads(r)
        total = r_json['message']['total-results']
        cursor = r_json['message']['next-cursor']
        items = r_json['message']['items']
        results += items
        if len(results) < total:
            continue
        else:
            break

    yearly_result = dict()
    for year in range(year_span[0], year_span[1]+1):
        year_list = list()
        for r in results:
            if r['published']['date-parts'][0][0] == year:
                if 'DOI' in r:
                    year_list.append(r['DOI'])
                elif 'URL' in r:
                    year_list.append(r['URL'])
        yearly_result[year] = year_list
    return yearly_result

In [282]:
import pyfiglet
from random import choice

In [378]:

f = Figlet(font=font)

text = f.renderText('sci-clone')
text.index("\n")

72

In [490]:
sword = """
______________________________________________________|_._._._._._._._._._.
\_____________________________________________________|_#_#_#_#_#_#_#_#_#_|
                                                      l
"""

In [518]:
len("  @@@@@@  @@@@@@@ @@@           @@@@@@@ @@@       @@@@@@  @@@  @@@ @@@@@@@@")

75

In [541]:
while True:
    font = choice(pyfiglet.FigletFont.getFonts())
    f = Figlet(font=font)
    text = f.renderText('sci-clone')
    n_col = len(text.split("\n")[0])
    if n_col > 70:
        continue
    else:
        print(f"{text}\n        (figlet font: {font})")
        time.sleep(2)

                                                    
                                                    
                                                    
             ##              ###                    
             ##               ##                    
                             ##                     
  ###   ### ###         ###  ##    ##  ## ##    ### 
 ##    #  #  ## #####  #  #  ##   # ##  ## ##  #  # 
 ###  ##    ##        ##    ##   ## ##  ## ## ####  
  ##  ##  # ##        ##  # ##   ## #  ## ##  ##  # 
###    ###  ###        ###  ###   ##   ## ###  ###  
                                                    
                                                    

        (figlet font: xsbookbi)
                               /^^                             
               /^              /^^                             
 /^^^^    /^^^            /^^^ /^^   /^^    /^^ /^^     /^^    
/^^     /^^   /^^/^^^^^ /^^    /^^ /^^  /^^  /^^  /^^ /^   /^^ 
  /^^^ /^^    /^^     

KeyboardInterrupt: 

In [306]:
len(pyfiglet.FigletFont.getFonts())

425

In [200]:
yearly_result = get_journal_works(journal_issn, [2000, 2019])

In [201]:
yearly_result

{2000: ['10.1080/713676891',
  '10.1080/713676893',
  '10.1080/02607470050127108',
  '10.1080/02607470050127072',
  '10.1080/02607470050127045',
  '10.1080/02607470050127027',
  '10.1080/713676868',
  '10.1080/02607470050127036',
  '10.1080/02607470050007147',
  '10.1080/02607470050007129',
  '10.1080/713676889',
  '10.1080/713676890',
  '10.1080/713676894',
  '10.1080/02607470050127126',
  '10.1080/02607470050127081',
  '10.1080/02607470050007101',
  '10.1080/02607470050007110',
  '10.1080/02607470050127063',
  '10.1080/713676892',
  '10.1080/02607470050007156',
  '10.1080/713676888',
  '10.1080/02607470050007138',
  '10.1080/713676874',
  '10.1080/02607470050127117',
  '10.1080/02607470050127054',
  '10.1080/02607470050127090'],
 2001: ['10.1080/02607470120091579',
  '10.1080/02607470120042591',
  '10.1080/02607470120042573',
  '10.1080/02607470120067927',
  '10.1080/02607470120091551',
  '10.1080/02607470120091597',
  '10.1080/02607470120042519',
  '10.1080/02607470120042555',
  '10

In [172]:
list()

[2001, 2002, 2003, 2004, 2005]

In [None]:
results

In [160]:
len(results)

1728

In [167]:
r_2001 = 

In [168]:
len(r_2001)

26

In [43]:
def get_url(session, url_scihub, query):
    payload = {"request": query}
    header = {"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"}
    r = session.post(url_scihub, data=payload, headers=header)
    if "Sorry, sci-hub has not included this article yet" in r.text:
        return False, False
    else:
        element = r.html.xpath('//*[@id="buttons"]/ul/li[2]/a', first=True)
        url_pdf = element.attrs['onclick'][15:-1].replace('\\', '')
        return url_pdf, f"{'_'.join(r.url.split('/')[3:])}.pdf"

In [44]:
get_url(session, url_scihub, url_jstor_test)

('https://sci.bban.top/pdf/10.2307/27854031.pdf?download=true',
 '10.2307_27854031.pdf')

In [36]:
import re

In [2]:
from urllib import request, parse

In [64]:
def get_url(url_scihub, query):
    payload = {"request": query}
    query_string = parse.urlencode(payload)    
    data = query_string.encode("ascii")
    r = request.Request(url_scihub, data, method='POST')
    r.add_header("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)")
    response = request.urlopen(r)
    response_text = response.read().decode()
    if "Sorry, sci-hub has not included this article yet" in response_text:
        return False, False
    else:
        url_pdf = re.search("location.href='(.+)'", response_text).group(1).replace("\\", "")
        return url_pdf, re.search("pdf/(.+)\?", url_pdf).group(1).replace("/", "_")

In [51]:
r = get_url(url_scihub, doi)

In [52]:
r

(False, False)

In [65]:
r = get_url(url_scihub, url_jstor_test)

In [66]:
r

('https://sci.bban.top/pdf/10.2307/27854031.pdf?download=true',
 '10.2307_27854031.pdf')

In [88]:
re.findall("s", query)

[]

In [87]:
len(re.findall("s", query)) == 1

False

In [83]:
url_libgen = "http://libgen.rs/scimag/"
query = "10.2307/27854031"

In [None]:
if "Sorry, sci-hub has not included this article yet" in response_text:
        file_url, file_name = False, False
    else:
        file_url = re.search("location.href='(.+)'", response_text).group(1).replace("\\", "")
        file_name = re.search("pdf/(.+)\?", file_url).group(1).replace("/", "_")
    if file_url:
        req = request.Request(file_url, method='GET')
        req.add_header("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)")
        response = request.urlopen(req, timeout=60)
        with open(path.join(save_to, file_name), 'b+w') as f:
            f.write(response.read())
        return True
    else:
        return False

In [178]:
def get_pdf_libgen(url_libgen, query):
    payload = {"q": query}
    query_string = parse.urlencode(payload)
    print(query_string)
    req = request.Request(url_libgen + '?' + query_string, method='GET')
    req.add_header("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)")
    print(req.full_url)
    response = request.urlopen(req, timeout=60)
    response_text = response.read().decode()
    results = re.findall("<tr>([\S\s]+)</tr>", response_text)
    if not results:
        return False
    else:
        for r in results:
            doi = re.findall("DOI\: ([\s\S]+)</p>\t", r)
            print(doi)
            if doi: doi = doi[0]
            print(doi)
            if (query in doi) ^ (doi[0] in query):
                mirrors = re.search('<ul class="record_mirrors">([\s\S]+)', result).group()
                if mirrors: urls = re.findall('href="(\S+)"', mirrors)
                if urls: url_liblol = [i for i in urls if "library.lol" in i][0]
                item_done = 
                return r
    return results

In [193]:
import time
import sys

total = 1007  # total number to reach
bar_length = 30  # should be less than 100
for i in range(total+1):
    percent = 100.0*i/total
    sys.stdout.write('\r')
    sys.stdout.write("Completed: [{:{}}] {:>3}%"
                     .format('='*int(percent/(100.0/bar_length)), bar_length, int(percent)))
    sys.stdout.flush()
    time.sleep(0.002)



In [179]:
result = get_pdf_libgen(url_libgen, query="10.1177/0894439319886019")

q=10.1177%2F0894439319886019
http://libgen.rs/scimag/?q=10.1177%2F0894439319886019
['10.1177/0894439319886019']
10.1177/0894439319886019


In [186]:
mirrors = re.search('<ul class="record_mirrors">([\s\S]+)', result).group()
re.findall('href="(\S+)"', mirrors)

['https://sci-hub.se/10.1177/0894439319886019',
 'http://library.lol/scimag/10.1177/0894439319886019',
 'https://cdn1.booksdl.org/ads.php?doi=10.1177/0894439319886019',
 'https://booksc.org/s/?q=10.1177/0894439319886019']

In [182]:
re.findall("DOI: ([\s\S]+)</p>\t", result)

['10.1177/0894439319886019']

In [152]:
not result

False

In [63]:
re.search("pdf/(.+)\?", 'https://sci.bban.top/pdf/10.2307/27854031.pdf?download=true').group(1).replace("/", "_")


'10.2307_27854031.pdf'

In [45]:
re.search("location.href='(.+)'", r).group(1).replace("\\", "")

'https://sci.bban.top/pdf/10.2307/27854031.pdf?download=true'

In [33]:
type(r)

str

In [188]:
True ^ False

True

In [189]:
False ^ True

True

In [190]:
False ^ False

False

In [191]:
True ^ True

False

In [None]:
r = session.post(url_scihub, data=payload, headers=header)
    if "Sorry, sci-hub has not included this article yet" in r.text:
        return False, False
    else:
        element = r.html.xpath('//*[@id="buttons"]/ul/li[2]/a', first=True)
        url_pdf = element.attrs['onclick'][15:-1].replace('\\', '')
        return url_pdf, f"{'_'.join(r.url.split('/')[3:])}.pdf"

In [60]:
def get_list(file):
    with open(file, 'r') as f:
        for line in f.readlines():
            yield line.strip()

In [63]:
list(range(2012, 2012+1))

[2012]

In [61]:
[i for i in get_list("./examples/doi.txt")]

['10.1016/j.compbiomed.2021.104499',
 '10.1016/j.ssresearch.2016.04.017',
 '10.1109/ICAwST.2019.8923186',
 '10.1016/j.engappai.2012.08.002',
 '10.1016/j.patrec.2014.09.008',
 '10.1080/10670564.2020.1790899']

In [48]:
file_url, file_name = get_url(session, url_scihub, doi)
if file_url:
    r = session.get(file_url)
    with open(file_name,'wb') as output_file:
        output_file.write(r.content)
else:
    print("Sorry, sci-hub has not included this article yet.")

Sorry, sci-hub has not included this article yet


In [68]:
import numpy as np

In [69]:
def f(s, i):
    p2 = (s[i] - np.mean(s))**2
    return (1/len(s)) * p2

In [71]:
sum([f(s, i) for i in range(len(s))])

0.4898437500000001

In [72]:
0.6999**2

0.48986000999999996

In [74]:
a = [] 
a += [1,2,3]
a

[1, 2, 3]

In [80]:
def get_issn_list(session, year, issn):
        """
            get DOI list by year from CrossRef.org
        """
        url = f"https://api.crossref.org/journals/{issn}/works"
        cursor = '*'
        doi_list = list()
        year = str(year)
        while True:
            params = {
                "rows": 1000,
                "cursor": cursor,
                "filter": f"from-pub-date:{year}-01,until-pub-date:{year}-12"
            }
            r = session.get(url, params=params, 
                            headers={"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"},
                            timeout=30)
            j = json.loads(r.text)
            if len(j['message']['items']):
                doi_list.extend(j['message']['items'])
                cursor = j['message']['next-cursor']
            else:
                break
        return doi_list

In [9]:
from requests_html import HTMLSession
session = HTMLSession()

In [None]:
session.get(headers = {"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"})

In [81]:
get_issn_list(session, 2021, "0002-9602")

ReadTimeout: HTTPSConnectionPool(host='api.crossref.org', port=443): Read timed out. (read timeout=30)

In [45]:
issn = "1360-0540"
url = f"http://api.crossref.org/journals/{issn}/works"

cursor = '*'
results = list()

session = HTMLSession()

while True:
    r = session.get(url, params={"rows": 1000, "cursor": cursor}, 
                    headers = {"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"})
    total = r.json()['message']['total-results']
    cursor = r.json()['message']['next-cursor']
    items = r.json()['message']['items']
    results += items
    print(len(results), total)
    if len(results) < total:
        continue
    else:
        break

1000 1728
1728 1728


In [50]:
journal_title = "Journal_of_Education_for_Teaching"
journal_issn = "1360-0540"

In [47]:
import pandas as pd

In [48]:
articles_df = pd.json_normalize(results)

In [51]:
articles_df.to_csv(f"{journal_title}.tsv", sep='\t')

In [55]:
ref_list = []
for references in tqdm(articles_df.reference.tolist()):
    if type(references) == list:
        ref_list += references

  0%|          | 0/1728 [00:00<?, ?it/s]

In [63]:
list(ref_info.keys())[-10:]

['10.1016/S0742-051X(02)00087-2',
 '10.1080/13504620120065230',
 '10.1080/0261976870100305',
 '10.1016/0742-051X(93)90015-9',
 '10.1080/03057640123915',
 '10.1108/09513549410065701',
 '10.1037/0022-0663.71.5.669',
 '10.1017/9781316275184',
 '10.1017/9781009024532',
 '10.1080/03057267.2013.858496']

In [80]:
print(my_etiquette)

Sci-Clone/v0.4 (https://github.com/f10w3r/sci-clone; mailto:lifuminster@gmail.com) BasedOn: CrossrefAPI/1.5.0


In [82]:
v_APP = 'Sci-Clone'
v_APP_Ver = 'v0.4'
v_APP_URL = 'https://github.com/f10w3r/sci-clone'
v_APP_Email = 'lifuminster@gmail.com'
v_API = "innerFunction"

etiquette = f"{v_APP}/{v_APP_Ver} ({v_APP_URL}; mailto:{v_APP_Email}) BasedOn: {v_API}/{v_APP_Ver}"
etiquette

'Sci-Clone/v0.4 (https://github.com/f10w3r/sci-clone; mailto:lifuminster@gmail.com) BasedOn: innerFunction/v0.4'

In [73]:
import time
from crossref.restful import Works, Etiquette

my_etiquette = Etiquette('Sci-Clone', 'v0.4', 
                         'https://github.com/f10w3r/sci-clone', 
                         'lifuminster@gmail.com')
works = Works(etiquette=my_etiquette)

In [74]:
while True:
    try:
        for ref in tqdm(ref_list):
            if "DOI" in ref:
                if ref['DOI'] not in ref_info:
                    ref_info[ref['DOI']] = works.doi(ref['DOI'])
              #      print(ref['DOI'])
    except:
        print("error, retrying...")
        time.sleep(30)
        continue
    break

  0%|          | 0/34246 [00:00<?, ?it/s]

error, retrying...


  0%|          | 0/34246 [00:00<?, ?it/s]

In [75]:
articles_ref_df = pd.json_normalize(ref_info.values())

In [76]:
articles_ref_df.to_csv(f'{journal_title}_ref_DOI.tsv', sep='\t')

In [77]:
articles_ref_df

Unnamed: 0,reference-count,publisher,issue,short-container-title,DOI,type,page,source,is-referenced-by-count,title,...,event.sponsor,relation.has-preprint,clinical-trial-number,group-title,subtype,posted.date-parts,relation.is-preprint-of,degree,approved.date-parts,relation.has-review
0,15.0,Informa UK Limited,3,[Educational Research],10.1080/0013188790210307,journal-article,212-219,Crossref,13.0,[Early Changes in Teacher Attitude],...,,,,,,,,,,
1,12.0,Informa UK Limited,3-4,[South Pacific Journal of Teacher Education],10.1080/0311213790070304,journal-article,92-98,Crossref,7.0,[From Student to Primary School Teacher: Attit...,...,,,,,,,,,,
2,0.0,University of Chicago Press,3,[The School Review],10.1086/442847,journal-article,312-323,Crossref,61.0,[The Influence of Experience on the Beginning ...,...,,,,,,,,,,
3,0.0,Wiley,1,[],10.1111/j.2044-8260.1967.tb00496.x,journal-article,32-37,Crossref,31.0,[The Educational Opinions of Teachers in Train...,...,,,,,,,,,,
4,11.0,Informa UK Limited,3,[Educational Research],10.1080/0013188670090307,journal-article,208-212,Crossref,30.0,[THEORY AND PRACTICE IN THE EDUCATION OF TEACH...,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11482,21.0,Springer Science and Business Media LLC,1,[J Math Teacher Educ],10.1007/s10857-006-9006-8,journal-article,91-102,Crossref,102.0,[Viewing Mathematics Teachers’ Beliefs as Sens...,...,,,,,,,,,,
11483,42.0,Springer Science and Business Media LLC,3,[Educ Stud Math],10.1007/s10649-005-2745-0,journal-article,361-391,Crossref,88.0,[Issues of Methods and Theory in the Study of ...,...,,,,,,,,,,
11484,12.0,Wiley,5,[],10.1111/j.1949-8594.2004.tb18245.x,journal-article,226-232,Crossref,48.0,[Change in Preservice Teachers' Beliefs: An Ev...,...,,,,,,,,,,
11485,8.0,Informa UK Limited,3,[Journal of Education for Teaching],10.1080/09589236.2019.1599507,journal-article,353-357,Crossref,6.0,[Enhancing preservice teachers’ professional c...,...,,,,,,,,,,


In [5]:
from crossref.restful import Journals
from tqdm.notebook import tqdm
tqdm.pandas()

journals = Journals()

In [6]:
ws = journals.works('1360-0540').filter(from_pub_date='2017', until_pub_date='2017')

In [7]:
ws.url

'https://api.crossref.org/journals/1360-0540/works?filter=from-pub-date%3A2017%2Cuntil-pub-date%3A2017'

In [8]:
result = [i for i in tqdm(ws)]

0it [00:00, ?it/s]

In [54]:
works.doi('10.1590/0102-311x00133115')

NameError: name 'works' is not defined