In [81]:
import requests
import json
import threading
import re
import spacy
from datetime import date
from operator import itemgetter

todays_date = date.today()
sp = spacy.load('en_core_web_sm')

def citations_per_paper_score(citations, number_of_papers):
    
    citations = int(citations)
    number_of_papers = int(number_of_papers)
    
    citations_per_paper = citations / number_of_papers
    if citations_per_paper < 5 :
        return 0.03 * citations_per_paper
    else :
        return 0.15
    
def publications_per_year_score(publication, exp):
    
    publication = int(publication)
    exp = int(exp)
    
    publications_per_year = publication / (exp)
    if publications_per_year < 3:
        return 0.15 * publications_per_year / 2
    elif publications_per_year < 6:
        return 0.15 * (6 - publications_per_year) / 2
    else:
        return 0
    
def experience(data):
    
    end_year = 0
    start_year = 3000
    try : 
        
        for paper in data['papers']:
            if(paper['year'] > end_year):
                end_year = paper['year']
            if(paper['year'] < start_year):
                start_year = paper['year']

        exp = end_year - start_year + 1
        return exp
    except:
        return 1
    
def experience_score(start_year, end_year):
    time_duration = end_year - start_year + 1
    if time_duration < 16:
        return 0.15 * time_duration / 15
    else:
        return 0.15
    
def check_topic_relevance(keywords, data):
    score = 0
    for papers in data['papers']:
        for publication_topics in papers['title']:
            text_tokens = list(keywords)
            publication_topics = list(publication_topics)
            all_stopwords = sp.Defaults.stop_words
            tokens_without_stopwords = [word for word in text_tokens if not word in all_stopwords]
            relevant_topics = [word for word in tokens_without_stopwords for topic in publication_topics if topic == word]
            if len(relevant_topics) == len(tokens_without_stopwords):
                score += 0.02
            else:
                score += 0.02 * len(relevant_topics) / len(tokens_without_stopwords)
    
    return score/50

def hIndex(data):
    return data['hIndex']/30
    
def authorJson(authorId):
    URL = "https://api.semanticscholar.org/graph/v1/author/" + (authorId) + "?fields=url,name,affiliations,paperCount,citationCount,hIndex,papers.title,papers.year"
    data = (requests.get(url = URL )).json()
    return data
    
def authorScore(keywords,authorId):
    
    data = authorJson(authorId)
    exp = experience(data)
    ppy_score = publications_per_year_score(str(data['paperCount']),str(exp))
    cpp_score = citations_per_paper_score(str(data['citationCount']),str(data['paperCount']))
    ctr_score = check_topic_relevance(keywords,data)
    hi_score = hIndex(data)
    
    authorScore = ppy_score + cpp_score + ctr_score + hi_score

    return authorScore

def influential_citation_count(paper):
    score = paper['influentialCitationCount']
    if(score < 50):
        return score/250
    else:
        return 0.5
    
def authorList(paper):
    authors = paper['authors']
    authorList = []
    i = 1;
    for author in authors:
        dict = {}
        data = authorJson(author['authorId'])
        try:
            dict['affiliation'] = data['affiliations'][0]
        except:
            dict['affiliation'] = " "
        dict['authorUrl'] = data['url']
        dict['id'] = data['authorId']
        dict['full_name'] = data['name']
        dict['author_order'] = i
        i = i + 1
        authorList.append(dict)
    
    return authorList
    
    
def citation_per_year(paper):
    score = (paper['citationCount'] / (todays_date.year - paper['year']))
    if(score < 50):
        return score/250
    else:
        return 0.5
    
def papers(keywords):
    tokens = keywords.split(" ")
    tokenString = '+'.join(tokens)
    URL = "https://api.semanticscholar.org/graph/v1/paper/search?query=" + tokenString + "&offset=10&limit=10&fields=title,url,authors,abstract,year,referenceCount,citationCount,influentialCitationCount,journal,publicationDate"
    return ((requests.get(url = URL )).json()['data'])

def ranking(keywords):
    researchPapers = papers(keywords)
    articles = []
    for paper in researchPapers:
        dict = {}
        
        authors = paper['authors']
        numberOfAuthors = len(authors)
        author_score = 0
        if(numberOfAuthors == 1):
            author_score = (authorScore(keywords,authors[0]['authorId']))
        else:
            author_score = ((authorScore(keywords,authors[0]['authorId'])) + (authorScore(keywords,authors[1]['authorId']))) / 2
        authorsList = authorList(paper)
        try:
            dict['title'] = paper['title']
        except:
            dict['title'] = " "
        try:
             dict['pdf_url'] = paper['url']
        except:
             dict['pdf_url'] = " "
        try:
            dict['abstract'] = paper['abstract']
        except:
            dict['abstract'] = " "
        try:
            dict['publication_year'] = paper['year']
        except:
            dict['publication_year'] = " "
        try:
            dict['citing_paper_count'] = paper['citationCount']
        except:
            dict['citing_paper_count'] = " "
        try:
            dict['influenceScore'] = paper['influentialCitationCount']
        except:
            dict['influenceScore'] = " "
        try:
            dict['auhtorScore'] = str(author_score)
        except:
            dict['auhtorScore'] = " "
        try:
            dict['paperScore'] = (author_score + citation_per_year(paper))
        except:
            dict['paperScore'] = 0
        try:
            dict['publication_date'] = paper['publicationDate']
        except:
            dict['publication_date'] = " "

        dict['publisher'] = " "
        
        try:
            dict['journal'] = paper['journal']['name']
        except:
            dict['journal'] = " "
        try:
            dict['authors'] = {"authors" : authorsList}
        except:
            dict['authors'] = {"authors" : []}
        
        articles.append(dict)
        
    articles = sorted(articles, key=itemgetter('paperScore'), reverse=True)
    return {"articles" : articles}
        

In [None]:
print(ranking("Image Analysis"))

{'title': 'Deep learning with noisy labels: exploring techniques and remedies in medical image analysis', 'pdf_url': 'https://www.semanticscholar.org/paper/7e40d5d095db557810ae9560db33a3c2a09e630c', 'abstract': None, 'publication_year': 2019, 'citing_paper_count': 209, 'influenceScore': 15, 'auhtorScore': '0.6788545454545476', 'paperScore': 1.1788545454545476, 'publication_date': '2019-12-05', 'publisher': ' ', 'journal': 'Medical image analysis', 'authors': {'authors': [{'affiliation': ' ', 'authorUrl': 'https://www.semanticscholar.org/author/2130813', 'id': '2130813', 'full_name': 'D. Karimi', 'author_order': 1}, {'affiliation': ' ', 'authorUrl': 'https://www.semanticscholar.org/author/10669778', 'id': '10669778', 'full_name': 'Haoran Dou', 'author_order': 2}, {'affiliation': ' ', 'authorUrl': 'https://www.semanticscholar.org/author/145502062', 'id': '145502062', 'full_name': 'S. Warfield', 'author_order': 3}, {'affiliation': ' ', 'authorUrl': 'https://www.semanticscholar.org/author/

{'title': 'Deep learning for cellular image analysis', 'pdf_url': 'https://www.semanticscholar.org/paper/48cc41c7b2fac21d7bbd2988c5c6a2c5f9744852', 'abstract': None, 'publication_year': 2019, 'citing_paper_count': 506, 'influenceScore': 5, 'auhtorScore': '0.5051522727272728', 'paperScore': 1.0051522727272728, 'publication_date': '2019-05-27', 'publisher': ' ', 'journal': 'Nature Methods', 'authors': {'authors': [{'affiliation': ' ', 'authorUrl': 'https://www.semanticscholar.org/author/2065614284', 'id': '2065614284', 'full_name': 'Erick Moen', 'author_order': 1}, {'affiliation': ' ', 'authorUrl': 'https://www.semanticscholar.org/author/91548448', 'id': '91548448', 'full_name': 'Dylan Bannon', 'author_order': 2}, {'affiliation': ' ', 'authorUrl': 'https://www.semanticscholar.org/author/5864924', 'id': '5864924', 'full_name': 'Takamasa Kudo', 'author_order': 3}, {'affiliation': ' ', 'authorUrl': 'https://www.semanticscholar.org/author/2054090660', 'id': '2054090660', 'full_name': 'Willia