In [1]:
'''
This notebook was used to prototype the search engine, please run it after you've ran the first two 
notebooks (create_pubmed_db and create_kw_similarity_model) to ensure you have the databases and the model
'''

In [None]:
import pickle
from transformers import *
import torch
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import random

In [2]:
with open('dict_articles_V2.pkl', 'rb') as handle:
    dict_articles = pickle.load(handle)
    
with open('dictionary.pkl', 'rb') as handle:
    dict_vocab = pickle.load(handle)
    
with open('dict_authors_v4.pickle', 'rb') as handle:
    dict_authors = pickle.load(handle)

with open('neighbor_model.pkl','rb') as handle:
    neighbor_model = pickle.load(handle)

In [5]:
scibert_tokenizer = AutoTokenizer.from_pretrained('dmis-lab/biobert-v1.1')
scibert_model = AutoModel.from_pretrained('dmis-lab/biobert-v1.1')

In [6]:
def tokenize_string(string):
    token_keyword = torch.tensor(scibert_tokenizer.encode(string)).unsqueeze(0)
    out = scibert_model(token_keyword)
    vector = out[0][0][1:-1].detach().numpy().mean(axis=0)
    return vector

def get_preliminary_keywords(vector,max_distance=100):
    twenty_nearest_neighbors = neighbor_model.kneighbors(vector.reshape(1,-1))
    preliminary_lst_keywords = []
    for distance,pos in zip(twenty_nearest_neighbors[0][0],twenty_nearest_neighbors[1][0]):
        if distance < max_distance:
            preliminary_lst_keywords.append(list(dict_vocab.keys())[pos])
            
    return preliminary_lst_keywords

In [7]:
search_query = 'ivermectin treatment efficacy'
vector = tokenize_string(search_query)
prelim_kw = get_preliminary_keywords(vector,max_distance=120)
prelim_kw

['ivermectin',
 'ivermectina',
 'citoquine storm',
 'chloroquine.',
 'chloroquine derivatives',
 'clq, chloroquine',
 'insecticide-treated nets',
 'oh chloroquine',
 'chloroquine sulfate',
 'cloroquina',
 'ipac, infection prevention and control',
 'case report | fluconazole',
 'chloroquine',
 'clarithromycin covid19.',
 'imi, imiquimod 5% cream',
 'covid19, wuhan virus, hydroxychloroquine, remedesevir, antihelminth, anti-cytokine therapy, antiviral.',
 'asinex antiviral library',
 'artemisinin derivatives',
 'lumiradx antigen test',
 'cq, chloroquine']

In [8]:
articles_of_interest = []
for key,values in dict_articles.items():
    if any([keyword in values['Keywords'] for keyword in prelim_kw]):
        articles_of_interest.append(key)
articles_of_interest   

['10.3760/cma.j.issn.1001-0939.2020.0019',
 '10.5582/bst.2020.01047',
 '10.1016/j.cmi.2020.05.016',
 '10.1016/j.pharmthera.2020.107587',
 '10.1007/s00210-020-01902-5',
 '10.1007/s12011-020-02194-9',
 '10.1080/24734306.2020.1757967',
 '10.1016/j.jiph.2020.05.005',
 '10.1016/j.ijantimicag.2020.106028',
 '10.1016/j.hrthm.2020.05.008',
 '10.1161/CIRCEP.120.008688',
 '10.3389/fmed.2020.00192',
 '10.3390/ph13050098',
 '10.3390/ph13050096',
 '10.1016/j.ijsu.2020.05.018',
 '10.1016/j.lfs.2020.117775',
 '10.1016/j.therap.2020.05.004',
 '10.1007/s10840-020-00765-3',
 '10.31138/mjr.31.1.94',
 '10.1016/j.biopha.2020.110267',
 '10.1016/j.sapharm.2020.04.031',
 '10.1016/j.mehy.2020.109815',
 '10.1016/j.ijantimicag.2020.106020',
 '10.7759/cureus.7608',
 '10.1177/0194599820928989',
 '10.1007/s40495-020-00216-7',
 '10.1111/resp.13845',
 '10.3389/fmed.2020.00184',
 '10.1016/j.ijantimicag.2020.106012',
 '10.1177/1060028020925558',
 '10.1016/j.tmaid.2020.101735',
 '10.1186/s13054-020-02932-4',
 '10.1016/j

In [45]:
def add_first_author(authors_search_results,first_author,article_doi):
    if len(first_author.strip())>2:
        name = first_author.split(' - ')[0]
        affiliation = first_author.split(' - ')[1]
        if first_author in authors_search_results:
            authors_search_results[name]['Score'] += 4
            authors_search_results[name]['Articles'].append(article_doi)
            authors_search_results[name]['Affiliation'].append(affiliation)
        else:
            authors_search_results[name] = {'Score':4,'Articles':[article_doi],'Affiliation':[affiliation]}
    return authors_search_results

def add_other_authors(authors_search_results,other_authors,article_doi):
    for author in other_authors:
        if len(author.strip())>2:
            name = author.split(' - ')[0]
            affiliation = author.split(' - ')[1]
            if author in authors_search_results:
                authors_search_results[name]['Score'] += 1
                authors_search_results[name]['Articles'].append(article_doi)
                authors_search_results[name]['Affiliation'].append(affiliation)
            else:
                authors_search_results[name] = {'Score':1,'Articles':[article_doi],'Affiliation':[affiliation]}
    return authors_search_results

In [50]:
authors_search_results = {}

for article_doi in articles_of_interest:
    article_info = dict_articles[article_doi]
    authors_search_results = add_first_author(authors_search_results,
                                              article_info['First_Author'],
                                              article_doi)
    authors_search_results = add_other_authors(authors_search_results,
                                          article_info['Other_Authors'],
                                          article_doi)
    
authors_search_results = dict(sorted(authors_search_results.items(), key=lambda item: item[1]['Score'],reverse=True))

for key,values in authors_search_results.items():
    print(key, values['Score'], values['Affiliation'])
    for article_doi in values['Articles']:
        article_info = dict_articles[article_doi]
        print(article_info['Title'],article_info['Journal'],article_info['Publication_Date'])

J Jianjun Gao 4 ['Department of Pharmacology, School of Pharmacy, Qingdao University, Qingdao, China.']
Update on use of chloroquine/hydroxychloroquine to treat coronavirus disease 2019 (COVID-19). Bioscience trends 2020-04-14
C C Rodrigo 4 ['Department of Pathology, School of Medical Sciences, UNSW Sydney, Australia.']
Clinical evidence for repurposing chloroquine and hydroxychloroquine as antiviral agents: a systematic review. Clinical microbiology and infection : the official publication of the European Society of Clinical Microbiology and Infectious Diseases 2020-05-30
T Tianxiao Liu 4 ["Department of Medicine, Brigham and Women's Hospital and Harvard Medical School, Boston, MA 02115, USA."]
Cathepsin L-selective inhibitors: A potentially promising treatment for COVID-19 patients. Pharmacology & therapeutics 2020-05-30
E Emanuele Rizzo 4 ['Department of Prevention, Local Health Authority of Lecce (ASL Lecce), Lecce, Italy. emanuele.rizzo@email.com.\nItalian Society of Environmental

An Update on Current Therapeutic Drugs Treating COVID-19. Current pharmacology reports 2020-05-13
Z Zhigang Liu 1 ['1Department of Pharmaceutics, Ernest Mario School of Pharmacy, Rutgers University, Piscataway, NJ 08854 USA.\n3Department of Food and Pharmaceutical Engineering, Guiyang University, Guiyang, 550005 China.']
An Update on Current Therapeutic Drugs Treating COVID-19. Current pharmacology reports 2020-05-13
GJ George J Poiani 1 ['4Robert Wood Johnson University Hospital Somerset, Somerville, NJ 08876 USA.\n5Robert Wood Johnson Medical School, New Brunswick, NJ 08901 USA.']
An Update on Current Therapeutic Drugs Treating COVID-19. Current pharmacology reports 2020-05-13
L Louis Amorosa 1 ['5Robert Wood Johnson Medical School, New Brunswick, NJ 08901 USA.']
An Update on Current Therapeutic Drugs Treating COVID-19. Current pharmacology reports 2020-05-13
L Luigi Brunetti 1 ['4Robert Wood Johnson University Hospital Somerset, Somerville, NJ 08876 USA.\n6Department of Pharmacy Pra

Chloroquine dosage regimens in patients with COVID-19: Safety risks and optimization using simulations. Safety science 2020-06-06
Y YuTing Chen 1 ["Xi'an Medical University , Xi'an, China.\nDepartment of Infectious Diseases, First Affiliated Hospital of Xi'an Jiaotong University , Xi'an, China."]
Advances in the use of chloroquine and hydroxychloroquine for the treatment of COVID-19. Postgraduate medicine 2020-06-05
X XiuDe Fan 1 ["Department of Infectious Diseases, First Affiliated Hospital of Xi'an Jiaotong University , Xi'an, China."]
Advances in the use of chloroquine and hydroxychloroquine for the treatment of COVID-19. Postgraduate medicine 2020-06-05
X XiaoYun Wang 1 ["Department of Infectious Diseases, First Affiliated Hospital of Xi'an Jiaotong University , Xi'an, China."]
Advances in the use of chloroquine and hydroxychloroquine for the treatment of COVID-19. Postgraduate medicine 2020-06-05
Q QunYing Han 1 ["Department of Infectious Diseases, First Affiliated Hospital of Xi'

JN Jeremy N Day 1 ['Oxford University Clinical Research Unit, University of Oxford, Ho Chi Minh City, Vietnam.\nCentre for Tropical Medicine and Global Health, University of Oxford, Oxford, UK.']
A multi centre randomized open label trial of chloroquine for the treatment of adults with SARS-CoV-2 infection in Vietnam. Wellcome open research 2020-10-29
NVV Nguyen Van Vinh Chau 1 ['Hospital for Tropical Diseases, Ho Chi Minh City, Vietnam.']
A multi centre randomized open label trial of chloroquine for the treatment of adults with SARS-CoV-2 infection in Vietnam. Wellcome open research 2020-10-29
GF Gijs F Kapel 1 ['Department of Cardiology, Medisch Spectrum Twente, Enschede, The Netherlands.']
Drug-induced 'Torsade de Pointes' in a COVID-19 patient despite discontinuation of chloroquine. Importance of its long half-life: a case report. European heart journal. Case reports 2020-10-23
J Jurren van Opstal 1 ['Department of Cardiology, Medisch Spectrum Twente, Enschede, The Netherlands.']
D

V Véronique Jacomo 1 ['Eurofins Biomnis, Lyon, France.']
Low blood zinc concentrations in patients with poor clinical outcome during SARS-CoV-2 infection: is there a need to supplement with zinc COVID-19 patients? Journal of microbiology, immunology, and infection = Wei mian yu gan ran za zhi 2021-02-27
B Bernard La Scola 1 ['IHU-Méditerranée Infection, Marseille, France; Aix Marseille Univ., IRD, AP-HM, MEPHI, Marseille, France.']
Low blood zinc concentrations in patients with poor clinical outcome during SARS-CoV-2 infection: is there a need to supplement with zinc COVID-19 patients? Journal of microbiology, immunology, and infection = Wei mian yu gan ran za zhi 2021-02-27
M Madhavi Ampajwala 1 ['Village Health Partners, Plano, TX, USA.']
A Rapid, High-Sensitivity SARS-CoV-2 Nucleocapsid Immunoassay to Aid Diagnosis of Acute COVID-19 at the Point of Care: A Clinical Performance Study. Infectious diseases and therapy 2021-02-26
C Christopher Chappel 1 ['Chappel Group, Kissimmee, FL, U