In [1]:
import pandas as pd
import sqlite3
from IPython.display import HTML
%matplotlib inline

In [2]:
def make_clickable(val):
    return '<a href="{}">{}</a>'.format(val,val)

In [3]:
conn = sqlite3.connect('papers.db')
test = pd.read_sql_query('SELECT * FROM papers', conn)

In [4]:
target = pd.read_csv('ipfjes-case-control-studies.csv', usecols=['pmid','title','firstauthor','lastauthor',
                                                             'journal','pubdate','pubtype',
                                                             'abstract', 'keywords', 'rcr', 'citedby', 'cites'])

In [5]:
target['target'] = 1

test['target'] = 0

training = pd.concat([target, test.head(14)])

from sklearn.feature_extraction.text import CountVectorizer
count_vectorizer = CountVectorizer()
counts = count_vectorizer.fit_transform(training['abstract'].values)

from sklearn.naive_bayes import MultinomialNB
classifier = MultinomialNB()
targets = training['target'].values
classifier.fit(counts, targets)

test_counts = count_vectorizer.transform(test['abstract'].values)
predictions = classifier.predict(test_counts)
predictions_prob = classifier.predict_proba(test_counts)

In [6]:
test['testresult'] = predictions
test['testprob'] = pd.DataFrame(predictions_prob)[1]
test['link'] = 'https://www.ncbi.nlm.nih.gov/pubmed/' + test['pmid']

In [7]:
results = test[(test.testresult == 1) & (test.date.str.contains('2017'))].sort_values(by='testprob', ascending=False)
results[['title', 'link']].style.format(make_clickable) # clickable results are important

Unnamed: 0,title,link
1510,Diffuse Pulmonary Ossification in Fibrosing Interstitial Lung Diseases: Prevalence and Associations.,https://www.ncbi.nlm.nih.gov/pubmed/28182861
4560,The diagnostic importance of the bronchoalveolar lavage in lymphocytic alveolitis.,https://www.ncbi.nlm.nih.gov/pubmed/27471881
3244,"Cholesterol, lipoproteins and subclinical interstitial lung disease: the MESA study.",https://www.ncbi.nlm.nih.gov/pubmed/28130491
86,Effect of statins on disease-related outcomes in patients with idiopathic pulmonary fibrosis.,https://www.ncbi.nlm.nih.gov/pubmed/27708114
99,Investigation of viral infection in idiopathic pulmonary fibrosis among Iranian patients in Tehran.,https://www.ncbi.nlm.nih.gov/pubmed/28115263
3426,Histologist's original opinion compared with multidisciplinary team in determining diagnosis in interstitial lung disease.,https://www.ncbi.nlm.nih.gov/pubmed/27815523
3025,Interstitial Lung Disease in the Elderly.,https://www.ncbi.nlm.nih.gov/pubmed/27865876


In [18]:
results = test[(test.testresult == 1)].sort_values(by='testprob', ascending=False)
pd.set_option('max_colwidth',1000)
result[['pmid', 'date', 'author', 'title', 'journal']]


Unnamed: 0,pmid,date,author,title,journal
22,25165873,2014 Oct,"Hutchinson JP, McKeever TM, Fogarty AW, Navaratnam V, Hubbard RB",Increasing global mortality from idiopathic pulmonary fibrosis in the twenty-first century.,Annals of the American Thoracic Society
86,27708114,2017 Feb,"Kreuter M, Bonella F, Maher TM, Costabel U, Spagnolo P, Weycker D, Kirchgaessler KU, Kolb M",Effect of statins on disease-related outcomes in patients with idiopathic pulmonary fibrosis.,Thorax
89,24002055,2014 Mar,"Navaratnam V, Fogarty AW, McKeever T, Thompson N, Jenkins G, Johnson SR, Dolan G, Kumaran M, Pointon K, Hubbard RB",Presence of a prothrombotic state in people with idiopathic pulmonary fibrosis: a population-based case-control study.,Thorax
90,26176338,2015 Sep,"Gavini S, Finn RT, Lo WK, Goldberg HJ, Burakoff R, Feldman N, Chan WW",Idiopathic pulmonary fibrosis is associated with increased impedance measures of reflux compared to non-fibrotic disease among pre-lung transplant patients.,Neurogastroenterology and motility : the official journal of the European Gastrointestinal Motility Society
99,28115263,2017 Mar,"Moradi P, Keyvani H, Javad Mousavi SA, Karbalaie Niya MH, Esghaei M, Bokharaei-Salim F, Ataei-Pirkooh A, Monavari SH",Investigation of viral infection in idiopathic pulmonary fibrosis among Iranian patients in Tehran.,Microbial pathogenesis
