In [26]:
import pandas as pd
import sqlite3
from IPython.display import HTML
%matplotlib inline

In [28]:
def make_clickable(val):
    """Render ``val`` as an HTML anchor that links to itself.

    The value is converted to text and HTML-escaped before it is placed in
    both the ``href`` attribute and the link label, so characters such as
    ``&``, ``<``, ``>`` and quotes cannot break the generated markup.

    Parameters
    ----------
    val : object
        The link target; anything that can be converted with ``str``.

    Returns
    -------
    str
        ``<a href="...">...</a>`` with the escaped value as target and label.
    """
    # Local import keeps this cell runnable without touching the import cell.
    from html import escape

    safe = escape(str(val), quote=True)
    return '<a href="{0}">{0}</a>'.format(safe)

In [2]:
# Open the local SQLite file and read every row of the `papers` table into a
# DataFrame; this frame is what the classifier scores in a later cell.
# NOTE(review): `conn` is never closed in the visible cells — close it once
# no remaining cell needs it.
conn = sqlite3.connect('papers.db')
test = pd.read_sql_query(sql='SELECT * FROM papers', con=conn)

In [3]:
# Load the reference set from the case-control-studies CSV, keeping only the
# columns listed below (these rows are labelled as the positive class in a
# later cell).
target = pd.read_csv(
    'ipfjes-case-control-studies.csv',
    usecols=[
        'pmid', 'title', 'firstauthor', 'lastauthor',
        'journal', 'pubdate', 'pubtype',
        'abstract', 'keywords', 'rcr', 'citedby', 'cites',
    ],
)

In [4]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# How many leading rows of `test` are borrowed as negative training examples.
N_NEGATIVE_EXAMPLES = 14

# Label the reference studies as class 1 and the database papers as class 0.
target['target'] = 1
test['target'] = 0

# NOTE(review): the negatives are just the first N rows of `test`, labelled 0
# without any check, and those same rows are scored again below — confirm
# this is intended.
training = pd.concat([target, test.head(N_NEGATIVE_EXAMPLES)])
# CountVectorizer cannot tokenise a missing document, so rows without an
# abstract are left out of the training set instead of crashing the fit.
training = training.dropna(subset=['abstract'])

# Bag-of-words features learned from the training abstracts.
count_vectorizer = CountVectorizer()
counts = count_vectorizer.fit_transform(training['abstract'].values)

classifier = MultinomialNB()
targets = training['target'].values
classifier.fit(counts, targets)

# Missing abstracts become empty strings (rather than being dropped) so that
# `predictions` and `predictions_prob` keep exactly one entry per row of `test`.
test_counts = count_vectorizer.transform(test['abstract'].fillna('').values)
predictions = classifier.predict(test_counts)
predictions_prob = classifier.predict_proba(test_counts)

In [41]:
# Attach the classifier output to each row of `test`.
test['testresult'] = predictions
# Take column 1 of the probability matrix positionally. Wrapping it in a
# DataFrame first would assign by index label and silently produce NaN or
# misaligned values whenever `test` does not have a plain 0..n-1 index.
# Column 1 is the probability of the second class in `classifier.classes_`
# (label 1 when both 0 and 1 occur in the training targets).
test['testprob'] = predictions_prob[:, 1]
# Build the PubMed URL for each paper from its pmid.
test['link'] = 'https://www.ncbi.nlm.nih.gov/pubmed/' + test['pmid']

In [49]:
# Keep papers predicted as class 1 whose date string mentions 2017, most
# confident first. `na=False` treats a missing date as "no match" so the
# boolean mask never contains NaN.
results = (
    test[(test['testresult'] == 1) & test['date'].str.contains('2017', na=False)]
    .sort_values(by='testprob', ascending=False)
)
# clickable results are important — but only the `link` column holds URLs, so
# the anchor formatter is limited to it; applying it to every column would
# turn each title into a link pointing at its own text.
results[['title', 'link']].style.format({'link': make_clickable})

Unnamed: 0,title,link
1510,Diffuse Pulmonary Ossification in Fibrosing Interstitial Lung Diseases: Prevalence and Associations.,https://www.ncbi.nlm.nih.gov/pubmed/28182861
4560,The diagnostic importance of the bronchoalveolar lavage in lymphocytic alveolitis.,https://www.ncbi.nlm.nih.gov/pubmed/27471881
3244,"Cholesterol, lipoproteins and subclinical interstitial lung disease: the MESA study.",https://www.ncbi.nlm.nih.gov/pubmed/28130491
86,Effect of statins on disease-related outcomes in patients with idiopathic pulmonary fibrosis.,https://www.ncbi.nlm.nih.gov/pubmed/27708114
99,Investigation of viral infection in idiopathic pulmonary fibrosis among Iranian patients in Tehran.,https://www.ncbi.nlm.nih.gov/pubmed/28115263
3426,Histologist's original opinion compared with multidisciplinary team in determining diagnosis in interstitial lung disease.,https://www.ncbi.nlm.nih.gov/pubmed/27815523
3025,Interstitial Lung Disease in the Elderly.,https://www.ncbi.nlm.nih.gov/pubmed/27865876


In [50]:
# Preview the first five rows of the filtered results with every column shown.
results.head(n=5)

Unnamed: 0,pmid,author,date,title,journal,key_words,pub_type,abstract,target,testresult,testprob,link
1510,28182861,"Egashira R, Jacob J, Kokosi MA, Brun AL, Rice ...",2017 Jul,Diffuse Pulmonary Ossification in Fibrosing In...,Radiology,"Aged, Biopsy, Female, Humans, Idiopathic Pulmo...",Journal Article,Purpose To investigate the prevalence of diffu...,0,1,1.0,https://www.ncbi.nlm.nih.gov/pubmed/28182861
4560,27471881,"Mlika M, Kria N, Braham E, Chebbi C, El Mezni F",2017,The diagnostic importance of the bronchoalveol...,Journal of immunoassay & immunochemistry,"Adolescent, Adult, Aged, Bronchoalveolar Lavag...",Journal Article,Multidisciplinary concertation is mandatory in...,0,1,0.999997,https://www.ncbi.nlm.nih.gov/pubmed/27471881
3244,28130491,"Podolanczuk AJ, Raghu G, Tsai MY, Kawut SM, Pe...",2017 May,"Cholesterol, lipoproteins and subclinical inte...",Thorax,"Aged, Aged, 80 and over, Biomarkers/blood, Cho...","Letter, Multicenter Study",We investigated associations of plasma lipopro...,0,1,0.999995,https://www.ncbi.nlm.nih.gov/pubmed/28130491
86,27708114,"Kreuter M, Bonella F, Maher TM, Costabel U, Sp...",2017 Feb,Effect of statins on disease-related outcomes ...,Thorax,"Adult, Aged, Aged, 80 and over, Disease Progre...","Clinical Trial, Phase III, Journal Article, Ra...",BACKGROUND: Data are conflicting regarding the...,0,1,0.82978,https://www.ncbi.nlm.nih.gov/pubmed/27708114
99,28115263,"Moradi P, Keyvani H, Javad Mousavi SA, Karbala...",2017 Mar,Investigation of viral infection in idiopathic...,Microbial pathogenesis,"Adenoviruses, Human/*isolation & purification,...",Journal Article,AIM OF THE STUDY: Idiopathic pulmonary fibrosi...,0,1,0.809333,https://www.ncbi.nlm.nih.gov/pubmed/28115263
