# Search PubMed

## Define Search Criteria

In [1]:
from Bio import Entrez
import numpy as np

def search(query, reldate=7, retmax=5000):
    """Run a PubMed ESearch for ``query`` and return the parsed response.

    Parameters
    ----------
    query : str
        PubMed search term, passed to ESearch as ``term``.
    reldate : int, optional
        Only return records from the last ``reldate`` days (default 7).
    retmax : int or str, optional
        Maximum number of IDs to return (default 5000).

    Returns
    -------
    The structure produced by ``Entrez.read``; the notebook reads its
    ``'IdList'`` entry to obtain the matching PubMed IDs.
    """
    # Placeholder contact address sent with the request; replace with your own.
    Entrez.email = 'your.email@example.com'
    handle = Entrez.esearch(db='pubmed',
                            sort='most recent',
                            retmax=retmax,
                            retmode='xml',
                            reldate=reldate,  # only within n days from now
                            term=query)
    try:
        return Entrez.read(handle)
    finally:
        # Always release the network handle, even if parsing fails.
        handle.close()

# Search terms: match either word stem in the title or abstract.
# (The query string can be tried out in PubMed Advanced Search first.)
query = '(Biomech*[Title/Abstract] OR locomot*[Title/Abstract])'
search_results = search(query)

## Perform Search and Save Paper Titles

In [2]:


def fetch_details(id_list):
    """Fetch the full PubMed records for the given IDs.

    Parameters
    ----------
    id_list : iterable
        PubMed IDs (strings or anything convertible with ``str``).

    Returns
    -------
    The structure produced by ``Entrez.read``; the notebook reads its
    ``'PubmedArticle'`` entry to iterate over the fetched papers.
    """
    # str() each ID so integer IDs are accepted as well as strings.
    ids = ','.join(str(pmid) for pmid in id_list)
    # Placeholder contact address sent with the request; replace with your own.
    Entrez.email = 'your.email@example.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    try:
        return Entrez.read(handle)
    finally:
        # Always release the network handle, even if parsing fails.
        handle.close()

# Fetch the full records for every ID returned by the search.
id_list = search_results['IdList']
papers = fetch_details(id_list)
print("")

articles = papers['PubmedArticle']
n_papers = len(articles)

# Titles are available immediately; the remaining per-paper lists are
# pre-sized with empty strings and filled in by a later cell.
titles = [article['MedlineCitation']['Article']['ArticleTitle'] for article in articles]
keywords = [''] * n_papers
authors = [''] * n_papers
links = [''] * n_papers
journals = [''] * n_papers

print(len(titles), 'Papers found')


160 Papers found


## Pull information from PubMed Results
#### Format title, journal, and authors in a Markdown-friendly format

In [3]:
# Fill the per-paper lists (links, authors, journals, keywords) created earlier.
for i, paper in enumerate(papers['PubmedArticle']):
    citation = paper['MedlineCitation']
    article = citation['Article']

    # Markdown bullet linking the title to its PubMed page.
    links[i] = "* [%s](https://www.ncbi.nlm.nih.gov/pubmed/%s)" % (article['ArticleTitle'], citation['PMID'])

    auths = []
    # .get() so a record without an author list yields an empty author string
    # instead of aborting the whole loop with a KeyError.
    for auth in article.get('AuthorList', []):
        if 'LastName' in auth:
            # Initials may be absent; fall back to the last name alone.
            if 'Initials' in auth:
                auth_name = ' '.join([auth['LastName'], auth['Initials']])
            else:
                auth_name = auth['LastName']
        elif 'CollectiveName' in auth:
            # Group/consortium authors (per the PubMed XML format) carry a
            # CollectiveName instead of LastName/Initials.
            auth_name = auth['CollectiveName']
        else:
            # Nothing usable for this entry: report it and keep going.
            print(article['ArticleTitle'], '- NAME ERROR')
            continue
        auths.append(auth_name + ',')
    authors[i] = ' '.join(auths)

    journals[i] = '*%s*' % (article['Journal']['Title'])

    # Store keywords: only the first keyword list is used; papers without
    # keywords keep the empty-string placeholder.
    keyword_lists = citation.get('KeywordList', [])
    if keyword_lists:
        keywords[i] = ' '.join(keyword_lists[0])

Predictors of Healing Ligament Size and Magnetic Resonance Signal Intensity at 6 Months After Bridge-Enhanced Anterior Cruciate Ligament Repair. - NAME ERROR


### Markdown Example:

* [Skeletal muscles of hibernating black bears show minimal atrophy and phenotype shifting despite prolonged physical inactivity and starvation.](https://www.ncbi.nlm.nih.gov/pubmed/30998788)
Miyazaki M,
Shimozuru M,
Tsubota T,
*PloS one*
<br>  

* [Phase space methods for non-linear analysis of pedalling forces in cycling.](https://www.ncbi.nlm.nih.gov/pubmed/30998746)
Kunert A,
Ott M,
Reuter T,
Koska D,
Maiwald C,
*PloS one*
<br>  

# Clean up title and keyword strings

In [4]:
# Number of papers retrieved (the search caps this via its retmax setting).
print('Number of Papers: ', len(titles))
import re

# (old, new) replacements applied in order to each lower-cased title.
title_substitutions = [
    ('<sub>', ' '), ('</sub>', ''),  # subscript markup
    ('<i>', ' '), ('</i>', ''),      # italics markup
    ('[', ''), (']', ''),            # brackets left behind by the parser
]

cleaned_titles = []
for raw_title in titles:
    title = raw_title.lower()  # same case
    for old, new in title_substitutions:
        title = title.replace(old, new)
    cleaned_titles.append(title)
titles = cleaned_titles

# Keywords only need case normalisation.
keywords = list(map(str.lower, keywords))


Number of Papers:  160


# Load Top-performing Model

In [5]:
from keras.models import model_from_json
# Load the model architecture from JSON; the context manager guarantees the
# file is closed even if reading fails.
with open('Models/keras_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
# Load the trained weights into the rebuilt model.
model.load_weights("Models/keras_model.h5")
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
print("\nLoaded model from disk")
 

Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Loaded model from disk


# Load Associated Vectorizer

In [6]:
import pickle
# import numpy as np
from sklearn.preprocessing import LabelEncoder
# Load the vectorizer and label encoder saved alongside the model.
# NOTE(review): pickle.load can execute arbitrary code from the file, so only
# load vectorizer files from a trusted source.
with open('Models/keras_Vectorizer.pkl', 'rb') as vectorizer_file:
    vect = pickle.load(vectorizer_file)
# Rebuild the label encoder from its saved class labels.
le = LabelEncoder()
le.classes_ = np.load('Models/keras_LabelEncoder.npy')
print('\nLoaded Vectorizer')



Loaded Vectorizer


# Vectorize Strings

In [7]:
#get titles for this week's literature update
import pandas as pd
# One row per paper; 'everything' is the title and keywords joined by a space.
papers_df = pd.DataFrame({'title': titles, 'keywords': keywords}).assign(
    everything=lambda frame: frame['title'] + ' ' + frame['keywords']
)

# Vectorize the combined title + keyword text with the loaded vectorizer.
titles_vec = vect.transform(papers_df['everything'])
# Alternative, if you want to use just the title:
# titles_vec = vect.transform(papers_df['title'])

# Predict Topics For Each Paper

In [8]:
# Run the model once and reuse the result.
prediction_vec = model.predict(titles_vec)
# Index of the highest-scoring class for each paper.
class_ids = [np.argmax(scores) for scores in prediction_vec]
# Map class indices back to their topic labels.
topics = list(le.inverse_transform(class_ids))
papers_df['topic'] = topics
papers_df[['title','topic']].head()

Unnamed: 0,title,topic
0,the influence of childhood obesity on spatio-t...,GAIT/LOCOMOTION
1,"two degrees of freedom, dynamic, hand-wrist em...",MODELING
2,biomechanical properties of pedicle screw fixa...,ORTHOPAEDICS/SURGERY
3,demystifying the use of self expandable interw...,CARDIOVASCULAR/CARDIOPULMONARY
4,behavioral profile of intermittent vs continuo...,GAIT/LOCOMOTION


# Save Paper Titles and Topics

### Store everything in DataFrame and sort by Topic

In [9]:
# Attach the Markdown-ready strings built earlier, one column per list.
for column, values in (('authors', authors), ('journal', journals), ('links', links)):
    papers_df[column] = values

# Build the dated base name shared by the CSV file, Markdown file and permalink.
import datetime
now = datetime.datetime.now()
date_stamp = '-'.join(str(part) for part in (now.year, now.month, now.day))
urlname = date_stamp + '-litupdate'
fname = 'Literature_Updates/' + urlname + '.csv'
mdname = 'Literature_Updates/' + urlname + '.md'

print('Filename: ',fname)

# papers_df = papers_df.sort_values(by = ['topic'])
# papers_df = papers_df.reset_index(drop = True)
papers_df.head()

Filename:  Literature_Updates/2019-4-23-litupdate.csv


Unnamed: 0,title,keywords,everything,topic,authors,journal,links
0,the influence of childhood obesity on spatio-t...,children gait obesity,the influence of childhood obesity on spatio-t...,GAIT/LOCOMOTION,"Montes-Alguacil J, Páez-Moguer J, Jiménez Cebr...",*Gait & posture*,* [The influence of childhood obesity on spati...
1,"two degrees of freedom, dynamic, hand-wrist em...",emg signal processing emg-force electromyogram,"two degrees of freedom, dynamic, hand-wrist em...",MODELING,"Dai C, Zhu Z, Martinez-Luna C, Hunt TR, Farrel...",*Journal of electromyography and kinesiology :...,"* [Two degrees of freedom, dynamic, hand-wrist..."
2,biomechanical properties of pedicle screw fixa...,allograft biochemical phenomena bone transplan...,biomechanical properties of pedicle screw fixa...,ORTHOPAEDICS/SURGERY,"Jia C, Zhang R, Xing T, Gao H, Li H, Dong F, Z...",*The spine journal : official journal of the N...,* [Biomechanical properties of pedicle screw f...
3,demystifying the use of self expandable interw...,atherosclerosis peripheral arterial disease st...,demystifying the use of self expandable interw...,CARDIOVASCULAR/CARDIOPULMONARY,"Peker A, Balendran B, Paraskevopoulos I, Kroki...",*Annals of vascular surgery*,* [Demystifying the Use of Self Expandable Int...
4,behavioral profile of intermittent vs continuo...,learning anxiety high fat locomotion memory,behavioral profile of intermittent vs continuo...,GAIT/LOCOMOTION,"Blanco-Gandía MC, Miñarro J, Rodríguez-Arias M,",*Behavioural brain research*,* [Behavioral profile of intermittent vs conti...


### Save as .csv 

In [65]:
# header = ['title','topic']
# papers_df.sort_values('topic').to_csv(fname, index = False, columns = header)
# print('\nLiterature Update Exported as .csv')


['BONE' 'CARDIOVASCULAR/CARDIOPULMONARY' 'CELLULAR/SUBCELLULAR'
 'COMPARATIVE' 'DENTAL/ORAL/FACIAL' 'EVOLUTION/ANTHROPOLOGY'
 'GAIT/LOCOMOTION' 'HAND/FINGER/FOOT/TOE' 'JOINT/CARTILAGE' 'METHODS'
 'MODELING' 'MUSCLE' 'NEURAL' 'ORTHOPAEDICS/SPINE' 'ORTHOPAEDICS/SURGERY'
 'POSTURE/BALANCE' 'PROSTHETICS/ORTHOTICS' 'REHABILITATION' 'ROBOTICS'
 'SPORT/EXERCISE' 'TENDON/LIGAMENT' 'TISSUE/BIOMATERIAL' 'TRAUMA/IMPACT'
 'UNIQUE TOPIC' 'VETERINARY/AGRICULTURAL' 'VISUAL/VESTIBULAR/EYE']


# Compile papers grouped by topic

In [15]:
# Write the literature update as a single Markdown file, grouped by topic.

# Pair every predicted topic label with its tidied display name. Papers are
# later selected by the raw label, so tidying a name never loses its papers.
topic_pairs = []
for raw_topic in np.unique(papers_df['topic']):
    display_topic = raw_topic
    if 'UNIQUE' in raw_topic:
        display_topic = 'UNIQUE TOPIC'
    if 'IMPACT' in raw_topic:
        display_topic = 'TRAUMA/IMPACT'
    topic_pairs.append((raw_topic, display_topic))
topic_list = [display_topic for _, display_topic in topic_pairs]

# One handle for the whole export: it is closed (and flushed) on exit, and the
# explicit UTF-8 encoding lets accented author names be written as text.
with open(mdname, 'w', encoding='utf-8') as md_file:
    # Header for alcantarar.github.io literature update site:
    print('---', file=md_file)
    print('title: Biomechanics Literature Update', file=md_file)
    print('collection: literature', file=md_file)
    print('permalink: /literature/'+urlname, file=md_file)
    print('excerpt: <br>', file=md_file)
    print('---', file=md_file)

    print('### Created by: [Ryan Alcantara](https://twitter.com/Ryan_Alcantara_) & [Gary Bruening](https://twitter.com/garebearbru) - University of Colorado Boulder', file=md_file)
    print('### Table Of Contents: ', file=md_file)
    for topic in topic_list:
        # Anchor format: lower-case, slashes dropped, spaces turned into dashes,
        # the same convention the '#table-of-contents' link below relies on.
        # NOTE(review): assumes the site's Markdown renderer builds heading IDs
        # this way - confirm against the rendered page.
        anchor = topic.lower().replace('/', '').replace(' ', '-')
        print('['+topic+']'+'(#'+anchor+')  ', file=md_file)
    print('', file=md_file)

    for raw_topic, topic in topic_pairs:
        print('----', file=md_file)
        print('#', topic, file=md_file)
        print('----', file=md_file)
        print('', file=md_file)
        print('[Back to top](#table-of-contents)', file=md_file)
        print('', file=md_file)
        papers_subset = papers_df[papers_df.topic == raw_topic]
        for link, author_line, journal in zip(papers_subset['links'],
                                              papers_subset['authors'],
                                              papers_subset['journal']):
            print(link, file=md_file)
            # Written as text; encoding to bytes here would emit a b'...' repr.
            print(author_line, file=md_file)
            print(journal+'.  ', file=md_file)
            print('', file=md_file)

print('Literature Update Exported as Markdown')

Literature Update Exported as Markdown
