# Main Program
## Search PubMed with a given list of grant numbers
#### All helper functions used here are defined in the search_mthds notebook

In [2]:
%run search_mthds.ipynb

import pandas as pd
import time
from tqdm import tqdm

#empty dataframe that accumulates the article info gathered for every grant; one column per collected field
template_df = pd.DataFrame(
    columns = [
        'title',
        'authors',
        'dois',
        'keywords',
        'project_serial_num',
        'journal_name',
        'publication_year',
    ]
)

#grant numbers to search PubMed for
#the list is subject to change and could be more general, but grant numbers in this form give the best results
grant_list = [
    'AA029328', 'AA029331', 'AA029345', 'AA029324', 'AA029316', 'AA029348',
    'DC019579', 'DC019578', 'DC019573', 'DC016112',
    'TR003780', 'TR003807', 'TR003793',
    'HL119145', 'NR020105', 'DE031114', 'MD016526', 'DK130067', 'HL150852',
    'DE030841', 'DE030842',
    'HL152410', 'HL152401',
    'DE030829', 'DE030852', 'DE030832',
    'HD105618', 'HD105593', 'HD105594', 'HD105591', 'HD105619', 'HD105590', 'HD105613', 'HD105610',
    'TR003775', 'TR003795', 'TR003787', 'TR003812',
    'DA053976', 'DA053949', 'DA053941', 'DA053903', 'DA053893',
    'LM013129', 'DA053899', 'ES103366', 'LM013755',
]


### Main loop creating data table

In [3]:
# Gather article metadata for every grant number and append it to template_df.
# Each grant's rows are built as their own dataframe and collected in grant_frames;
# they are concatenated once after the loop, because calling pd.concat inside the
# loop re-copies every previously collected row on each iteration.
# If the loop is interrupted, the frames gathered so far remain in grant_frames.
grant_frames = []
for grant in tqdm(grant_list): #iterate over the grant numbers and gather information for each one
    my_ids = return_ids(grant) #grab article PubMed ID list
    if my_ids == 'Nothing Found': #move onto the next grant number if no articles were found
        time.sleep(1) #still pause before the next search
        continue
    art_info = get_summary(my_ids) #gather all article information via esummary
    titles = get_titles(art_info) #gather titles of all articles; return dictionary
    authors = get_authors(art_info) #gather authors of all articles
    dois = get_doi(art_info) #gather all dois and additional article identifier if available
    keywords = get_keywords(my_ids) #gather all keywords for each article
    journals = get_journal(art_info) #gather the journal of each article
    publication_yrs = get_publication_yr(art_info) #gather the publication year of each article
    mydf = pd.DataFrame.from_dict(titles, orient='index', columns = ['title']) #dataframe with PubMed ids as row indices and a title column
    mydf['authors'] = pd.Series(authors) #add each column with the stored information above; values are aligned on the row index
    mydf['dois'] = pd.Series(dois)
    mydf['keywords'] = pd.Series(keywords)
    mydf['project_serial_num'] = grant #grant number that corresponds to the current iteration of articles
    mydf['journal_name'] = pd.Series(journals)
    mydf['publication_year'] = pd.Series(publication_yrs)
    grant_frames.append(mydf) #defer the concatenation until every grant has been processed
    time.sleep(1) #need to wait to avoid hitting the limit of 3 requests per second

if grant_frames: #nothing to append when no grant returned any articles
    template_df = pd.concat([template_df, *grant_frames], ignore_index = False) #append all per-grant frames at once, keeping the PubMed-id index



  0%|          | 0/47 [00:00<?, ?it/s]

100%|██████████| 47/47 [03:42<00:00,  4.73s/it]


### Write the dataframe to a csv file in your current working directory

In [4]:
#save the collected article table; the path is relative, so the file lands in the current working directory
#the row index is written as the first column of the csv
template_df.to_csv(path_or_buf = 'article_data_new.csv')