In [None]:
pip install biopython

Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import json
import re
import numpy as np
#import csv

In [None]:
# Biopython entrez

In [None]:
from Bio import Entrez

def search(query, retmax='245'): # scraping
    """Run a PubMed esearch and return the parsed response.

    Parameters
    ----------
    query
        Search term, forwarded unchanged to ``Entrez.esearch`` as ``term``.
    retmax
        Maximum number of UIDs to request. Defaults to '245', the value that
        was previously hard-coded.

    Returns
    -------
    The object produced by ``Entrez.read``; the notebook reads its
    ``'IdList'`` entry.
    """
    Entrez.email = 'xji1@dons.usfca.edu'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax=retmax,
                            retmode='xml',
                            term=query)
    try:
        results = Entrez.read(handle)
    finally:
        # Release the connection even when parsing the response fails.
        handle.close()
    return results

def fetch_details(id_list):
    """Fetch the PubMed records for the given UIDs.

    Parameters
    ----------
    id_list
        Iterable of UID strings (e.g. the ``'IdList'`` of a search result).

    Returns
    -------
    The object produced by ``Entrez.read``; the notebook iterates its
    ``'PubmedArticle'`` entry. For an empty ``id_list`` no request is made
    and ``{'PubmedArticle': []}`` is returned.
    """
    if not id_list:
        # An empty id list gives efetch nothing to retrieve, so skip the
        # network call and hand back an empty record set callers can iterate.
        return {'PubmedArticle': []}
    ids = ','.join(id_list)
    Entrez.email = 'xji1@dons.usfca.edu'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    try:
        results = Entrez.read(handle)
    finally:
        # Release the connection even when parsing the response fails.
        handle.close()
    return results

if __name__ == '__main__': # parsing
    # PubMed query: randomized controlled trials on insomnia/sleep with
    # depression/anxiety and a list of complementary interventions,
    # published 2010-2021. Adjacent literals concatenate to the original text.
    query = (
        'randomized control* [abstract] AND (insomnia OR sleep) '
        'AND (depressi* OR anxiety) '
        'AND (Herb OR mind-body OR acup* OR music OR mindfulness OR meditation '
        'OR Tai ji OR tai chi OR qigong OR yoga OR hypnotherapy OR massage '
        'OR manipulation OR traditional Chinese Medicine OR homeopathy '
        'OR relaxation OR guided imagery) '
        'AND 2010:2021[Date - Publication]'
    )
    # Pass the term as a plain string rather than wrapped in a one-element list.
    results = search(query)
    id_list = results['IdList'] # a list of UIDs from fetching
    papers = fetch_details(id_list)
    # Print a numbered list of the fetched article titles.
    for i, paper in enumerate(papers['PubmedArticle']):
        print("%d) %s" % (i+1, paper['MedlineCitation']['Article']['ArticleTitle']))

In [None]:
# Get the ISO journal abbreviation of every fetched article.
# Iterate over `papers` (the efetch records): `results` is the esearch response,
# from which the notebook only reads 'IdList'.
CAMJournal = []
for paper in papers['PubmedArticle']:
    try:
        journal = paper['MedlineCitation']['Article']['Journal']
        if 'ISOAbbreviation' in journal:
            CAMJournal.append(journal['ISOAbbreviation'])
        else:
            CAMJournal.append('NaN')
    except KeyError:
        # Keep one entry per article so this list stays aligned with the
        # abstract and title lists.
        CAMJournal.append('NaN')
CAMJournal

In [None]:
# Get the abstract sections of every fetched article.
# Iterate over `papers` (the efetch records): `results` is the esearch response,
# from which the notebook only reads 'IdList'.
CAMAb = []
for paper in papers['PubmedArticle']:
    try:
        abstract = paper['MedlineCitation']['Article']['Abstract']
        if 'AbstractText' in abstract:
            CAMAb.append(abstract['AbstractText'])
        else:
            CAMAb.append('NaN')
    except KeyError:
        # Articles without an abstract still get a placeholder so this list
        # stays aligned with the journal and title lists.
        CAMAb.append('NaN')
CAMAb

In [None]:
# Get the title of every fetched article.
# Iterate over `papers` (the efetch records): `results` is the esearch response,
# from which the notebook only reads 'IdList'.
CAMtitles = []
for paper in papers['PubmedArticle']:
    try:
        article = paper['MedlineCitation']['Article']
        if 'ArticleTitle' in article:
            CAMtitles.append(article['ArticleTitle'])
        else:
            CAMtitles.append('NaN')
    except KeyError:
        # Keep one entry per article so this list stays aligned with the
        # journal and abstract lists.
        CAMtitles.append('NaN')
CAMtitles

In [None]:
# Check that the journal, abstract and title lists align (one entry per article).
# As three separate bare expressions only the last length was displayed, so
# compare them explicitly and show all three together.
assert len(CAMJournal) == len(CAMAb) == len(CAMtitles), "per-article lists have different lengths"
len(CAMJournal), len(CAMAb), len(CAMtitles)

In [None]:
# Pair up title, journal and abstract per article and load the rows into a frame.
combdata = list(zip(CAMtitles, CAMJournal, CAMAb))
column_names = ['Title', 'JournalInfo', 'Abstract']
df = pd.DataFrame(combdata, columns=column_names)
# Number of rows in the frame.
len(df)

In [None]:
# Inspect the raw abstract stored under row label 1.
df.loc[1, 'Abstract']

[StringElement('and Purpose: There were studies showing the positive effects of Tai Chi on sleep, fatigue, endurance, balance, anxiety and depression in rheumatologic diseases. The aim of this study was to investigate the effects of Tai Chi exercise program on trunk endurance, balance, sleep, fatigue, anxiety and depression in patients with systemic sclerosis (SSc).', attributes={'Label': 'BACKGROUND', 'NlmCategory': 'BACKGROUND'}),
 StringElement('28 patients were randomly divided into two groups as Tai Chi (n:14, 53.35\xa0±\xa010.86 years) and home exercise group (n:14, 52.64\xa0±\xa09.45 years). Trunk Lateral Endurance Test, Berg Balance Scale, Pittsburg Sleep Quality Index, Fatigue Severity Scale and Fatigue Impact Scale, Hospital Anxiety and Depression Scale was used for assesment. All evaluations were performed at baseline and at the end of the 10th week.', attributes={'Label': 'MATERIALS AND METHODS', 'NlmCategory': 'METHODS'}),
 StringElement('After training, a statistically si

In [None]:
# Flatten each abstract (a list of section strings) into one space-separated string.
# Values that are already plain strings - the 'NaN' placeholder, or abstracts joined
# by an earlier run of this cell - are kept as they are: ' '.join on a str would
# insert a space between every character.
df['Abstract'] = df.Abstract.apply(lambda x: x if isinstance(x, str) else ' '.join(x))

In [None]:
def remove_brackets(x):
    """Return ``x`` with all bracket characters and double quotes deleted.

    The characters removed are ``< > ( ) [ ] { }`` and ``"``.
    """
    unwanted = '<>()[]{}"'
    # Turn the escaped characters into a regex character class and drop every match.
    char_class = "[{}]".format(re.escape(unwanted))
    return re.sub(char_class, "", x)

In [None]:
# Serialise every journal entry with json.dumps, then check the type of the first value.
# NOTE(review): json.dumps wraps a string in double quotes - confirm that is intended.
df['JournalInfo'] = df['JournalInfo'].apply(json.dumps)
type(df['JournalInfo'][0])

str

In [None]:
# Strip brackets and double quotes from every abstract, then check the type of the first value.
df['Abstract'] = df['Abstract'].apply(remove_brackets)
type(df['Abstract'][0])

str

In [None]:
#df['Title'] = df.Title.apply(lambda x: json.dumps(x))
#type(df.Title[0])

In [None]:
# Preview the first 20 rows of the cleaned frame.
df.head(n=20)

Unnamed: 0,Title,JournalInfo,Abstract
0,Qigong or Tai Chi in Cancer Care: an Updated S...,Curr Oncol Rep,Qigong and Tai Chi are two increasingly popula...
1,Investigation of the effectiveness of Tai Chi ...,Complement Ther Clin Pract,and Purpose: There were studies showing the po...
2,Tai Chi and Qigong for cancer-related symptoms...,J Cancer Surviv,This study aims to summarize and critically ev...
3,The effectiveness of tai chi in breast cancer ...,Complement Ther Clin Pract,Tai chi has been suggested as a potential effe...
4,Effect of simplified Tai Chi exercise on relie...,J Sports Med Phys Fitness,"Tai Chi, a kind of physical exercise, may act ..."
5,Effect of tai chi versus aerobic exercise for ...,BMJ,To determine the effectiveness of tai chi inte...
6,Effectiveness of Tai Chi on fibromyalgia patie...,Complement Ther Med,To identify empirical evidence on the effectiv...
7,Does Tai Chi Chuan improve psychological well-...,Medicine Baltimore,Breast cancer is the most prevalent cancer in ...
8,Effect of Tai Chi for post-stroke mental disor...,Medicine Baltimore,Post-stroke mental disorders (PSMDs) and post-...
9,The effects of guided imagery on state and tra...,Complement Ther Med,This study aimed to investigate the effects of...


In [None]:
#df.head()

In [None]:
# Add a new column 'TiL': a label based on the title (papers whose title contains 'protocol', 'systematic review', 'meta-analysis', 'guideline' or 'qualitative' are labelled 0, all others 1)

In [None]:
# Could be tailored to the target population (children/youth/adolescents, or adults only)

In [None]:
# Titles containing any of these keywords are labelled 0; every other title is labelled 1.
excluded_title_pattern = "meta-analysis|systematic review|protocol|guideline|qualitative"
title_is_excluded = df['Title'].str.lower().str.contains(excluded_title_pattern)
df['TiL'] = np.where(title_is_excluded, 0, 1)
df.head()

Unnamed: 0,Title,JournalInfo,Abstract,TiL
0,Qigong or Tai Chi in Cancer Care: an Updated S...,Curr Oncol Rep,Qigong and Tai Chi are two increasingly popula...,0
1,Investigation of the effectiveness of Tai Chi ...,Complement Ther Clin Pract,and Purpose: There were studies showing the po...,1
2,Tai Chi and Qigong for cancer-related symptoms...,J Cancer Surviv,This study aims to summarize and critically ev...,0
3,The effectiveness of tai chi in breast cancer ...,Complement Ther Clin Pract,Tai chi has been suggested as a potential effe...,0
4,Effect of simplified Tai Chi exercise on relie...,J Sports Med Phys Fitness,"Tai Chi, a kind of physical exercise, may act ...",1


In [None]:
# Check the container type of the title-label column.
type(df.TiL)

pandas.core.series.Series

In [None]:
# Persist the frame (including the 'TiL' labels) to a pickle file at the relative path 'save_path'.
df.to_pickle('save_path')

In [None]:
# Reload the frame from the pickle file at 'save_path'.
# NOTE: unpickling can execute arbitrary code - only load pickle files you created yourself.
df = pd.read_pickle('save_path')

In [None]:
# Manually enter values in the new column 'AbL' (a label based on the abstract); examples follow

In [None]:
# NOTE(review): `df_Abs` is never defined in the visible notebook; assuming it is
# meant to be the frame reloaded into `df` - confirm. Working on a copy keeps
# `df` unchanged and makes this cell safe to re-run.
df_Abs = df.copy()

# Abstract-based label: every row starts at 1, then the rows rejected after
# reading their abstract are set to 0. Keys are row labels, values the reason.
df_Abs['AbL'] = 1
abstract_exclusions = {
    9: 'not CAM, only CBT',
    14: 'a review',
    16: 'MBCT, qigong CT incorporating CBT',
    20: 'only CBT-I',
    21: 'cross-sectional analysis of the RCT baseline',
}
for row_label in abstract_exclusions:
    df_Abs.at[row_label, 'AbL'] = 0

# Show the resulting label column.
df_Abs.AbL