In [1]:
pip install biopython

Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import json
import re
import numpy as np
import pickle

In [3]:
# Biopython entrez

In [4]:
from Bio import Entrez

def search(query): # scraping
    Entrez.email = 'xji1@dons.usfca.edu'
    handle = Entrez.esearch(db='pubmed', 
                            sort='relevance',
                            retmax='245',
                            retmode='xml', 
                            term=query)
    results = Entrez.read(handle)
    return results

def fetch_details(id_list):  
    ids = ','.join(id_list)
    Entrez.email = 'xji1@dons.usfca.edu'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    results = Entrez.read(handle)
    return results

if __name__ == '__main__': # parsing 
    results = search(['randomized control* [abstract] AND (insomnia OR sleep) AND (depressi* OR anxiety) AND (Herb OR mind-body OR acup* OR music OR mindfulness OR meditation OR Tai ji OR tai chi OR qigong OR yoga OR hypnotherapy OR massage OR manipulation OR traditional Chinese Medicine OR homeopathy OR relaxation OR guided imagery) AND 2018:2021[Date - Publication]'])
    id_list = results['IdList'] # a list of UIDs from fetching
    papers = fetch_details(id_list)
    for i, paper in enumerate(papers['PubmedArticle']):
        print("%d) %s" % (i+1, paper['MedlineCitation']['Article']['ArticleTitle']))

1) Qigong or Tai Chi in Cancer Care: an Updated Systematic Review and Meta-analysis.
2) Investigation of the effectiveness of Tai Chi exercise program in patients with scleroderma: A randomized controlled study.
3) Tai Chi and Qigong for cancer-related symptoms and quality of life: a systematic review and meta-analysis.
4) The effectiveness of tai chi in breast cancer patients: A systematic review and meta-analysis.
5) Effect of simplified Tai Chi exercise on relieving symptoms of patients with mild to moderate Parkinson's disease.
6) Effect of tai chi versus aerobic exercise for fibromyalgia: comparative effectiveness randomized controlled trial.
7) Effectiveness of Tai Chi on fibromyalgia patients: A meta-analysis of randomized controlled trials.
8) Does Tai Chi Chuan improve psychological well-being and quality of life in patients with breast cancer? Protocol for a systematic review of randomized controlled trials: A Protocol for Systematic Review and Meta-Analysis.
9) Effect of Tai

In [5]:
CAMJournal=[]
for i, paper in enumerate(papers['PubmedArticle']):
    CAMJournal.append(paper['MedlineCitation']['Article']['Journal']['ISOAbbreviation'])
CAMJournal

['Curr Oncol Rep',
 'Complement Ther Clin Pract',
 'J Cancer Surviv',
 'Complement Ther Clin Pract',
 'J Sports Med Phys Fitness',
 'BMJ',
 'Complement Ther Med',
 'Medicine (Baltimore)',
 'Medicine (Baltimore)',
 'Complement Ther Med',
 'J Integr Med',
 'BMJ Open',
 'Clin J Oncol Nurs',
 'J Perianesth Nurs',
 'Trials',
 'J Sleep Res',
 'Zhen Ci Yan Jiu',
 'Medicine (Baltimore)',
 'Medicine (Baltimore)',
 'Trials',
 'Complement Ther Med',
 'J Bodyw Mov Ther',
 'BMJ Open',
 'Trials',
 'Behav Cogn Psychother',
 'Complement Ther Med',
 'Psychol Health Med',
 'Medicine (Baltimore)',
 'Trials',
 'Int J Environ Res Public Health',
 'Acupunct Med',
 'Zhongguo Zhong Yao Za Zhi',
 'Trials',
 'Medicine (Baltimore)',
 'J Appl Gerontol',
 'Clin Gerontol',
 'Int J Yoga Therap',
 'BMC Psychiatry',
 'J Affect Disord',
 'Trials',
 'J Perianesth Nurs',
 'Complement Ther Clin Pract',
 'Trials',
 'Int J Rheum Dis',
 'Worldviews Evid Based Nurs',
 'Adv Mind Body Med',
 'Medicine (Baltimore)',
 'Clin Rehab

In [6]:
CAMAb=[]
for i, paper in enumerate(papers['PubmedArticle']):
    if 'AbstractText' in paper['MedlineCitation']['Article']['Abstract']:
        CAMAb.append(paper['MedlineCitation']['Article']['Abstract']['AbstractText'])
    else:
        CAMAb.append('NaN')
CAMAb

[[StringElement('Qigong and Tai Chi are two increasingly popular mind-body interventions with the potential to address the multifaceted needs of cancer survivors. The aim of this updated review and meta-analysis was to quantitatively evaluate the treatment effects of Qigong/Tai Chi on cancer survivors since 2014.', attributes={'Label': 'PURPOSE OF REVIEW'}),
  StringElement('There were statistically significant and clinically meaningful effects in favor of Qigong/Tai Chi interventions for symptoms of fatigue and sleep quality. There were positive trends, but not statistically significant effects, observed for anxiety, stress, depressive symptoms, and overall quality of life (QOL). Cancer-related cognitive impairment is a common complaint among cancer survivors that has received increasing attention in this area in recent years. Qigong/Tai Chi in cancer care shows great promise with short-term effects in treating many cancer-related symptoms. Further methodologically sound trials with l

In [7]:
CAMtitles=[]
for i, paper in enumerate(papers['PubmedArticle']):
    CAMtitles.append(paper['MedlineCitation']['Article']['ArticleTitle'])
CAMtitles

['Qigong or Tai Chi in Cancer Care: an Updated Systematic Review and Meta-analysis.',
 'Investigation of the effectiveness of Tai Chi exercise program in patients with scleroderma: A randomized controlled study.',
 'Tai Chi and Qigong for cancer-related symptoms and quality of life: a systematic review and meta-analysis.',
 'The effectiveness of tai chi in breast cancer patients: A systematic review and meta-analysis.',
 "Effect of simplified Tai Chi exercise on relieving symptoms of patients with mild to moderate Parkinson's disease.",
 'Effect of tai chi versus aerobic exercise for fibromyalgia: comparative effectiveness randomized controlled trial.',
 'Effectiveness of Tai Chi on fibromyalgia patients: A meta-analysis of randomized controlled trials.',
 'Does Tai Chi Chuan improve psychological well-being and quality of life in patients with breast cancer? Protocol for a systematic review of randomized controlled trials: A Protocol for Systematic Review and Meta-Analysis.',
 'Effect

In [8]:
combdata= list(zip(CAMtitles, CAMJournal,CAMAb)) 
df = pd.DataFrame(combdata,columns = ['Title', 'JournalInfo','Abstract']) 
len(df)

244

In [9]:
df['Abstract'][1]

[StringElement('and Purpose: There were studies showing the positive effects of Tai Chi on sleep, fatigue, endurance, balance, anxiety and depression in rheumatologic diseases. The aim of this study was to investigate the effects of Tai Chi exercise program on trunk endurance, balance, sleep, fatigue, anxiety and depression in patients with systemic sclerosis (SSc).', attributes={'Label': 'BACKGROUND', 'NlmCategory': 'BACKGROUND'}),
 StringElement('28 patients were randomly divided into two groups as Tai Chi (n:14, 53.35\xa0±\xa010.86 years) and home exercise group (n:14, 52.64\xa0±\xa09.45 years). Trunk Lateral Endurance Test, Berg Balance Scale, Pittsburg Sleep Quality Index, Fatigue Severity Scale and Fatigue Impact Scale, Hospital Anxiety and Depression Scale was used for assesment. All evaluations were performed at baseline and at the end of the 10th week.', attributes={'Label': 'MATERIALS AND METHODS', 'NlmCategory': 'METHODS'}),
 StringElement('After training, a statistically si

In [10]:
df['Abstract'] = df.Abstract.apply(lambda x: ' '.join(x))

In [11]:
def remove_brackets(x):
    remove = '<>()[]{}"'
    p = re.compile("[" + re.escape(remove) + "]")
    return p.sub("",x)

In [12]:
df['JournalInfo'] = df.JournalInfo.apply(lambda x: json.dumps(x))
df['Title'] = df.Title.apply(lambda x: json.dumps(x))
type(df.JournalInfo[0])
type(df.Title[0])

str

In [13]:
df['JournalInfo'] = df.JournalInfo.apply(lambda x: remove_brackets(x))
type(df.Abstract[0])

str

In [14]:
#df['Title'] = df.Title.apply(lambda x: json.dumps(x))
#type(df.Title[0])

In [15]:
df.head(20)

Unnamed: 0,Title,JournalInfo,Abstract
0,"""Qigong or Tai Chi in Cancer Care: an Updated ...",Curr Oncol Rep,Qigong and Tai Chi are two increasingly popula...
1,"""Investigation of the effectiveness of Tai Chi...",Complement Ther Clin Pract,and Purpose: There were studies showing the po...
2,"""Tai Chi and Qigong for cancer-related symptom...",J Cancer Surviv,This study aims to summarize and critically ev...
3,"""The effectiveness of tai chi in breast cancer...",Complement Ther Clin Pract,Tai chi has been suggested as a potential effe...
4,"""Effect of simplified Tai Chi exercise on reli...",J Sports Med Phys Fitness,"Tai Chi, a kind of physical exercise, may act ..."
5,"""Effect of tai chi versus aerobic exercise for...",BMJ,To determine the effectiveness of tai chi inte...
6,"""Effectiveness of Tai Chi on fibromyalgia pati...",Complement Ther Med,To identify empirical evidence on the effectiv...
7,"""Does Tai Chi Chuan improve psychological well...",Medicine Baltimore,Breast cancer is the most prevalent cancer in ...
8,"""Effect of Tai Chi for post-stroke mental diso...",Medicine Baltimore,Post-stroke mental disorders (PSMDs) and post-...
9,"""The effects of guided imagery on state and tr...",Complement Ther Med,This study aimed to investigate the effects of...


In [16]:
#df.head()

In [17]:
# Write a new columne 'Til' : labelling based on title (lable papers titled with 'protocol','systematic review','meta-analysis' or 'guideline' as 0, else 1)

In [18]:
df['TiL'] = np.where(df['Title'].str.lower().str.contains("meta-analysis|systematic review|protocol|guideline"), 0, 1)
df.head()

Unnamed: 0,Title,JournalInfo,Abstract,TiL
0,"""Qigong or Tai Chi in Cancer Care: an Updated ...",Curr Oncol Rep,Qigong and Tai Chi are two increasingly popula...,0
1,"""Investigation of the effectiveness of Tai Chi...",Complement Ther Clin Pract,and Purpose: There were studies showing the po...,1
2,"""Tai Chi and Qigong for cancer-related symptom...",J Cancer Surviv,This study aims to summarize and critically ev...,0
3,"""The effectiveness of tai chi in breast cancer...",Complement Ther Clin Pract,Tai chi has been suggested as a potential effe...,0
4,"""Effect of simplified Tai Chi exercise on reli...",J Sports Med Phys Fitness,"Tai Chi, a kind of physical exercise, may act ...",1


In [19]:
type(df['TiL'][0])

numpy.int32

In [3]:
df.to_pickle('C:/Users/CAM1.pkl')
#df.to_pickle('C:/Users/kirak/Documents/School/USF/HS651/CAM1.pkl')

# Read pkl CAM1

In [8]:
df = pd.read_pickle('C:/Users/CAM1.pkl')
#df = pd.read_pickle('C:/Users/kirak/Documents/School/USF/HS651/CAM1.pkl')

In [9]:
df_Abs = df[df["TiL"] == 1]

In [10]:
df_Abs.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Title,JournalInfo,Abstract,TiL
1,1,1,Investigation of the effectiveness of Tai Chi ...,Complement Ther Clin Pract,and Purpose: There were studies showing the po...,1
4,4,4,Effect of simplified Tai Chi exercise on relie...,J Sports Med Phys Fitness,"Tai Chi, a kind of physical exercise, may act ...",1
5,5,5,Effect of tai chi versus aerobic exercise for ...,BMJ,To determine the effectiveness of tai chi inte...,1
9,9,9,The effects of guided imagery on state and tra...,Complement Ther Med,This study aimed to investigate the effects of...,1
12,12,12,"Guided Imagery: Reducing Anxiety, Depression, ...",Clin J Oncol Nurs,Cancer treatment can be a great source of anxi...,1


In [11]:
#manully input value to new colunm 'AbL' (labelling based on abstract)

In [12]:
df_Abs.loc[:,'AbL'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [26]:
#df_Abs['Abstract'][9]:  not CAM only CBT

In [27]:
df_Abs.at[9,'AbL'] = 0

In [28]:
#df_Abs['Abstract'][14] : a review

In [29]:
df_Abs.at[14,'AbL'] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.loc[index, col] = value


In [30]:
#df_Abs['Abstract'][16]: MBCT,qinggo CT incoporating with CBT

In [31]:
df_Abs.at[16,'AbL'] = 0

In [32]:
# df_Abs['Abstract'][20]: only CBT-I

In [33]:
df_Abs.at[20,'AbL'] = 0

In [34]:
#df_Abs['Abstract'][21]: cross-sectional analysis of the RCT baseline

In [35]:
df_Abs.at[21,'AbL'] = 0

In [36]:
#df_Abs['Abstract'][23]: prospective study

In [37]:
df_Abs.at[23,'AbL'] = 0

In [38]:
#df_Abs['Abstract'][25]: CBT plus shenmen magnetic stimulation  

In [39]:
df_Abs.at[25,'AbL'] = 0

In [40]:
#df_Abs['Abstract'][26]: IRT plus CBT-i

In [41]:
df_Abs.at[26,'AbL'] = 0

In [42]:
#df_Abs['Abstract'][30]:Bibliometric summary

In [43]:
df_Abs.at[30,'AbL'] = 0

In [44]:
#df_Abs['Abstract'][35] : Baseline analysis 

In [45]:
df_Abs.at[30,'AbL'] = 0

In [46]:
#df_Abs['Abstract'][36]:CBT + mindfulness

In [47]:
df_Abs.at[36,'AbL'] = 0

In [48]:
#df_Abs['Abstract'][48]: Brief behavioral therapy

In [49]:
df_Abs.at[48,'AbL'] = 0

In [50]:
#df_Abs['Abstract'][51]: review

In [51]:
df_Abs.at[51,'AbL'] = 0

In [52]:
#df_Abs['Abstract'][54]: review

In [53]:
df_Abs.at[54,'AbL'] = 0

In [54]:
#df_Abs['Abstract'][56]:review

In [55]:
df_Abs.at[56,'AbL'] = 0

In [56]:
#df_Abs['Abstract'][58]: MBCT

In [57]:
df_Abs.at[58,'AbL'] = 0

In [58]:
#df_Abs['Abstract'][64]:non-R ct

In [59]:
df_Abs.at[64,'AbL'] = 0

In [60]:
#df_Abs['Abstract'][65] : (EA)+SSRIs

In [61]:
df_Abs.at[65,'AbL'] = 0

In [62]:
#df_Abs['Abstract'][72]: MBCT

In [63]:
df_Abs.at[72,'AbL'] = 0

In [64]:
#df_Abs['Abstract'][76]: psychotherapy

In [65]:
df_Abs.at[76,'AbL'] = 0

In [66]:
#df_Abs['Abstract'][78]:not RCT

In [67]:
df_Abs.at[78,'AbL'] = 0

In [68]:
#df_Abs['Abstract'][80]: Not RCT

In [69]:
df_Abs.at[80,'AbL'] = 0

In [70]:
#df_Abs['Abstract'][83]:review

In [71]:
df_Abs.at[83,'AbL'] = 0

In [72]:
#df_Abs['Abstract'][84]: not RCT report

In [73]:
df_Abs.at[84,'AbL'] = 0

In [74]:
#df_Abs['Abstract'][87]: not RCT

In [75]:
df_Abs.at[84,'AbL'] = 0

In [76]:
#df_Abs['Abstract'][88]: CBT + ACT

In [77]:
df_Abs.at[88,'AbL'] = 0

In [78]:
#df_Abs['Abstract'][89]: Jiaotai pill + fluoxetine

In [79]:
df_Abs.at[89,'AbL'] = 0

In [80]:
#df_Abs['Abstract'][90]: Participants are children

In [81]:
df_Abs.at[90,'AbL'] = 0

In [82]:
#df_Abs['Abstract'][91]:not CAMs

In [83]:
df_Abs.at[91,'AbL'] = 0

In [84]:
#df_Abs['Abstract'][92]: not CAMs

In [85]:
df_Abs.at[92,'AbL'] = 0

In [86]:
#df_Abs['Abstract'][95]:acupuncture + sertraline hydrochloride

In [87]:
df_Abs.at[95,'AbL'] = 0

In [88]:
#df_Abs['Abstract'][97]: not RCT

In [89]:
df_Abs.at[97,'AbL'] = 0

In [90]:
#df_Abs['Abstract'][98]:JW-SZRD and lorazepam orally

In [91]:
df_Abs.at[98,'AbL'] = 0

In [92]:
#df_Abs['Abstract'][99]: Review

In [93]:
df_Abs.at[99,'AbL'] = 0

In [94]:
#df_Abs['Abstract'][100]: study on children

In [95]:
df_Abs.at[100,'AbL'] = 0

In [96]:
#df_Abs['Abstract'][105]: not a RCT report

In [97]:
df_Abs.at[105,'AbL'] = 0

In [98]:
#df_Abs['Abstract'][106]:protocol

In [99]:
df_Abs.at[106,'AbL'] = 0

In [100]:
#df_Abs['Abstract'][107]: for youth

In [101]:
df_Abs.at[107,'AbL'] = 0

In [102]:
df_Abs['Abstract'][108]

'To explore the effects of acupuncture (manual acupuncture or electroacupuncture) combined with SSRIs for moderate to severe depression improving major clinical symptoms and life quality of the patients on secondary outcomes. Pragmatic, parallel, randomized controlled trial. 6 hospitals in China. 6 weeks of manual acupuncture (MA)+selective serotonin reuptake inhibitors (SSRIs), electroacupuncture (EA)+SSRIs, and SSRIs alone. The primary outcome was response rate of 17-item Hamilton Depression Scale (HAMD-17) total score at 6<sup>th</sup> week. The secondary outcomes reported in this analysis were HAMD-17 factor scores at 1<sup>st</sup>, 2<sup>nd</sup>, 4<sup>th</sup>, 6<sup>th</sup>, 10<sup>th</sup> week and WHO Quality of Life-BREF (WHOQOL-BREF) scores at 6<sup>th</sup> week. 477 patients were randomly assigned into MA\u2009+\u2009SSRIs (n\u2009=\u2009161), EA\u2009+\u2009SSRIs (n\u2009=\u2009160), or SSRIs alone (n\u2009=\u2009156) groups. For HAMD-17 (at 6<sup>th</sup> week), the M

In [103]:
#df_Abs.to_csv('C:/Users/CAM2.csv')

In [104]:
#df_Abs['Abstract'][108]: ground for massage therapist

In [105]:
df_Abs.at[108,'AbL'] = 0

In [106]:
df_Abs.head(109)

Unnamed: 0,Title,JournalInfo,Abstract,TiL,AbL
1,"""Investigation of the effectiveness of Tai Chi...",Complement Ther Clin Pract,and Purpose: There were studies showing the po...,1.0,1.0
4,"""Effect of simplified Tai Chi exercise on reli...",J Sports Med Phys Fitness,"Tai Chi, a kind of physical exercise, may act ...",1.0,1.0
5,"""Effect of tai chi versus aerobic exercise for...",BMJ,To determine the effectiveness of tai chi inte...,1.0,1.0
9,"""The effects of guided imagery on state and tr...",Complement Ther Med,This study aimed to investigate the effects of...,1.0,0.0
12,"""Guided Imagery: Reducing Anxiety, Depression,...",Clin J Oncol Nurs,Cancer treatment can be a great source of anxi...,1.0,1.0
...,...,...,...,...,...
164,"""Mind-body skills groups for treatment of war-...",Psychol Trauma,This study evaluated the effects of a mind-bod...,1.0,1.0
166,"""Longitudinal associations between mindfulness...",Int J Clin Health Psychol,"<i>Background/Objective:</i> Depression, anxie...",1.0,1.0
167,"""The SCD-Well randomized controlled trial: Eff...",Alzheimers Dement N Y,Subjectively experienced cognitive decline in ...,1.0,1.0
168,"""Pediatric perioperative measures of sleep, pa...",Complement Ther Med,The purpose of this study was to determine the...,1.0,1.0


In [107]:
#df_Abs['Abstract'][111]: not CAMs

In [108]:
df_Abs.at[111,'AbL'] = 0

In [109]:
#df_Abs['Abstract'][112]: not RCT

In [110]:
df_Abs.at[112,'AbL'] = 0

In [111]:
#df_Abs['Abstract'][113]： not CAM

In [112]:
df_Abs.at[112,'AbL'] = 0

In [113]:
#df_Abs['Abstract'][114] not RCT nor CAMs

In [114]:
df_Abs.at[114,'AbL'] = 0

In [115]:
#df_Abs['Abstract'][115]: not CAMs

In [116]:
df_Abs.at[115,'AbL'] = 0

In [117]:
#df_Abs['Abstract'][117]: for PICU patients

In [118]:
df_Abs.at[117,'AbL'] = 0

In [119]:
#df_Abs['Abstract'][119]: no sleep measure on adults

In [120]:
df_Abs.at[117,'AbL'] = 0

In [121]:
#df_Abs['Abstract'][122]: TCM plus dopamine

In [122]:
df_Abs.at[122,'AbL'] = 0

In [123]:
#df_Abs['Abstract'][126]: protocol

In [124]:
df_Abs.at[126,'AbL'] = 0

In [125]:
#df_Abs['Abstract'][127] :not original study; compared measurements

In [126]:
df_Abs.at[126,'AbL'] = 0

In [127]:
#df_Abs['Abstract'][128]: review

In [128]:
df_Abs.at[128,'AbL'] = 0

In [129]:
#df_Abs['Abstract'][132]: not CAMs

In [130]:
df_Abs.at[132,'AbL'] = 0

In [131]:
#df_Abs['Abstract'][137]: not CAMs

In [132]:
df_Abs.at[137,'AbL'] = 0

In [133]:
#df_Abs['Abstract'][138]: review

In [134]:
df_Abs.at[137,'AbL'] = 0

In [135]:
#df_Abs['Abstract'][140]:review

In [136]:
df_Abs.at[140,'AbL'] = 0

In [137]:
#df_Abs['Abstract'][144]: ACT+ TAU

In [138]:
df_Abs.at[144,'AbL'] = 0

In [139]:
#df_Abs['Abstract'][146]:protocol

In [140]:
df_Abs.at[146,'AbL'] = 0

In [141]:
#df_Abs['Abstract'][147]:protocol

In [142]:
df_Abs.at[147,'AbL'] = 0

In [143]:
#df_Abs['Abstract'][148]: review

In [144]:
df_Abs.at[148,'AbL'] = 0

In [145]:
df_Abs.to_pickle('C:/Users/CAM2.pkl')
#df_Abs.to_pickle('C:/Users/kirak/Documents/School/USF/HS651/CAM2.pkl')

In [146]:
df_Abs.head()

Unnamed: 0,Title,JournalInfo,Abstract,TiL,AbL
1,"""Investigation of the effectiveness of Tai Chi...",Complement Ther Clin Pract,and Purpose: There were studies showing the po...,1.0,1.0
4,"""Effect of simplified Tai Chi exercise on reli...",J Sports Med Phys Fitness,"Tai Chi, a kind of physical exercise, may act ...",1.0,1.0
5,"""Effect of tai chi versus aerobic exercise for...",BMJ,To determine the effectiveness of tai chi inte...,1.0,1.0
9,"""The effects of guided imagery on state and tr...",Complement Ther Med,This study aimed to investigate the effects of...,1.0,0.0
12,"""Guided Imagery: Reducing Anxiety, Depression,...",Clin J Oncol Nurs,Cancer treatment can be a great source of anxi...,1.0,1.0


In [147]:
# Preprocessing

In [18]:
df_Abs.AbL.value_counts()

1    98
0    52
Name: AbL, dtype: int64

In [13]:
df_Abs = pd.read_pickle('C:/Users/CAM2.pkl')
#df_Abs = pd.read_pickle('C:/Users/kirak/Documents/School/USF/HS651/CAM2.pkl')
df_Abs.head()

Unnamed: 0,Abstract,AbL
0,and Purpose: There were studies showing the po...,1
1,"Tai Chi, a kind of physical exercise, may act ...",1
2,To determine the effectiveness of tai chi inte...,1
3,This study aimed to investigate the effects of...,1
4,Cancer treatment can be a great source of anxi...,1


In [16]:
#del df_Abs['Title']

In [17]:
#del df_Abs['JournalInfo']

In [152]:
#del df_Abs['TiL']

In [153]:
df_Abs.head()

Unnamed: 0,Abstract,AbL
1,and Purpose: There were studies showing the po...,1.0
4,"Tai Chi, a kind of physical exercise, may act ...",1.0
5,To determine the effectiveness of tai chi inte...,1.0
9,This study aimed to investigate the effects of...,0.0
12,Cancer treatment can be a great source of anxi...,1.0


In [154]:
df_valid = df_Abs.sample(frac = 0.2, random_state = 42)

In [155]:
df_valid.to_pickle('D:/NLP/CAM2_valid.pkl')
#df_valid.to_pickle('C:/Users/kirak/Documents/School/USF/HS651/CAM2_valid.pkl')

In [156]:
df_train = df_Abs.drop(df_valid.index)

In [157]:
df_train.to_pickle('D:/NLP/CAM2_train.pkl')
#df_train.to_pickle('C:/Users/kirak/Documents/School/USF/HS651/CAM2_train.pkl')

In [158]:
df_train.AbL.value_counts()

1.0    88
0.0    43
Name: AbL, dtype: int64