In [3]:
# Add the parent of the current working directory to the import path so that
# project-local packages (the "data" and "functions and methods" directories)
# can be imported from this notebook.
import os
import sys

project_root = os.path.dirname(os.getcwd())
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

In [4]:
from urllib.request import urlopen

from lisc import Counts
from lisc.objects.base import Base
from lisc.utils.db import SCDB
from lisc.utils.io import save_object
from lisc.plts.counts import *

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from lisc_db.terms.terminology import stages, eeg_features
from utils.searchterms import format_terms

## Resources
- https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
- https://pubmed.ncbi.nlm.nih.gov/help/#search-tags

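Tip: on the PubMed help page (second link), press Ctrl+F and search for "Search field descriptions and tags" to jump to the list of field tags (such as `tiab`, used below).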

In [5]:
# Initialize the lisc SCDB (database) object, constructed with 'lisc_db'
# (presumably the database base path -- TODO confirm), then generate its paths.
# NOTE(review): depending on the installed lisc version, SCDB() may already
# generate its paths on construction, and gen_paths() called with no arguments
# may not reuse 'lisc_db' as the base -- confirm against the SCDB source.
db = SCDB('lisc_db')
db.gen_paths()

In [11]:
# Three single-term entries, one per phrasing of stage-1 / light sleep.
# The inner double quotes are part of each term string.
terms_a = [['"stage 1 sleep"'], ['"stage one sleep"'], ['"light sleep"']]

counts = Counts()
counts.add_terms(terms_a)

# Collect counts from PubMed, restricted to the `tiab` search field.
# `directory` receives the SCDB object `db` created in an earlier cell; the
# previous version passed the string literal 'db', which names a folder called
# "db" rather than the project database.
# Note: the keyword `db='pubmed'` below is run_collection's own parameter
# (which literature database to query) and is unrelated to the `db` variable.
counts.run_collection(verbose=False, db='pubmed', field='tiab', directory=db)

# Print the number of documents found for each search term.
counts.check_counts()

The number of documents found for each search term is:
  '"stage 1 sleep"'     -   4083
  '"stage one sleep"'   -   1941
  '"light sleep"'       -   7991


In [12]:
### Save a lisc object into DB ###

# Absolute path to the project directory (the parent of the current working
# directory), e.g. '../GitHub/Predict-Sleep-EEG/'.
main_dir = os.path.dirname(os.getcwd())

# Absolute path to the counts folder, e.g. '<main_dir>/data/lisc_db/data/counts/'.
# Each component is handed to os.path.join separately instead of being glued
# together with a hard-coded 'data/' prefix, so the separator is chosen by the
# platform and the folder path does not have to be a plain str.
db_dir = os.path.join(main_dir, 'data', db.get_folder_path('counts'))

# Save the lisc Counts object under the file name 'counts.p' in db_dir.
save_object(counts, 'counts.p', directory=db_dir)

In [14]:
### Load a lisc object from DB ###

# Path of the saved object as reported by the SCDB object; it is joined onto
# '<main_dir>/data' below (expected result: 'data/lisc_db/data/counts/counts.p'
# relative to the project directory).
data_rpath = db.get_file_path('counts', 'counts.p')

# Absolute path to the saved lisc object. Components are passed to os.path.join
# separately rather than concatenated with a hard-coded 'data/' prefix.
data_abpath = os.path.join(main_dir, 'data', data_rpath)

# Load the pickled lisc Counts() object with pandas and print its counts.
# Only unpickle files this project wrote itself: unpickling can execute code.
# NOTE(review): the variable name `pickle` shadows the standard-library module
# name; it is kept unchanged here in case later cells refer to it.
pickle = pd.read_pickle(data_abpath)
pickle.check_counts()

The number of documents found for each search term is:
  '"stage 1 sleep"'     -   4083
  '"stage one sleep"'   -   1941
  '"light sleep"'       -   7991


In [7]:
# Display the imported `stages` term list: one inner list per sleep stage,
# each holding that stage's synonymous search phrases.
stages

[['stage 1 sleep', 'stage one sleep', 'light sleep'],
 ['stage 2 sleep', 'stage two sleep'],
 ['stage 3 sleep', 'stage three sleep'],
 ['stage 4 sleep', 'stage four sleep'],
 ['rem', 'rapid eye movement', 'paradoxical sleep']]

In [11]:
# Exclusion words, one inner list per entry of the five-entry `stages` list:
# the first four entries have no exclusions, the last one excludes 'paradox'.
exclusions = [[] for _ in range(4)] + [['paradox']]

# Register the search terms and then the exclusions on a fresh lisc Base object.
base = Base()
base.add_terms(stages)
base.add_terms(exclusions, 'exclusions')