In [1]:
# Make the parent of the notebook's working directory importable so that the
# project-local `data` and `utils` packages imported below can be resolved.
import os
import sys

project_root = os.path.dirname(os.getcwd())
# Guard against duplicates: re-running this cell must not keep growing sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
from urllib.request import urlopen

from lisc import Counts
from lisc.utils.db import SCDB
from lisc.plts.counts import *

import matplotlib.pyplot as plt
import numpy as np

from data.terminology import stages, eeg_features
from utils.searchterms import format_terms

## Resources
- https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
- https://pubmed.ncbi.nlm.nih.gov/help/#search-tags

Tip: on the PubMed help page, use Ctrl+F to jump to the section "Search field descriptions and tags".

In [4]:
# Endpoint and fixed query parameters for the NCBI E-utilities ESearch request.
u_eutils = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
u_db = 'db=pubmed'          # search the PubMed database
u_rettype = 'rettype=count'  # request only the hit count
u_field = 'field=tiab'       # restrict the search to the `tiab` field

# Search expression for the first sleep stage; spaces are turned into '+'
# so the expression can be placed directly in the query string.
url_searchterms = format_terms(stages[0], tool='urllib').replace(' ', '+')
u_term = 'term=' + url_searchterms

# Full request URL: endpoint, '?', then the '&'-separated parameters.
url = u_eutils + '?' + '&'.join((u_db, u_rettype, u_field, u_term))

In [5]:
# Send the ESearch request and keep the raw XML bytes of the reply.
# An explicit timeout keeps an unresponsive server from blocking the kernel
# indefinitely (without it, urlopen uses the global socket default, which is
# normally "no timeout").
with urlopen(url, timeout=30) as response:
    xml = response.read()
# Show the search expression that was sent, then the raw response as cell output.
print(url_searchterms)
xml

"stage+1+sleep"+OR+"stage+one+sleep"+OR+"light+sleep"


b'<?xml version="1.0" encoding="UTF-8" ?>\n<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD esearch 20060628//EN" "https://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd">\n<eSearchResult>\n\t<Count>890</Count>\n</eSearchResult>\n'

In [3]:
# Collect PubMed counts with lisc for the first sleep stage, using the
# lisc-formatted version of the stage's search terms.
terms_a = format_terms(stages[0], tool='lisc')

counts = Counts()
counts.add_terms(terms_a)

# Same database and search field as the manual ESearch request above.
counts.run_collection(db='pubmed', field='tiab', verbose=True)

# Print the number of documents found for each term.
counts.check_counts()

Running counts for:  "stage 1 sleep"
Running counts for:  "stage one sleep"
Running counts for:  "light sleep"
The number of documents found for each search term is:
  '"stage 1 sleep"'     -   4079
  '"stage one sleep"'   -   1940
  '"light sleep"'       -   7984


In [4]:
# Repeat the lisc collection with the phrases written out by hand and without
# surrounding double quotes, to compare against the quoted variant.
terms_a = [
    'stage 1 sleep',
    'stage one sleep',
    'light sleep',
]

counts = Counts()
counts.add_terms(terms_a)

counts.run_collection(db='pubmed', field='tiab', verbose=True)

# Print the number of documents found for each term.
counts.check_counts()

Running counts for:  stage 1 sleep
Running counts for:  stage one sleep
Running counts for:  light sleep
The number of documents found for each search term is:
  'stage 1 sleep'     -   321
  'stage one sleep'   -     2
  'light sleep'       -   578


In [None]:
# Heatmap of the term co-occurrence matrix collected by `counts` above.
# NOTE: the name `correlation` is kept for compatibility, but the matrix holds
# the values of `counts.counts`, not correlation coefficients.
correlation = np.asarray(counts.counts)

# Tick labels come from the terms that were actually searched (`terms_a`).
# An entry may be a plain string or (presumably, per lisc's term format) a list
# of synonyms; in the latter case the first synonym is used as the label.
term_labels = [entry if isinstance(entry, str) else entry[0] for entry in terms_a]

# The same labels are used on both axes, so the matrix must be square.
assert correlation.shape == (len(term_labels), len(term_labels)), (
    "counts.counts does not match the number of terms in terms_a"
)

fig, ax = plt.subplots(figsize=(24, 24))
im = ax.imshow(correlation)

tick_positions = np.arange(len(term_labels))
ax.set_xticks(tick_positions)
ax.set_xticklabels(term_labels)
ax.set_yticks(tick_positions)
ax.set_yticklabels(term_labels)

# Rotate the x labels so longer phrases do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

# Write each cell's value at its centre (x = column index, y = row index).
for i, j in np.ndindex(correlation.shape):
    ax.text(j, i, correlation[i, j], ha="center", va="center", color="w")

ax.set_title("Term co-occurrences")
plt.show()