<a href="https://colab.research.google.com/github/fogg-lab/transcriptomic-data-integrator/blob/main/notebooks/GEO_search_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

In [None]:
# Clone repository
!git clone https://github.com/fogg-lab/transcriptomic-data-integrator.git

# Install package
!pip install ./transcriptomic-data-integrator

In [None]:
# Privately configure email address for the NCBI API
from getpass import getpass
import subprocess
# Prompt for the address without echoing it and pass it straight to the
# `configure-ncbi-email` command; it is never bound to a notebook variable.
# subprocess.DEVNULL discards the command's stdout/stderr without opening
# '/dev/null' by hand, so nothing the command prints lands in the cell output.
exit_status = subprocess.run(
    ["configure-ncbi-email", getpass('Enter your email address: ')],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
    check=False,
).returncode

# Fail on a non-zero exit status, but report only the status code:
# the CalledProcessError raised by check=True includes the full argument list
# in its message, which would print the email address in the traceback.
if exit_status != 0:
    raise RuntimeError(
        f"configure-ncbi-email failed with exit status {exit_status}"
    )

## Examples

In [None]:
""" Example 1: Basic keyword search """
import transcriptomic_data_integrator as tdi

# Free-text search terms, capped at 20 hits (raise the cap to see more)
search_terms = "heart disease homo sapiens"
result_cap = 20

geo_ids = tdi.geo.search_geo(search_terms, max_results=result_cap)

# Fetch the descriptions for the returned IDs and print one line per entry
for accession, study_description in tdi.geo.get_descriptions_from_ids(geo_ids).items():
    print(f"{accession}: {study_description}")

In [None]:
""" Example 2: Search by gene symbol and organism """
import transcriptomic_data_integrator as tdi

# Field-tagged query: a gene name combined with an organism filter
gene_query = "BRCA1[Gene Name] AND Homo sapiens[Organism]"

# No max_results given, so search_geo uses its own default
# (stated as 25 in the original note; confirm against the package)
matching_ids = tdi.geo.search_geo(gene_query)

summaries = tdi.geo.get_descriptions_from_ids(matching_ids)
for accession, study_description in summaries.items():
    print(f"{accession}: {study_description}")

In [None]:
""" Example 3: Search by platform technology """
import transcriptomic_data_integrator as tdi

# Filter on a platform accession and add a free-text term
platform_query = "GPL10558[Platform] AND cancer"

# Request at most 50 results
platform_ids = tdi.geo.search_geo(platform_query, max_results=50)

# Pass "unknown" as the default accession for the description lookup
platform_studies = tdi.geo.get_descriptions_from_ids(
    platform_ids,
    default_accession="unknown",
)
for accession, study_description in platform_studies.items():
    print(f"{accession}: {study_description}")

In [None]:
""" Example 4: Search by publication date range """
import transcriptomic_data_integrator as tdi

# Liver disease studies with a PDAT in the first week of 2023
# (date-range filter combined with a free-text term)
date_range_query = "2023/01/01[PDAT] : 2023/01/07[PDAT] AND liver disease"

first_week_ids = tdi.geo.search_geo(date_range_query)

# Pass "unknown" as the default accession for the description lookup
first_week_studies = tdi.geo.get_descriptions_from_ids(
    first_week_ids,
    default_accession="unknown",
)
for accession, study_description in first_week_studies.items():
    print(f"{accession}: {study_description}")

In [None]:
""" Example 5: Search by study type """
import transcriptomic_data_integrator as tdi

# Array-based expression profiling studies related to diabetes
study_type_query = "Expression profiling by array[Study Type] AND diabetes"
result_limit = 75  # show up to this many results

diabetes_ids = tdi.geo.search_geo(study_type_query, max_results=result_limit)

# Print one "accession: description" line per entry
diabetes_studies = tdi.geo.get_descriptions_from_ids(diabetes_ids)
for accession, study_description in diabetes_studies.items():
    print(f"{accession}: {study_description}")