In [29]:
pip install itables

Collecting itables
  Downloading itables-2.2.2-py3-none-any.whl.metadata (8.3 kB)
Collecting jedi>=0.16 (from ipython->itables)
  Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Downloading itables-2.2.2-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
Installing collected packages: jedi, itables
Successfully installed itables-2.2.2 jedi-0.19.1


In [34]:
from itables import init_notebook_mode
init_notebook_mode(all_interactive=False)

In [2]:
pip install Bio

Collecting Bio
  Downloading bio-1.7.1-py3-none-any.whl.metadata (5.7 kB)
Collecting biopython>=1.80 (from Bio)
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting gprofiler-official (from Bio)
  Downloading gprofiler_official-1.0.0-py3-none-any.whl.metadata (11 kB)
Collecting mygene (from Bio)
  Downloading mygene-3.2.2-py2.py3-none-any.whl.metadata (10 kB)
Collecting biothings-client>=0.2.6 (from mygene->Bio)
  Downloading biothings_client-0.3.1-py2.py3-none-any.whl.metadata (9.8 kB)
Downloading bio-1.7.1-py3-none-any.whl (280 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.0/281.0 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m46.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gprofiler_official-1.0.0-py3-none-any.whl

In [8]:
from Bio import Entrez

# Function to search GEO for mouse datasets related to aging and RNA-seq
def search_geo_datasets(keyword, organism="Mus musculus", db="gds", retmax=20):
    Entrez.email = "your_email@example.com"  # Always use your email
    search_term = f"{keyword} RNA-seq {organism}"

    # Search GEO datasets
    handle = Entrez.esearch(db=db, term=search_term, retmax=retmax)
    record = Entrez.read(handle)
    handle.close()

    # Get the list of GEO dataset IDs
    dataset_ids = record["IdList"]

    # Create a list of GSE accessions to return
    gse_accessions = []

    # Fetch metadata for each dataset
    if dataset_ids:
        print(f"Found {len(dataset_ids)} datasets.")
        for geo_id in dataset_ids:
            summary_handle = Entrez.esummary(db=db, id=geo_id)
            summary_record = Entrez.read(summary_handle)
            summary_handle.close()

            for dataset in summary_record:
                print("Accession: ", dataset.get("Accession"))
                print("Title: ", dataset.get("title"))
                print("Summary: ", dataset.get("summary"))
                print("Organism: ", dataset.get("organism"))
                print("Release Date: ", dataset.get("PDAT"))
                print("=" * 60)
                gse_accessions.append(dataset.get("Accession"))
        return gse_accessions
    else:
        print("No datasets found.")

# Perform a search for aging-related mouse RNA-seq datasets
ids = search_geo_datasets("aging", organism="Mus musculus", db="gds", retmax=10)


Found 10 datasets.
Accession:  GSE262514
Title:  Mitochondrial Calcium Uniporter Promotes Kidney Aging through Inducing Mitochondrial Calcium-Mediated Renal Tubular Cell Senescence
Summary:  Kidney is a vital organ responsible for homeostasis in the body. To retard kidney aging is of great importance for maintaining body health. Whereas the therapeutic strategies targeting against kidney aging are not elucidated. Recent studies show mitochondrial dysfunction is critical for renal tubular cell senescence and kidney aging, however, the underlying mechanisms of mitochondrial dysfunction in kidney aging have not been demonstrated. Herein, we found calcium overload, and the mitochondrial calcium uniporter (MCU) was induced in renal tubular cells and aged kidney. To activate MCU not only triggered mitochondrial calcium overload, but also induced reactive oxygen species (ROS) production and cellular senescence and age-related kidney fibrosis. Inversely, to block MCU or chelate calcium diminis

In [9]:
ids

['GSE262514',
 'GSE273523',
 'GSE234682',
 'GSE233809',
 'GSE226615',
 'GSE277734',
 'GSE225576',
 'GSE272933',
 'GSE272913',
 'GSE248881']

In [10]:
pip install GEOparse

Collecting GEOparse
  Downloading GEOparse-2.0.4-py3-none-any.whl.metadata (6.5 kB)
Downloading GEOparse-2.0.4-py3-none-any.whl (29 kB)
Installing collected packages: GEOparse
Successfully installed GEOparse-2.0.4


In [36]:
import GEOparse
import pandas as pd
from itables import show

In [37]:

# Replace this with your GSE accession
gse_accession = "GSE277734"

# Download the GEO dataset by accession
gse = GEOparse.get_GEO(geo=gse_accession, destdir="./")

# Create an empty list to store sample data
sample_data = []

# Collect the GSE series name and ID
gse_name = gse.metadata['title'][0]
gse_id = gse_accession

# Accessing and collecting the sample information
for gsm_name, gsm in gse.gsms.items():
    sample_info = {
        "GSE ID": gse_id,
        "GSE Series Name": gse_name,
        "Sample": gsm_name,
        "Title": gsm.metadata['title'][0],
        "Organism": gsm.metadata['organism_ch1'][0],
        "Source Name": gsm.metadata['source_name_ch1'][0]
    }
    # Expand characteristics into separate columns
    characteristics = gsm.metadata['characteristics_ch1']

    # Add each characteristic as a separate entry in the sample_info dictionary
    for char in characteristics:
        key, value = char.split(": ", 1)  # Split key and value
        sample_info[key] = value  # Add to sample_info dictionary

    sample_data.append(sample_info)

# Create a DataFrame from the sample data
sample_df = pd.DataFrame(sample_data)

# Display the DataFrame
print(sample_df)
show(sample_df)

# Optionally, save the DataFrame to a CSV file
sample_df.to_csv("GEO_samples_with_GSE.csv", index=False)


09-Oct-2024 11:23:31 DEBUG utils - Directory ./ already exists. Skipping.
DEBUG:GEOparse:Directory ./ already exists. Skipping.
09-Oct-2024 11:23:31 INFO GEOparse - File already exist: using local version.
INFO:GEOparse:File already exist: using local version.
09-Oct-2024 11:23:31 INFO GEOparse - Parsing ./GSE277734_family.soft.gz: 
INFO:GEOparse:Parsing ./GSE277734_family.soft.gz: 
09-Oct-2024 11:23:31 DEBUG GEOparse - DATABASE: GeoMiame
DEBUG:GEOparse:DATABASE: GeoMiame
09-Oct-2024 11:23:31 DEBUG GEOparse - SERIES: GSE277734
DEBUG:GEOparse:SERIES: GSE277734
09-Oct-2024 11:23:31 DEBUG GEOparse - PLATFORM: GPL24247
DEBUG:GEOparse:PLATFORM: GPL24247
09-Oct-2024 11:23:31 DEBUG GEOparse - SAMPLE: GSM8528619
DEBUG:GEOparse:SAMPLE: GSM8528619
09-Oct-2024 11:23:31 DEBUG GEOparse - SAMPLE: GSM8528620
DEBUG:GEOparse:SAMPLE: GSM8528620
09-Oct-2024 11:23:31 DEBUG GEOparse - SAMPLE: GSM8528621
DEBUG:GEOparse:SAMPLE: GSM8528621
09-Oct-2024 11:23:31 DEBUG GEOparse - SAMPLE: GSM8528622
DEBUG:GEOpars

      GSE ID                                    GSE Series Name      Sample  \
0  GSE277734  Reduction in olfactory ability in aging Mitf m...  GSM8528619   
1  GSE277734  Reduction in olfactory ability in aging Mitf m...  GSM8528620   
2  GSE277734  Reduction in olfactory ability in aging Mitf m...  GSM8528621   
3  GSE277734  Reduction in olfactory ability in aging Mitf m...  GSM8528622   
4  GSE277734  Reduction in olfactory ability in aging Mitf m...  GSM8528623   
5  GSE277734  Reduction in olfactory ability in aging Mitf m...  GSM8528624   

                                               Title      Organism  \
0      C57BL/6J, control1, OB, female, 25 months old  Mus musculus   
1      C57BL/6J, control2, OB, female, 25 months old  Mus musculus   
2   C57BL/6J (WT), control3, OB, male, 26 months old  Mus musculus   
3  Mitfmi-vga9/mi-vga9, Mitf mutant1, OB, male, 2...  Mus musculus   
4  Mitfmi-vga9/mi-vga9, Mitf mutant2, OB, female,...  Mus musculus   
5  Mitfmi-vga9/mi-vga9, Mi

GSE ID,GSE Series Name,Sample,Title,Organism,Source Name,tissue,cell type,genotype,treatment
Loading ITables v2.2.2 from the internet... (need help?),,,,,,,,,


In [24]:
gsm.metadata

{'title': ['C57BL/6J, control1, OB, female, 25 months old'],
 'geo_accession': ['GSM8528619'],
 'status': ['Public on Sep 25 2024'],
 'submission_date': ['Sep 21 2024'],
 'last_update_date': ['Sep 25 2024'],
 'type': ['SRA'],
 'channel_count': ['1'],
 'source_name_ch1': ['Olfactory bulb'],
 'organism_ch1': ['Mus musculus'],
 'taxid_ch1': ['10090'],
 'characteristics_ch1': ['tissue: Olfactory bulb',
  'cell type: Female, 25 months old',
  'genotype: C57BL/6J (WT)',
  'treatment: No treatment'],
 'molecule_ch1': ['total RNA'],
 'extract_protocol_ch1': ['RNA was isolated using a commercially available kit Monarch Total RNA Miniprep Kit; New England Biolabs Inc., cat# T2010S.',
  "To create RNA libraries Illumina's TruSeq RNA v2 Sample Prep Kit (Illumina, RS-122-2001) was used."],
 'data_processing': ['Basecalling was performed in real-time using RTA v3.4.4. The process of demultiplexing BCL files and generating FASTQ files was done using bcl2fastq2 v.2.20.',
  'Kallisto pseudocounts with 