# Libraries

In [None]:
import pandas as pd
import numpy as np 

# Preparing Metadata

In [3]:
# Load the slide-level metadata table and keep only the rows whose `tech`
# is 'Visium' and whose `species` is 'human'. Both conditions are applied
# in a single boolean mask, so the original row index is preserved.
meta = (
    pd.read_csv('meta_all_gene.csv')
    .loc[lambda frame: (frame['tech'] == 'Visium') & (frame['species'] == 'human')]
)
# Display (rows, columns) of the filtered table as the cell output.
meta.shape

(90, 11)

In [4]:
# Maximum number of slides collected for each tissue.
# NOTE(review): the previous counter-based guard (`cnt > 20` with cnt starting
# at 0) let 21 slides through, so 21 is kept here to preserve the selection —
# confirm whether a cap of 20 was actually intended.
SLIDES_PER_TISSUE_CAP = 21

# Distinct tissue labels, in order of first appearance in `meta`.
tissues = meta['tissue'].unique()

# Collect slide identifiers tissue by tissue, taking the leading rows of each
# tissue (in table order) up to the cap.
target_names = []
for tissue in tissues:
    tissue_slides = meta.loc[meta['tissue'] == tissue, 'slide'].head(SLIDES_PER_TISSUE_CAP)
    for slide in tissue_slides:
        target_names.append(slide)
        print(f"DONE: {slide}")
target_names

{'GSE179572_GSM5420749',
 'GSE179572_GSM5420750',
 'GSE179572_GSM5420751',
 'GSE179572_GSM5420752',
 'GSE179572_GSM5420753',
 'GSE179572_GSM5420754',
 'GSE184510_GSM5591748',
 'GSE184510_GSM5591749',
 'GSE184510_GSM5591750',
 'GSE184510_GSM5591751',
 'GSE184510_GSM5591752',
 'GSE184510_GSM5591753',
 'GSE184510_GSM5591754',
 'GSE184510_GSM5591755',
 'GSE232910_GSM7392324',
 'GSE232910_GSM7392325',
 'GSE232910_GSM7392326',
 'GSE232910_GSM7392327',
 'GSE232910_GSM7392328',
 'GSE235672_GSM7507311',
 'GSE235672_GSM7507312',
 'GSE235672_GSM7507313',
 'GSE235672_GSM7507314',
 'GSE235672_GSM7507315',
 'GSE235672_GSM7507316',
 'GSE235672_GSM7507317',
 'GSE235672_GSM7507318',
 'GSE235672_GSM7507319',
 'GSE235672_GSM7507320',
 'GSE235672_GSM7507321',
 'GSE235672_GSM7507322',
 'GSE235672_GSM7507323',
 'GSE235672_GSM7507324',
 'GSE235672_GSM7507325',
 'GSE235672_GSM7507326',
 'Human_Brain_10X_02132023_Visium',
 'Human_Brain_10X_10272020_Visium_Cerebellum_WholeTranscriptome',
 'Human_Brain_10X_10272

In [5]:
# Derive the three remote file names for every selected slide. Sets are used,
# so a slide name that occurs more than once yields a single file entry.
gene_files = set(f"{slide}_count.csv" for slide in target_names)
images = set(f"{slide}.png" for slide in target_names)
coord_files = set(f"{slide}_coord.csv" for slide in target_names)

# `endings` and `files` are parallel lists: position k of `endings` names the
# sub-directory that holds the file names stored at position k of `files`.
endings = [
    "gene_exp",
    "image",
    "coord",
]
files = [
    gene_files,
    images,
    coord_files,
]

# Downloading Data

In [None]:
import requests
import os

# Seconds allowed for connecting and for each read from the server; without a
# timeout a stalled connection would hang this cell indefinitely.
REQUEST_TIMEOUT = 60
# Number of bytes pulled from the response per iteration while streaming.
CHUNK_SIZE = 1024 * 1024

# `endings` and `files` are parallel lists: each sub-directory name is paired
# with the set of file names to fetch from it.
for ending, file_names in zip(endings, files):

    url = f"https://huggingface.co/datasets/jiawennnn/STimage-1K4M/resolve/main/Visium/{ending}/"
    save_dir = f"../data/Visium/{ending}"
    os.makedirs(save_dir, exist_ok=True)  # Ensure the directory exists

    for file in file_names:
        save_path = os.path.join(save_dir, file)

        # Check if file already exists
        if os.path.exists(save_path):
            print(f"{file} already exists. Skipping download.")
            continue

        # Stream into a temporary sibling file and rename it only once the
        # transfer has finished. An interrupted download therefore never
        # leaves a truncated file at `save_path`, which the existence check
        # above would otherwise mistake for a complete download on re-run.
        part_path = save_path + ".part"

        # The context manager releases the connection even if writing fails.
        with requests.get(url + file, stream=True, timeout=REQUEST_TIMEOUT) as response:
            if response.status_code != 200:
                print(f"Failed to download {file}. Status code: {response.status_code}")
                continue

            try:
                with open(part_path, "wb") as saved_file:
                    for chunk in response.iter_content(chunk_size=CHUNK_SIZE):  # Download in chunks
                        saved_file.write(chunk)
                os.replace(part_path, save_path)
            except BaseException:
                # Also covers a manual kernel interrupt: drop the partial
                # file, then let the original error propagate unchanged.
                if os.path.exists(part_path):
                    os.remove(part_path)
                raise

        print(f"{file} downloaded successfully.")
