# Libraries

In [1]:
import pandas as pd
import numpy as np 

# Preparing Metadata

In [2]:
# Load the metadata table, then keep only the rows whose tech is 'Visium',
# species is 'human' and tissue is 'skin' (all three conditions at once).
meta = pd.read_csv('meta_all_gene.csv')
meta = meta[
    (meta['tech'] == 'Visium')
    & (meta['species'] == 'human')
    & (meta['tissue'] == 'skin')
]
meta.shape

(37, 11)

In [3]:
# Maximum number of slides collected per tissue.
# NOTE(review): this is 21, not 20 -- the previous `cnt > 20` guard admitted
# 21 slides per tissue. Confirm whether 20 was the intended limit.
MAX_SLIDES_PER_TISSUE = 21

# Collect, tissue by tissue, the first MAX_SLIDES_PER_TISSUE slide ids in
# table order. These ids are used to build the file names to download.
tissues = meta['tissue'].unique()
target_names = []
for tissue in tissues:
    tissue_slides = meta.loc[meta['tissue'] == tissue, 'slide'].head(MAX_SLIDES_PER_TISSUE)
    for slide in tissue_slides:
        target_names.append(slide)
        print(f"DONE: {slide}")
target_names

DONE: GSE144239_GSM4565823
DONE: GSE144239_GSM4565824
DONE: GSE144239_GSM4565825
DONE: GSE144239_GSM4565826
DONE: GSE173651_GSM5273010
DONE: GSE173651_GSM5273011
DONE: GSE173651_GSM5273012
DONE: GSE173651_GSM5273013
DONE: GSE173651_GSM5273014
DONE: GSE173651_GSM5273015
DONE: GSE182208_GSM5531131
DONE: GSE197023_GSM5907077
DONE: GSE197023_GSM5907078
DONE: GSE197023_GSM5907079
DONE: GSE197023_GSM5907080
DONE: GSE197023_GSM5907081
DONE: GSE197023_GSM5907082
DONE: GSE197023_GSM5907083
DONE: GSE197023_GSM5907084
DONE: GSE197023_GSM5907085
DONE: GSE197023_GSM5907086


['GSE144239_GSM4565823',
 'GSE144239_GSM4565824',
 'GSE144239_GSM4565825',
 'GSE144239_GSM4565826',
 'GSE173651_GSM5273010',
 'GSE173651_GSM5273011',
 'GSE173651_GSM5273012',
 'GSE173651_GSM5273013',
 'GSE173651_GSM5273014',
 'GSE173651_GSM5273015',
 'GSE182208_GSM5531131',
 'GSE197023_GSM5907077',
 'GSE197023_GSM5907078',
 'GSE197023_GSM5907079',
 'GSE197023_GSM5907080',
 'GSE197023_GSM5907081',
 'GSE197023_GSM5907082',
 'GSE197023_GSM5907083',
 'GSE197023_GSM5907084',
 'GSE197023_GSM5907085',
 'GSE197023_GSM5907086']

In [4]:
# Remote sub-directory names, one per kind of file; files[k] holds the file
# names expected under endings[k].
endings = ["gene_exp", "image", "coord"]
files = [
    {f"{slide}_count.csv" for slide in target_names},   # gene_exp
    {f"{slide}.png" for slide in target_names},         # image
    {f"{slide}_coord.csv" for slide in target_names},   # coord
]
# Named handles onto the same three sets.
gene_files, images, coord_files = files

# Downloading Data

In [None]:
import os

import requests

# (connect, read) timeouts in seconds, so a stalled server cannot hang the cell.
DOWNLOAD_TIMEOUT = (10, 60)
# Size of each streamed chunk written to disk (1 MiB).
DOWNLOAD_CHUNK_SIZE = 1 << 20

# Download every expected file of each kind into ../data/Visium/<kind>/.
# Files already present are skipped, so the cell can be re-run to resume.
for ending, file_names in zip(endings, files):

    url = f"https://huggingface.co/datasets/jiawennnn/STimage-1K4M/resolve/main/Visium/{ending}/"
    save_dir = f"../data/Visium/{ending}"
    os.makedirs(save_dir, exist_ok=True)  # Ensure the directory exists

    # Sort so the download order is the same on every run (set order is not).
    for file in sorted(file_names):
        save_path = os.path.join(save_dir, file)

        # Check if file already exists
        if os.path.exists(save_path):
            print(f"{file} already exists. Skipping download.")
            continue

        # Stream into a temporary ".part" file and rename it only after the
        # whole body has been written. An interrupted transfer therefore never
        # leaves a truncated file that a later run would skip as complete.
        part_path = save_path + ".part"
        try:
            # The context manager releases the connection on every path,
            # including the non-200 one.
            with requests.get(url + file, stream=True, timeout=DOWNLOAD_TIMEOUT) as response:
                if response.status_code != 200:
                    print(f"Failed to download {file}. Status code: {response.status_code}")
                    continue

                with open(part_path, "wb") as saved_file:
                    for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):  # Download in chunks
                        saved_file.write(chunk)

            os.replace(part_path, save_path)
            print(f"{file} downloaded successfully.")
        except requests.RequestException as exc:
            # Report network errors like HTTP failures and move on to the next
            # file instead of aborting the whole batch.
            print(f"Failed to download {file}. Error: {exc}")
        finally:
            # Remove any leftover partial file (no-op after a successful rename).
            if os.path.exists(part_path):
                os.remove(part_path)


GSE197023_GSM5907084_count.csv downloaded successfully.
