In [8]:
import pandas as pd
import requests
import re
import os
import warnings
from tqdm import tqdm

# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and dtype warnings are hidden too;
# consider narrowing it to specific categories/modules.
warnings.filterwarnings("ignore")
# Lift pandas' column-display limit so displayed frames show every column.
pd.set_option('display.max_columns', None)


In [9]:
def load_data(path):
    """Load the ``HuBMAP ID`` column from the CSV file at ``path``.

    Returns a one-column DataFrame containing only ``HuBMAP ID``; rows where
    that value is missing are removed and the index is renumbered from 0.
    """
    table = pd.read_csv(path)
    # Double brackets keep the selection a DataFrame rather than a Series.
    id_frame = table[['HuBMAP ID']]
    id_frame = id_frame.dropna()
    return id_frame.reset_index(drop=True)

def create_directory(directory):
    """Create ``directory`` (including missing parents) if it does not exist.

    Prints ``Created directory: <directory>`` only when the path was absent
    at the time of the check. Does nothing when the path already exists.
    """
    if os.path.exists(directory):
        return
    # exist_ok=True closes the check-then-create race: if something else
    # creates the directory between the exists() check above and this call,
    # makedirs no longer raises FileExistsError.
    os.makedirs(directory, exist_ok=True)
    print(f"Created directory: {directory}")

def get_last_uuid(hubmap_id, base_url="https://entity.api.hubmapconsortium.org/entities/", timeout=10):
    """Look up ``hubmap_id`` at the entity API and return the last ``uuid`` in the response.

    The response body is scanned as text for ``"uuid": "<value>"`` pairs whose
    value consists of lowercase hex digits and dashes; the final match in the
    body is returned.
    NOTE(review): which entity that last uuid belongs to depends on the key
    order of the API's JSON payload — confirm this is the intended one.

    Parameters
    ----------
    hubmap_id : str
        Identifier appended to ``base_url``. Surrounding whitespace is
        stripped and the value is percent-encoded before use.
    base_url : str, optional
        Endpoint prefix; defaults to the HuBMAP entity API.
    timeout : float, optional
        Timeout in seconds passed to ``requests.get`` (default 10).

    Returns
    -------
    str or None
        The last matched uuid, or ``None`` when the ID is missing/blank, the
        request fails, the status code is not 200, or no uuid is found.
    """
    from urllib.parse import quote

    if hubmap_id is None:
        return None
    entity_id = str(hubmap_id).strip()
    if not entity_id:
        # Nothing to look up; avoid a pointless request to the bare endpoint.
        return None

    # safe="" also encodes "/" so an odd ID cannot alter the request path.
    url = f"{base_url}{quote(entity_id, safe='')}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network-level failures (timeout, DNS, connection reset, ...) stay
        # best-effort; programming errors are no longer silently swallowed.
        return None

    if response.status_code != 200:
        return None
    uuids = re.findall(r'"uuid"\s*:\s*"([a-f0-9\-]+)"', response.text)
    return uuids[-1] if uuids else None


In [10]:
# Location of the input CSV.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
csv_path = "/u/sbdubey/crosswalks.csv"
# Keep only the non-null 'HuBMAP ID' column (see load_data).
df = load_data(csv_path)
# Preview the first rows as the cell's displayed output.
df.head()


Unnamed: 0,HuBMAP ID
0,HBM235.VKNJ.237
1,HBM238.GTNW.259
2,HBM242.LSCK.393
3,HBM284.SBPR.357
4,HBM285.VFDT.966


In [12]:
# Resolve each HuBMAP ID through get_last_uuid, one call per row, with a progress bar.
# Failed lookups come back as None and are stored as such in the new column.
id_progress = tqdm(df['HuBMAP ID'], desc="Fetching UUIDs")
df['last_uuid'] = list(map(get_last_uuid, id_progress))


Fetching UUIDs:   0%|          | 0/64 [00:00<?, ?it/s]

Fetching UUIDs: 100%|██████████| 64/64 [00:28<00:00,  2.23it/s]


In [13]:
# Destination for the ID -> uuid mapping.
# NOTE(review): absolute, user-specific path — consider a configurable output directory.
out_path = "/u/sbdubey/hubmap_uuid_map.csv"
# index=False keeps the positional index out of the written file.
df.to_csv(out_path, index=False)
print(f"Saved: {out_path}")
# Preview the saved frame as the cell's displayed output.
df.head()


Saved: /u/sbdubey/hubmap_uuid_map.csv


Unnamed: 0,HuBMAP ID,last_uuid
0,HBM235.VKNJ.237,c8ee4bd8f052b50fab2e9e9365793374
1,HBM238.GTNW.259,a7ecf4fd858a58dcaa3bab5df455065b
2,HBM242.LSCK.393,a702c94500a2f737b722e34f6df7e7dd
3,HBM284.SBPR.357,a9d4471b41cd0d8185e12dd6c5a48e96
4,HBM285.VFDT.966,7034950e109586361e73c1b9ddb81346
