## A notebook to check millitome datasets against hra-pop

In [26]:
# install and import libraries
%pip install pandas

import pandas as pd
import glob
from itertools import chain

Note: you may need to restart the kernel to use updated packages.


In [27]:
# load data
# glob returns matches in a filesystem-dependent order; sort the paths so the
# row order of combined_df (and anything derived from it) is reproducible.
csv_files = sorted(glob.glob('data/*.csv'))

# Fail early with a clear message rather than pd.concat's
# "No objects to concatenate" ValueError when nothing matched.
if not csv_files:
    raise FileNotFoundError(
        "No CSV files matched 'data/*.csv'; check the notebook's working directory"
    )

# Create an empty list to store the dataframes
dfs = []

# Loop through each CSV file and read it into a dataframe
for file in csv_files:
    df = pd.read_csv(file)
    dfs.append(df)

# Concatenate all dataframes into a single dataframe with a fresh 0..n-1 index
combined_df = pd.concat(dfs, ignore_index=True)

# identify HuBMAP IDs from all the columns
combined_df.columns

Index(['HuBMAP Sample ID', 'Donor', 'Link', 'Lab ID', 'Parent organ type',
       'BLOCK Location', 'Assay Group', 'millitome_ID', 'HubMAP ID',
       'Submission ID', 'sample_lab_id', 'TIssue Block HuBMAP IDs',
       'RUI Location', 'Id'],
      dtype='object')

In [28]:
# get HuBMAP IDs
# Column headers that carry HuBMAP IDs (spelled exactly as they appear in combined_df)
keep = ['HuBMAP Sample ID','HubMAP ID','TIssue Block HuBMAP IDs']

# Collect one list of distinct, non-null values per ID column,
# visiting the columns in combined_df's own column order.
id_lists = []
for col_name in combined_df.columns:
  if col_name not in keep:
    continue
  distinct_ids = combined_df[col_name].dropna().unique()
  id_lists.append(distinct_ids.tolist())

# Flatten the per-column lists into a single list (per-column order preserved)
hubmap_ids_flat = list(chain.from_iterable(id_lists))
hubmap_ids_flat

['HBM692.LCNB.765',
 'HBM792.WJLJ.923',
 'HBM256.MTWQ.585',
 'HBM568.LZHR.425',
 'HBM252.QVCK.893',
 'HBM495.WLDR.795',
 'HBM588.TNGJ.868',
 'HBM644.XFGJ.857',
 'HBM385.QFHD.475',
 'HBM334.CHVK.238',
 'HBM587.DFCW.749',
 'HBM995.CQKZ.339',
 'HBM966.WGFQ.597',
 'HBM553.WQWN.884',
 'HBM374.SRZD.953',
 'HBM762.SPJF.928',
 'HBM776.DNJZ.945',
 'HBM459.VMJC.864',
 'HBM696.HWTM.483',
 'HBM992.ZLZB.786',
 'HBM759.ZTGN.372',
 'HBM379.BGFQ.837',
 'HBM476.NQXM.289',
 'HBM872.TSRG.986',
 'HBM997.HRCG.585',
 'HBM567.DVNV.954',
 'HBM779.SKXG.842',
 'HBM399.TBPX.343',
 'HBM337.QSBH.972',
 'HBM324.HZVF.467',
 'HBM269.BZSG.442',
 'HBM278.XLBD.662',
 'HBM235.XDSM.559',
 'HBM727.WSZX.242',
 'HBM927.XWTL.358',
 'HBM255.FFJQ.856',
 'HBM369.WRDC.345',
 'HBM724.GMZM.797',
 'HBM474.TQQQ.496',
 'HBM367.JCKP.625',
 'HBM785.MVWL.456',
 'HBM773.MKCR.985',
 'HBM827.MZXW.224',
 'HBM947.XDKM.768',
 'HBM224.TTJB.522',
 'HBM299.VQZH.986',
 'HBM367.VSDK.374',
 'HBM863.NCVL.825',
 'HBM256.ZGQF.566',
 'HBM353.RTXH.756',
