This notebook preprocesses the data and constructs the input files required by the baseline models.

Note that this notebook should be run in the SLBench environment.

In [1]:
from utils import *

The following cell shows how the gid2uid mapping (saved as `gid2uid.pkl`) was generated. You don't need to run it.

In [2]:
# Build the gene-id lookup from the meta table and persist it as a pickle.
df_9845 = pd.read_csv("./meta_table_9845.csv")
entrez_ids = df_9845['entrez_id'].astype(int)    # NCBI gene id (int)
unified_ids = df_9845['unified_id'].astype(int)  # SLbenchmark's id (int)
gid2uid = {gid: uid for gid, uid in zip(entrez_ids, unified_ids)}

with open('gid2uid.pkl', 'wb') as pkl_file:
    pickle.dump(gid2uid, pkl_file)

**Prepare data for baselines**

In [2]:
# Root of the SLKB data tree; the data/save directories in the cells below are derived from it.
# NOTE(review): `root` is not defined in this notebook - presumably it is provided by
# `from utils import *`; confirm.
slkb_root = f'{root}/SLKB'

In [3]:
# Convert every <cell line>/<scene> directory under `specific` into the baseline
# input formats, writing the results to the mirrored path under `slbench/specific`.
data_root = f'{slkb_root}/specific'
save_root = f'{slkb_root}/slbench/specific'
os.makedirs(save_root, exist_ok=True)

for cl in os.listdir(data_root):
    # Entries ending in .csv are not treated as cell-line directories.
    if cl.endswith('.csv'):
        continue

    for scene in os.listdir(os.path.join(data_root, cl)):
        sl_path = os.path.join(data_root, cl, scene)
        # Skip only this entry when it is not a directory or is empty.
        # `continue` (not `break`) is required: os.listdir returns entries in
        # arbitrary order, so breaking here would silently drop whichever
        # valid scenes happened to be listed after the empty one.
        if not os.path.isdir(sl_path) or not os.listdir(sl_path):
            continue

        save_path = os.path.join(save_root, cl, scene)
        os.makedirs(save_path, exist_ok=True)

        # The two construct_* steps read from the directory gid2uid_all wrote to.
        gid2uid_all(sl_path, save_path)
        construct_data_npy(save_path, save_path)
        construct_data_ptgnn(save_path, save_path)

  arr = np.asanyarray(arr)


In [3]:
# Id conversion for the A549 cut-ratio subsets (every scene x cut combination).
data_root = f'{slkb_root}/specific'
save_root = f'{slkb_root}/slbench/specific'

cell_line = 'A549'
scenes = ['C1', 'C2', 'Tail']
cuts = ['1:1', '1:5']

for scene in scenes:
    for cut in cuts:
        sl_path = '/'.join((data_root, cell_line, scene, cut))
        save_path = '/'.join((save_root, cell_line, scene, cut))
        os.makedirs(save_path, exist_ok=True)

        # NOTE(review): include_test=False presumably skips the test split here;
        # confirm against gid2uid_all in utils.
        gid2uid_all(sl_path, save_path, include_test=False)

In [5]:
# Give each cut directory the test folds of its parent scene directory, then
# build the npy / ptgnn inputs in place.
for scene in scenes:
    for cut in cuts:
        no_cut_path = '/'.join((save_root, cell_line, scene))
        cut_path = '/'.join((no_cut_path, cut))

        for fold in range(5):
            test_name = f'sl_test_{fold}.csv'
            # Round-trip through pandas (rather than a raw file copy) so the
            # written file is exactly what DataFrame.to_csv(index=False) emits.
            test_df = pd.read_csv(f'{no_cut_path}/{test_name}')
            test_df.to_csv(f'{cut_path}/{test_name}', index=False)

        construct_data_npy(cut_path, cut_path)
        construct_data_ptgnn(cut_path, cut_path)

  arr = np.asanyarray(arr)


In [None]:
# Same pipeline as above, applied to every scene directory under `transfer`.
data_root = f'{slkb_root}/transfer'
save_root = f'{slkb_root}/slbench/transfer'
os.makedirs(save_root, exist_ok=True)

transfer_bases = (data_root, save_root)
for scene in os.listdir(data_root):
    sl_path, save_path = (os.path.join(base, scene) for base in transfer_bases)
    os.makedirs(save_path, exist_ok=True)

    # Convert ids first; the construct_* steps then read from save_path.
    gid2uid_all(sl_path, save_path)
    construct_data_npy(save_path, save_path)
    construct_data_ptgnn(save_path, save_path)