In [4]:
import numpy as np
import pandas as pd
import pickle, os
import ipywidgets
def _with_label(frames, genes, label):
    """Select rows `genes` from every frame in `frames`, join them column-wise
    and append a constant integer ``label`` column as the last column."""
    parts = [frame.loc[genes] for frame in frames]
    parts.append(pd.Series(label, index=genes, name="label"))
    return pd.concat(parts, axis=1)


def _dump_array(frame, path):
    """Pickle `frame` as a bare NumPy array (index and column names are dropped)."""
    with open(path, "wb") as handle:
        pickle.dump(frame.to_numpy(), handle)


def prepareData(tissue, labelfile,
                work_dir="/Users/maurizio/HELP/data4rev/DeepHE",
                data_dir="/Users/maurizio/HELP/data"):
    """Build the pickled feature arrays for essential (E) and non-essential (NE) genes.

    Reads three CSV files (first column used as the gene index):
      * ``{data_dir}/{tissue}_EmbN2V_128.csv`` - embedding features,
      * ``labelfile`` - must contain a ``label`` column; the values ``aE`` and
        ``sNE`` are merged into ``NE`` before use,
      * ``{work_dir}/nodes_seq_feat.csv`` - sequence features; all-NaN columns
        are dropped and remaining NaNs are replaced by the column mean.

    Writes six pickles into ``work_dir``. Each holds a 2-D NumPy array whose
    last column is the class (1 for E, 0 for NE), rows ordered by sorted gene id:
      ``ess_seqFeature``, ``ess_embedFeature``, ``ess_seqFeature_embedF`` and the
      matching ``ness_*`` files (all with a ``.pickle`` extension). In the
      combined ``*_seqFeature_embedF`` arrays the embedding columns come first,
      then the sequence columns.

    Parameters
    ----------
    tissue : str
        Tissue name used to locate the embedding CSV.
    labelfile : str
        Path of the label CSV.
    work_dir : str, optional
        Directory holding ``nodes_seq_feat.csv`` and receiving the pickles.
        Defaults to the location that was previously hard-coded.
    data_dir : str, optional
        Directory holding the embedding CSV. Defaults to the location that was
        previously hard-coded.

    Returns
    -------
    None
    """
    label_name = "label"
    # load embedding
    n2v = pd.read_csv(os.path.join(data_dir, f"{tissue}_EmbN2V_128.csv"), index_col=0)
    # load labels
    print(f"Loading {labelfile}")
    lab = pd.read_csv(labelfile, index_col=0).replace({'aE': 'NE', 'sNE':'NE'})
    # load sequence features
    seq = pd.read_csv(os.path.join(work_dir, "nodes_seq_feat.csv"), index_col=0)
    seq.index = seq.index.map(str)
    seq = seq.dropna(axis='columns', how='all')
    # numeric_only=True: a non-numeric column would otherwise make mean() raise
    # on pandas >= 2; such columns are simply left unfilled, as older pandas did.
    seq = seq.fillna(seq.mean(numeric_only=True))

    # (file prefix, class name in the label column, numeric class written to the arrays)
    classes = (("ess", "E", 1), ("ness", "NE", 0))

    # np.intersect1d returns sorted unique ids, which fixes the row order of every output.
    common = {}
    for prefix, class_name, _ in classes:
        genes = lab.index[lab[label_name] == class_name].values
        in_n2v = np.intersect1d(genes, n2v.index.values)
        common[prefix] = {
            "seq": np.intersect1d(genes, seq.index.values),
            "n2v": in_n2v,
            "both": np.intersect1d(in_n2v, seq.index.values),
        }
    print(f'{len(common["ness"]["seq"])+len(common["ess"]["seq"])} common genes over {max(len(seq),len(lab))}')

    # save one array per class and per feature set (seq, embedding, embedding+seq)
    for prefix, _, value in classes:
        idx = common[prefix]
        _dump_array(_with_label([seq], idx["seq"], value),
                    os.path.join(work_dir, f"{prefix}_seqFeature.pickle"))
        _dump_array(_with_label([n2v], idx["n2v"], value),
                    os.path.join(work_dir, f"{prefix}_embedFeature.pickle"))
        _dump_array(_with_label([n2v, seq], idx["both"], value),
                    os.path.join(work_dir, f"{prefix}_seqFeature_embedF.pickle"))

In [7]:
# Work from the DeepHE directory: the relative paths used in this cell
# (labelfile below; "main.py", "--data_dir ." and "results" inside myrun)
# are resolved against the current working directory.
%cd /Users/maurizio/HELP/data4rev/DeepHE
# Tissue whose embedding CSV and label CSV are processed.
tissue = 'Lung'
# Label CSV, relative to the directory entered by %cd above.
# NOTE: myrun reads this variable as a notebook global to name its experiment.
labelfile = f"../../data/{tissue}_HELP.csv"
# Regenerate the pickled E/NE feature arrays for this tissue and label file.
prepareData(tissue, labelfile)
import os
def myrun(embedtype = "seq", repeat=10):
    dm = {"seq" : 0, "embed": 1, "seq+embed": 2, "bio" : 3, "bio+embed": 4, "seq+bio+embed": 5}
    filename = f"Experiment_{os.path.basename(labelfile).split('.')[0]}_{embedtype}"
    precision = 3
    command = f'main.py --expName {filename} --fold 2 --embedF {dm[embedtype]} --data_dir . --repeat {repeat} --result_dir results'
    print(command)
    %run {command}
    with open(os.path.join("results", filename + '.txt'), "r") as file:
        m = [f"{round(float(f.strip().split(' ')[1].split('±')[0]),precision)}±{round(float(f.strip().split(' ')[1].split('±')[1]),precision)}" for f in file.readlines()[-17:21]]
        print(" & ".join([m[index] for index in [0,5,6,2,3,7]]))

from ipywidgets import interact_manual, IntSlider
# interact_manual variant whose trigger button is labelled "Run...".
run_iteract = interact_manual.options(manual_name="Run...")
# Build the UI for myrun: a dropdown limited to the three listed embedtype values
# and a 1-10 slider for repeat (initially 10). The trailing ';' suppresses the
# echo of the call's return value in the cell output.
run_iteract(myrun, embedtype = ["seq", "embed", "seq+embed"], repeat=IntSlider(value=10, description='repeat', max=10, min=1));

/Users/maurizio/HELP/data4rev/DeepHE
Loading ../../data/Lung_HELP.csv
17236 common genes over 18608


interactive(children=(Dropdown(description='embedtype', options=('seq', 'embed', 'seq+embed'), value='seq'), I…

In [3]:
from keras.utils import to_categorical
