In [1]:
import pickle
from pathlib import Path

# Diabetes Data set

In [38]:
# Settings for the diabetes data set, gathered in one dict.
# Every path below is anchored at <current working directory>/diabetes.
diabetes_dir = Path.cwd() / 'diabetes'

requirements = {
    # -- file-system locations --
    'path_raw_data': diabetes_dir / 'data.csv',
    'path_to_data_set': diabetes_dir / 'data_set',
    'path_training_results': diabetes_dir / 'training_results',
    'path_to_model': diabetes_dir / 'model',
    'path_to_interaction_plots': diabetes_dir / 'cTc_interactions',

    # -- cell type names (16 entries) --
    'cell_type_names': [
        'acinar', 'ductal', 'alpha', 'unknown',
        'stromal', 'endothelial', 'Th', 'delta',
        'macrophage', 'neutrophil', 'beta', 'otherimmune',
        'Tc', 'gamma', 'naiveTc', 'B',
    ],

    # -- marker names (38 entries) --
    'markers': [
        'H3', 'SLC2A1', 'CD20', 'AMY2A', 'CD3e', 'PPY', 'PIN', 'PD1',
        'GCG', 'PDX1', 'SST', 'SMA', 'SYP', 'KRT19', 'CD45', 'FOXP3',
        'CD45RA', 'CD8a', 'CA9', 'IAPP', 'KI67', 'NKX61', 'INS', 'pH3',
        'CD4', 'CD31', 'CDH', 'PTPRN', 'pRB', 'cPARP1', 'Ir191', 'Ir193',
        'CD38', 'CD44', 'PCSK2', 'CD99', 'CD68', 'MPO',
    ],

    # -- labels: name -> integer id, and the column the names are stored in --
    'label_dict': {
        'Onset': 0,
        'Long-duration': 1,
        'Non-diabetic': 2,
    },
    'label_column': 'stage',

    # -- column selections --
    'eval_columns': ['part', 'stage', 'group', 'CellCat', 'CellType'],
    'col_of_interest': [
        'anker_value', 'radius_distance', 'fussy_limit', 'droupout_rate',
        'comment', 'comment_norm', 'model_no', 'split_number',
    ],
    'col_of_variables': [
        'droupout_rate', 'fussy_limit', 'anker_value', 'radius_distance',
    ],

    # -- value lists; presumably the candidates for the variables above (confirm with consumer) --
    'minimum_number_cells': 25,
    'radius_distance_all': [9, 27],
    'fussy_limit_all': [0.4, 0.8],
    'anker_value_all': [0.01, 0.2],

    # -- cell filtering (switched off for this data set) --
    'filter_column': None,
    'filter_value': None,
    'filter_cells': False,

    'anker_cell_selction_type': '%',  # either '%' or 'absolut'
    'multiple_labels_per_subSample': False,

    # -- model / training settings --
    'droupout_rate': [0.2],
    'input_layer': 38,   # same as len(markers)
    'batch_size': 150,
    'learning_rate': 1e-2,

    'output_layer': 3,   # same as len(label_dict)
    'layer_1': 38,
    'attr_bool': False,
    'comment_norm': 'no_norm',
    'databased_norm': None,

    # -- sample layout: coordinate columns and sample identifier column --
    'augmentation_number': 5,
    'X_col_name': 'AreaShape_Center_X',
    'Y_col_name': 'AreaShape_Center_Y',
    'measument_sample_name': 'image',

    # -- validation / test split --
    'validation_split_column': 'group',
    'number_validation_splits': [1, 2, 3],
    'test_set_fold_number': [4],
    'voro_neighbours': 50,
}

In [5]:
# Serialise the diabetes settings dict to <cwd>/diabetes/requirements.pt.
diabetes_requirements_file = Path.cwd() / 'diabetes' / 'requirements.pt'
with diabetes_requirements_file.open('wb') as handle:
    pickle.dump(requirements, handle)

# HCC Data set

In [4]:


# Settings for the HCC data set, gathered in one dict.
# NOTE(review): the raw data lives under <cwd>/../examples/HCC and the data set
# under <cwd>/data_sets/HCC, while the remaining paths sit under <cwd>/HCC —
# confirm this mixed layout is intended.
working_dir = Path.cwd()
hcc_dir = working_dir / 'HCC'

requirements = {
    # -- file-system locations --
    'path_raw_data': working_dir.parent / 'examples' / 'HCC' / 'data.csv',
    'path_to_data_set': working_dir / 'data_sets' / 'HCC',
    'path_training_results': hcc_dir / 'training_results',
    'path_to_model': hcc_dir / 'model',
    'path_to_interaction_plots': hcc_dir / 'cTc_interactions',

    # -- cell type names (21 entries) --
    'cell_type_names': [
        'B cells CD38+',
        'B cells PD-L1+',
        'Granulocytes CD38+',
        'Granulocytes CD38-',
        'M2 Macrophages PD-L1+',
        'MAITs',
        'MHCII APCs',
        'Mixed Immune CD45+',
        'NK Cells CD16',
        'T cells CD4',
        'T cells CD4 PD-L1+',
        'T cells CD4 naïve',
        'T cells CD8 PD-1high',
        'T cells CD8 PD-1low',
        'Tregs',
        'Kupffer cells',
        'M2 Macrophages PD-L1-',
        'NK Cells CD56',
        'T cells CD57',
        'T cells CD8 PD-L1+',
        'B cells CD45RA',
    ],
    'combine_cellPhenotypes': ['B cells', 'Granulocytes'],

    'sampleing': 'random',

    # -- marker column names (23 entries, all ending in '.Cytoplasm.Intensity') --
    'markers': [
        'CD56.Cytoplasm.Intensity',
        'CD161.Cytoplasm.Intensity',
        'CD39.Cytoplasm.Intensity',
        'CD25.Cytoplasm.Intensity',
        'CD57.Cytoplasm.Intensity',
        'CD40.Cytoplasm.Intensity',
        'ICOS.Cytoplasm.Intensity',
        'CD3.Cytoplasm.Intensity',
        'CD62L.Cytoplasm.Intensity',
        'CD45RO.Cytoplasm.Intensity',
        'CD163.Cytoplasm.Intensity',
        'CD19.Cytoplasm.Intensity',
        'CD38.Cytoplasm.Intensity',
        'CD11c.Cytoplasm.Intensity',
        'CD8.Cytoplasm.Intensity',
        'CD11b.Cytoplasm.Intensity',
        'CD16.Cytoplasm.Intensity',
        'CD69.Cytoplasm.Intensity',
        'CD15.Cytoplasm.Intensity',
        'CD45RA.Cytoplasm.Intensity',
        'CD4.Cytoplasm.Intensity',
        'CD66b.Cytoplasm.Intensity',
        'CD68.Cytoplasm.Intensity',
    ],

    # -- labels: name -> integer id, and the column the names are stored in --
    'label_dict': {
        'normalLiver': 0,
        'core': 1,
        'rim': 2,
    },
    'label_column': 'Tissue',

    # -- column selections --
    'eval_columns': ['Class', 'Class0', 'Celltype'],
    'col_of_interest': [
        'anker_value', 'radius_distance', 'droupout_rate',
        'comment', 'comment_norm', 'model_no', 'split_number',
    ],
    'col_of_variables': ['droupout_rate', 'anker_value', 'radius_distance'],

    # -- value lists; presumably the candidates for the variables above (confirm with consumer) --
    'minimum_number_cells': 25,
    'radius_distance_all': [530],
    'anker_value_all': [500],

    # -- cell filtering (switched on for this data set) --
    'filter_column': ['CD45.Positive.Classification'],
    'filter_value': 1,
    'filter_cells': True,

    'anker_cell_selction_type': 'absolut',  # either '%' or 'absolut'
    'multiple_labels_per_subSample': True,

    # -- model / training settings --
    'batch_size': 150,
    'learning_rate': 1e-2,

    'input_layer': 23,   # same as len(markers)
    'layer_1': 23,
    'output_layer': 3,   # same as len(label_dict)
    'comment_norm': 'no_norm',
    'databased_norm': None,

    'droupout_rate': [0.8],
    'attr_bool': False,

    # -- sample layout: coordinate columns and sample identifier column --
    'augmentation_number': 5,
    'X_col_name': 'X_value',
    'Y_col_name': 'Y_value',
    'measument_sample_name': 'Patient',

    # -- validation / test split --
    'validation_split_column': 'fold',
    'number_validation_splits': [2, 3, 4, 5],
    'test_set_fold_number': [1],
    'voro_neighbours': 50,
}



In [5]:
# Serialise the HCC settings dict to <cwd>/req_HCC.pt.
# The file is opened in a `with` block so the handle is closed (and the pickle
# fully flushed to disk) as soon as the dump finishes, instead of being left
# open until the garbage collector reclaims it.
with open(Path.cwd() / 'req_HCC.pt', 'wb') as f:
    pickle.dump(requirements, f)

# Colorectal Cancer Data set

In [3]:
# Settings for the colorectal cancer (CRC) data set, gathered in one dict.
# Every path below is anchored at <current working directory>/CRC.
crc_dir = Path.cwd() / 'CRC'

requirements = {
    # -- file-system locations --
    'path_raw_data': crc_dir / 'crc_data.csv',
    'path_to_data_set': crc_dir / 'data_set',
    'path_training_results': crc_dir / 'training_results',
    'path_to_model': crc_dir / 'model',
    'path_to_interaction_plots': crc_dir / 'cTc_interactions',

    # -- cell type names (27 entries) --
    'cell_type_names': [
        'CD4+ T cells CD45RO+',
        'CD68+ macrophages',
        'CD68+CD163+ macrophages',
        'CD8+ T cells',
        'Tregs',
        'granulocytes',
        'plasma cells',
        'smooth muscle',
        'stroma',
        'tumor cells',
        'vasculature',
        'B cells',
        'CD4+ T cells',
        'immune cells / vasculature',
        'undefined',
        'nerves',
        'CD4+ T cells GATA3+',
        'immune cells',
        'CD11b+CD68+ macrophages',
        'CD3+ T cells',
        'adipocytes',
        'tumor cells / immune cells',
        'CD11c+ DCs',
        'lymphatics',
        'CD11b+ monocytes',
        'CD68+ macrophages GzmB+',
        'NK cells',
    ],

    # -- marker names (56 entries) --
    'markers': [
        'CD44', 'FOXP3', 'CD8', 'p53', 'GATA3', 'CD45', 'T.bet',
        'beta.catenin', 'HLA.DR', 'PD.L1', 'Ki67', 'CD45RA', 'CD4', 'CD21',
        'MUC.1', 'CD30', 'CD2', 'Vimentin', 'CD20', 'LAG.3', 'Na.K.ATPase',
        'CD5', 'IDO.1', 'Cytokeratin', 'CD11b', 'CD56', 'aSMA', 'BCL.2',
        'CD25', 'CD11c', 'PD.1', 'Granzyme.B', 'EGFR', 'VISTA', 'CD15',
        'ICOS', 'Synaptophysin', 'GFAP', 'CD7', 'CD3', 'Chromogranin.A', 'CD163',
        'CD45RO', 'CD68', 'CD31', 'Podoplanin', 'CD34', 'CD38', 'CD138',
        'CDX2', 'Collagen.IV', 'CD194', 'MMP9', 'CD71', 'CD57', 'MMP12',
    ],

    # -- labels: name -> integer id, and the column the names are stored in --
    'label_dict': {
        'good_responder': 0,
        'bad_responder': 1,
    },
    'label_column': 'groups',

    # -- column selections --
    'eval_columns': ['patients', 'spots', 'CellType'],
    'col_of_interest': [
        'anker_value', 'radius_distance', 'fussy_limit', 'droupout_rate',
        'comment', 'comment_norm', 'model_no', 'split_number',
    ],
    'col_of_variables': [
        'droupout_rate', 'fussy_limit', 'anker_value',
        'radius_distance', 'comment', 'comment_norm',
    ],

    # -- value lists; presumably the candidates for the variables above (confirm with consumer) --
    'minimum_number_cells': 50,
    'radius_distance_all': [25, 55, 75],
    'fussy_limit_all': [0.1, 0.5],
    'anker_value_all': [0.03, 0.08],
    'comment_norm': 'sample_norm_forceTrain',
    'databased_norm': 'global_std',

    # -- cell filtering (switched off for this data set) --
    'filter_column': None,
    'filter_value': None,
    'filter_cells': False,

    'anker_cell_selction_type': '%',  # either '%' or 'absolut'
    'multiple_labels_per_subSample': False,

    # -- model / training settings --
    'droupout_rate': [0.2],
    'input_layer': 56,   # same as len(markers)
    'batch_size': 150,
    'learning_rate': 1e-2,
    'patience': 2,

    'output_layer': 2,   # same as len(label_dict)
    'layer_1': 56,
    'attr_bool': False,

    # -- sample layout: coordinate columns and sample identifier column --
    'augmentation_number': 5,
    'X_col_name': 'X_withinTile.X_withinTile',
    'Y_col_name': 'Y_withinTile.Y_withinTile',
    'measument_sample_name': 'spots',

    # -- validation / test split --
    'validation_split_column': 'val_column',
    'number_validation_splits': [1, 2, 3],
    'test_set_fold_number': [4],
    'voro_neighbours': 10,
}

In [4]:
# Serialise the CRC settings dict to <cwd>/CRC/requirements.pt.
crc_requirements_file = Path.cwd() / 'CRC' / 'requirements.pt'
with crc_requirements_file.open('wb') as handle:
    pickle.dump(requirements, handle)