# pycisTopic analysis

Cell downsampled dataset, using consensus peak regions.

In [1]:
import pycisTopic
# Display the installed pycisTopic version so the notebook records which build produced the outputs below.
pycisTopic.__version__

'0.1.dev295+gbd4bf4d.d20210830'

In [2]:
import warnings
# Silence Python warnings for the rest of the session.
# NOTE(review): both calls below install a catch-all 'ignore' filter, so one of them is
# probably redundant; blanket suppression can also hide real problems (deprecations,
# dtype issues) — consider narrowing it to specific categories/modules.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [3]:
import pickle
import pandas as pd

In [4]:
import os
# Working directory of this analysis; the output folders created further down
# (QC pickles, cisTopic objects, models) are all joined onto this path.
wdir = '/staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/data_freeze_Jun2021_cell_downsampled/atac_qc/jupyter/'
# Switch the process into the working directory so relative paths resolve against it.
os.chdir( wdir )

---

In [5]:
# Map each sample name to its fragments file. All files live in the same
# directory and follow the '<sample>.sinto.fragments.tsv.gz' naming scheme,
# so the mapping is generated from the ordered list of sample names.
fragments_dict = {
    sample: (
        '/staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/'
        'data_freeze_Jun2021_cell_downsampled/fragments/'
        f'{sample}.sinto.fragments.tsv.gz'
    )
    for sample in (
        'Broad_1',
        'Broad_2',
        'Broad_mito_1',
        'Broad_mito_2',
        'CNAG_1',
        'CNAG_2',
        's3atac',
        'Sanger_1',
        'Sanger_2',
        'Stanford_1',
        'Stanford_2',
        'VIB_1',
        'VIB_2',
    )
}

In [6]:
# Map each peak-set name to its consensus-regions BED file. Every file sits at
# '<name>/<name>__consensus_regions.bed' under the consensus_peak_calling
# directory. Besides the individual samples there is a 'merged' entry, which
# has no counterpart in fragments_dict.
consensus_peaks_dict = {
    name: (
        '/staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/'
        'data_freeze_Jun2021_cell_downsampled/atac_qc/jupyter/consensus_peak_calling/'
        f'{name}/{name}__consensus_regions.bed'
    )
    for name in (
        'Broad_1',
        'Broad_2',
        'Broad_mito_1',
        'Broad_mito_2',
        'CNAG_1',
        'CNAG_2',
        'merged',
        's3atac',
        'Sanger_1',
        'Sanger_2',
        'Stanford_1',
        'Stanford_2',
        'VIB_1',
        'VIB_2',
    )
}

## Library QC metrics

In [7]:
from pycisTopic.qc import compute_qc_stats

In [8]:
# Load the pickled annotation table that is passed to compute_qc_stats as
# `tss_annotation`, then display it for a quick sanity check.
# NOTE(review): the pickle comes from a different data freeze directory
# (data_freeze_Jun2021, not the cell-downsampled one) — presumably a shared
# annotation; only unpickle files from trusted locations.
f_biomart = '/staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/data_freeze_Jun2021/atac_qc_multiplet_merged/jupyter/biomart_annot.pickle'
with open(f_biomart, mode='rb') as biomart_fh:
    annot = pickle.load(biomart_fh)

annot

Unnamed: 0,Chromosome,Start,Strand,Gene,Transcript_type
8986,chrY,9337464,1,TSPY4,protein_coding
8987,chrY,9337510,1,TSPY4,protein_coding
9024,chrY,22490397,1,PRY,protein_coding
9100,chrY,14056227,1,VCY1B,protein_coding
9136,chrY,12662368,1,USP9Y,protein_coding
...,...,...,...,...,...
236024,chr1,36479519,-1,CSF3R,protein_coding
236025,chr1,36471474,-1,CSF3R,protein_coding
236033,chr1,36482051,-1,CSF3R,protein_coding
236034,chr1,36323645,-1,EVA1B,protein_coding


In [9]:
# Compute the library-level QC statistics for every sample in fragments_dict.
# Returns two objects, stored as metadata_bc_dict and profile_data_dict.
metadata_bc_dict, profile_data_dict = compute_qc_stats(
    # inputs
    fragments_dict=fragments_dict,
    tss_annotation=annot,
    path_to_regions=consensus_peaks_dict,
    label_list=None,
    valid_bc=None,
    # which statistics to compute
    stats=[
        'barcode_rank_plot',
        'duplicate_rate',
        'insert_size_distribution',
        'profile_tss',
        'frip',
    ],
    # barcode selection (the log reports "Marking barcodes with more than 100")
    n_frag=100,
    n_bc=None,
    # TSS profile settings
    tss_flank_window=2000,
    tss_window=50,
    tss_minimum_signal_window=100,
    tss_rolling_window=10,
    min_norm=0.1,
    # execution
    n_cpu=5,
    remove_duplicates=True,
)

2021-09-14 15:36:54,014	INFO services.py:1263 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(pid=4301)[0m 2021-09-14 15:36:56,933 cisTopic     INFO     Reading Broad_1
[2m[36m(pid=4298)[0m 2021-09-14 15:36:56,842 cisTopic     INFO     Reading Broad_2
[2m[36m(pid=4300)[0m 2021-09-14 15:36:56,842 cisTopic     INFO     Reading Broad_mito_1
[2m[36m(pid=4299)[0m 2021-09-14 15:36:56,932 cisTopic     INFO     Reading CNAG_1
[2m[36m(pid=4297)[0m 2021-09-14 15:36:56,834 cisTopic     INFO     Reading Broad_mito_2
[2m[36m(pid=4299)[0m 2021-09-14 15:37:09,620 cisTopic     INFO     Computing barcode rank plot for CNAG_1
[2m[36m(pid=4299)[0m 2021-09-14 15:37:09,620 cisTopic     INFO     Counting fragments
[2m[36m(pid=4299)[0m 2021-09-14 15:37:10,188 cisTopic     INFO     Marking barcodes with more than 100
[2m[36m(pid=4299)[0m 2021-09-14 15:37:10,211 numexpr.utils INFO     Note: NumExpr detected 36 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
[2m[36m(pid=4299)[0m 2021-09-14 15:37:10,212 cisTopic     INFO     Returning plot data

In [10]:
# Persist the QC results (per-barcode metadata and profile data) as pickles
# under <wdir>/qc__consensus_peaks.
f_qc_dir = 'qc__consensus_peaks'
qc_out_dir = os.path.join(wdir, f_qc_dir)
# exist_ok=True replaces the separate exists() check: no check-then-create race,
# and the cell can be re-run safely.
os.makedirs(qc_out_dir, exist_ok=True)

with open(os.path.join(qc_out_dir, 'metadata.pickle'), 'wb') as f:
    pickle.dump(metadata_bc_dict, f)

with open(os.path.join(qc_out_dir, 'profile_data.pickle'), 'wb') as f:
    pickle.dump(profile_data_dict, f)

## Create the cisTopic objects for each sample

In [7]:
# Valid barcodes: mapping that is indexed by sample name in the loops below
# (bc_passing_filters[key] is passed as `valid_bc` for that sample).
# The path is built with os.path.join, like every other path in this notebook,
# so it does not depend on `wdir` ending with a trailing slash.
with open(os.path.join(wdir, 'barcodes_passing_filters2.pkl'), 'rb') as f:
    bc_passing_filters = pickle.load(f)

In [8]:
from pycisTopic.cistopic_class import create_cistopic_object_from_fragments

In [9]:
# Alternative region set (SCREEN/ENCODE rDHS), kept for reference only: it is not used,
# the cisTopic objects below are built on the consensus peak sets instead.
#path_to_regions = '/staging/leuven/stg_00002/lcb/cbravo/SCREEN_ENCODE3/V2.hg38-rDHS-Unfiltered.blacklisted.bed'
# hg38 blacklist BED file, passed as `path_to_blacklist` when the cisTopic objects are created.
path_to_blacklist = '/staging/leuven/stg_00002/lcb/cflerin/testruns/pycisTopic/pycisTopic/blacklist/hg38-blacklist.v2.bed'

In [10]:
# Output directory (relative to wdir) for the per-sample cisTopic objects that
# are built on each sample's own consensus peaks.
f_cto_dir = 'cistopic_objs__consensus_peaks'
# exist_ok=True makes the call idempotent and avoids the exists()/makedirs() race.
os.makedirs(os.path.join(wdir, f_cto_dir), exist_ok=True)

In [11]:
# Create one cisTopic object per sample, using that sample's own consensus
# peaks as regions. Each object is cached as a pickle in f_cto_dir; a sample
# whose pickle already exists is skipped.

for key in bc_passing_filters.keys():
    f_out = os.path.join(wdir, f_cto_dir, key + '__cistopic_obj.pkl')
    if os.path.isfile(f_out):
        print(f"Skipping {key}")
        continue

    # Per-barcode QC metrics are not attached here (no `metrics=` argument is passed).
    tmp_cto = create_cistopic_object_from_fragments(
        path_to_fragments=fragments_dict[key],
        path_to_regions=consensus_peaks_dict[key],
        path_to_blacklist=path_to_blacklist,
        valid_bc=bc_passing_filters[key],
        n_cpu=1,
        partition=20,
        project=key,
    )

    # Write to a temporary file and rename it into place. The existence of
    # f_out is what marks a sample as done, so an interrupted dump must never
    # leave a truncated pickle under the final name.
    f_out_tmp = f_out + '.tmp'
    with open(f_out_tmp, 'wb') as f:
        pickle.dump(tmp_cto, f)
    os.replace(f_out_tmp, f_out)

    print(f"DONE {key}")


Skipping Broad_1
Skipping Broad_2
Skipping Broad_mito_1
Skipping Broad_mito_2
Skipping CNAG_1
Skipping CNAG_2
Skipping s3atac
Skipping Sanger_1
Skipping Sanger_2
Skipping Stanford_1
Skipping Stanford_2
Skipping VIB_1
Skipping VIB_2


## Create a merged object

Here, every sample is counted against the same (merged) set of consensus peaks, and the resulting per-sample objects are then merged into a single object.

In [12]:
# Output directory (relative to wdir) for the cisTopic objects that are built
# on the merged consensus peak set.
f_cto_merged_dir = 'cistopic_objs__consensus_peaks_merged'
# exist_ok=True makes the call idempotent and avoids the exists()/makedirs() race.
os.makedirs(os.path.join(wdir, f_cto_merged_dir), exist_ok=True)

In [13]:
# Create one cisTopic object per sample, this time using the shared 'merged'
# consensus peak set as regions for every sample. Each object is cached as a
# pickle in f_cto_merged_dir; a sample whose pickle already exists is skipped.

for key in bc_passing_filters.keys():
    f_out = os.path.join(wdir, f_cto_merged_dir, key + '__cistopic_obj.pkl')
    if os.path.isfile(f_out):
        print(f"Skipping {key}")
        continue

    # Per-barcode QC metrics are not attached here (no `metrics=` argument is passed).
    tmp_cto = create_cistopic_object_from_fragments(
        path_to_fragments=fragments_dict[key],
        path_to_regions=consensus_peaks_dict['merged'],
        path_to_blacklist=path_to_blacklist,
        valid_bc=bc_passing_filters[key],
        n_cpu=1,
        partition=20,
        project=key,
    )

    # Write to a temporary file and rename it into place. The existence of
    # f_out is what marks a sample as done, so an interrupted dump must never
    # leave a truncated pickle under the final name.
    f_out_tmp = f_out + '.tmp'
    with open(f_out_tmp, 'wb') as f:
        pickle.dump(tmp_cto, f)
    os.replace(f_out_tmp, f_out)

    print(f"DONE {key}")


Skipping Broad_1
Skipping Broad_2
Skipping Broad_mito_1
Skipping Broad_mito_2
Skipping CNAG_1
Skipping CNAG_2
Skipping s3atac
Skipping Sanger_1
Skipping Sanger_2
Skipping Stanford_1
Skipping Stanford_2
Skipping VIB_1
Skipping VIB_2


In [14]:
# Read the per-sample objects built on the merged peak set back from disk,
# collecting them in a dict keyed by sample name.
merged_peak_obj_dir = os.path.join(wdir, f_cto_merged_dir)

cistopic_obj_dict = {}
for key in bc_passing_filters.keys():
    f_out = os.path.join(merged_peak_obj_dir, key + '__cistopic_obj.pkl')
    with open(f_out, mode='rb') as pkl_fh:
        cistopic_obj_dict[key] = pickle.load(pkl_fh)
    print(f"Loaded {key}")

Loaded Broad_1
Loaded Broad_2
Loaded Broad_mito_1
Loaded Broad_mito_2
Loaded CNAG_1
Loaded CNAG_2
Loaded s3atac
Loaded Sanger_1
Loaded Sanger_2
Loaded Stanford_1
Loaded Stanford_2
Loaded VIB_1
Loaded VIB_2


In [15]:
import copy

In [16]:
# Build the merged cisTopic object from all per-sample objects, or reload it
# from its pickle if it was built before. Either way the result ends up in
# cistopic_obj_dict['merged'].
f_out = os.path.join(wdir, f_cto_merged_dir, 'cellds_merged__cistopic_obj.pkl')
if os.path.isfile(f_out):
    print(f"Loading {f_out}")
    with open(f_out, 'rb') as f:
        cistopic_obj_dict['merged'] = pickle.load(f)
else:
    # merge: start from a copy of the first sample's object and merge all
    # remaining samples into it.
    # NOTE(review): copy.copy is shallow — confirm that merge() does not mutate
    # attributes shared with the first sample's original object.
    ct_keys = list(cistopic_obj_dict.keys())
    cto_merged = copy.copy(cistopic_obj_dict[ct_keys[0]])
    cto_merged.merge([cistopic_obj_dict[k] for k in ct_keys[1:]], project='scATAC-seq_benchmark')

    # Write to a temporary file and rename it into place, so an interrupted
    # dump cannot leave a truncated pickle that the isfile() check above would
    # later accept as a valid cache.
    f_out_tmp = f_out + '.tmp'
    with open(f_out_tmp, 'wb') as f:
        pickle.dump(cto_merged, f)
    os.replace(f_out_tmp, f_out)

    # Leave the kernel in the same state as the cache-hit branch does.
    cistopic_obj_dict['merged'] = cto_merged

2021-09-15 11:49:43,074 cisTopic     INFO     cisTopic object 1 merged
2021-09-15 11:49:48,865 cisTopic     INFO     cisTopic object 2 merged
2021-09-15 11:49:55,561 cisTopic     INFO     cisTopic object 3 merged
2021-09-15 11:50:03,457 cisTopic     INFO     cisTopic object 4 merged
2021-09-15 11:50:11,848 cisTopic     INFO     cisTopic object 5 merged
2021-09-15 11:50:20,721 cisTopic     INFO     cisTopic object 6 merged
2021-09-15 11:50:30,925 cisTopic     INFO     cisTopic object 7 merged
2021-09-15 11:50:42,607 cisTopic     INFO     cisTopic object 8 merged
2021-09-15 11:50:54,504 cisTopic     INFO     cisTopic object 9 merged
2021-09-15 11:51:06,880 cisTopic     INFO     cisTopic object 10 merged
2021-09-15 11:51:20,550 cisTopic     INFO     cisTopic object 11 merged
2021-09-15 11:51:36,110 cisTopic     INFO     cisTopic object 12 merged


## Create a set of cisTopic objects for downstream analysis

Include each sample individually (using the sample-specific consensus peaks), plus the merged sample (using the merged consensus peaks).

In [17]:
# Rebuild cistopic_obj_dict for downstream analysis: the per-sample objects on
# sample-specific consensus peaks, plus the merged object.
# Rebinding the name releases the previous dict, so no explicit `del` is
# needed (and the cell no longer raises NameError when the name is undefined).
cistopic_obj_dict = {}
# load sample objects into dict:
for key in bc_passing_filters.keys():
    f_out = os.path.join(wdir, f_cto_dir, key + '__cistopic_obj.pkl')
    with open(f_out, 'rb') as f:
        cistopic_obj_dict[key] = pickle.load(f)
    print(f"Loaded {key}")

# add the merged object:
f_out = os.path.join(wdir, f_cto_merged_dir, 'cellds_merged__cistopic_obj.pkl')
if os.path.isfile(f_out):
    print(f"Loading {f_out}")
    with open(f_out, 'rb') as f:
        cistopic_obj_dict['merged'] = pickle.load(f)
else:
    # Make the gap visible instead of silently continuing without 'merged'.
    print(f"WARNING: {f_out} not found; 'merged' is missing from cistopic_obj_dict")

Loaded Broad_1
Loaded Broad_2
Loaded Broad_mito_1
Loaded Broad_mito_2
Loaded CNAG_1
Loaded CNAG_2
Loaded s3atac
Loaded Sanger_1
Loaded Sanger_2
Loaded Stanford_1
Loaded Stanford_2
Loaded VIB_1
Loaded VIB_2
Loading /staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/data_freeze_Jun2021_cell_downsampled/atac_qc/jupyter/cistopic_objs__consensus_peaks_merged/cellds_merged__cistopic_obj.pkl


## Add cell annotations

In [18]:
# Read the per-cell annotation table (tab-separated); its first column becomes
# the index. Display it afterwards for a quick look.
f_cellannot = '/staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/data_freeze_Jun2021_cell_downsampled/atac_qc/jupyter/cellds_filtered_cell_data.tsv'
cellannot = pd.read_csv(f_cellannot, sep='\t', index_col=0)

cellannot

Unnamed: 0,Doublet_scores_fragments,Total_nr_frag_in_regions,pycisTopic_leiden_10_1.2,Dupl_nr_frag,barcode,seurat_cell_type_pred_score,pycisTopic_leiden_10_1.0,pycisTopic_leiden_10_0.6,Unique_nr_frag_in_regions,Predicted_doublets_fragments,...,Dupl_rate,pycisTopic_leiden_10_0.8,Log_total_nr_frag,Total_nr_frag,cisTopic_nr_frag,Unique_nr_frag,pycisTopic_leiden_10_1.4,pycisTopic_leiden_10_1.6,pycisTopic_leiden_10_1.8,consensus_cell_type
TTCCTCTATGGTGTTTATCAT_GCGTAGACACGCCACTTCATC_ATAGGCACGAGTGGCGCCTAA-Broad_1,0.127469,6949,8,3440,TTCCTCTATGGTGTTTATCAT_GCGTAGACACGCCACTTCATC_AT...,0.682683,8,10,4268,False,...,0.370650,10,3.967595,9281,6019,5841,7,25,26,CD14+ monocyte
GCGACTCAGACCATTTATCAT_CGACACTTCGCGCATTCCTCT_CGCAATCGAGGACACCGAACC-Broad_1,0.354582,7652,2,3538,GCGACTCAGACCATTTATCAT_CGACACTTCGCGCATTCCTCT_CG...,0.516413,2,3,4554,False,...,0.391026,2,3.956553,9048,5884,5510,4,6,7,Cytotoxic T cell
TACCGAATGACCGCATTCGTT_CTCATTTACTCAATAGCAACG_TGTCGCTCGATTACCACATGA-Broad_1,0.235127,7275,0,4158,TACCGAATGACCGCATTCGTT_CTCATTTACTCAATAGCAACG_TG...,0.612734,1,1,3891,False,...,0.443615,0,3.971879,9373,5470,5215,0,0,5,CD4+ T cell
ACGTTGGGAACCGTCCTCCTT_AACTCTTGGTTAGTCTCTTGC_GATCACCTTATCATGAATATG_TGGTGAATCCTTAAGGAGCCT-Broad_1,0.284768,8422,0,4752,ACGTTGGGAACCGTCCTCCTT_AACTCTTGGTTAGTCTCTTGC_GA...,0.783805,18,1,4193,False,...,0.480631,0,3.995065,9887,5322,5135,10,7,6,CD4+ T cell
TCATACACCAAGCTCATGTAT_AGGATACCCGCGATATGTTCC_TAACGCCGAATCAAGAATCAA-Broad_1,0.508197,6925,8,3824,TCATACACCAAGCTCATGTAT_AGGATACCCGCGATATGTTCC_TA...,0.616381,8,10,3700,False,...,0.440705,10,3.938370,8677,4584,4853,7,25,26,CD14+ monocyte
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTTCCAGCAAGGTCGA-VIB_2,0.117647,5697,21,1166,CTTCCAGCAAGGTCGA,0.961163,16,0,4843,False,...,0.133425,20,3.941462,8739,6498,7573,14,14,13,CD14+ monocyte
GTCCATCAGTACCTCA-VIB_2,0.131222,5164,1,1389,GTCCATCAGTACCTCA,0.979147,0,0,4177,False,...,0.159216,1,3.940716,8724,5883,7335,3,2,2,CD14+ monocyte
TTGTTCACAACTCGAT-VIB_2,0.278075,5062,21,2031,TTGTTCACAACTCGAT,0.782794,16,0,3726,False,...,0.225441,20,3.954677,9009,5464,6978,14,14,13,CD14+ monocyte
CTTCCAGTCAGTGTGT-VIB_2,0.165775,6068,1,1931,CTTCCAGTCAGTGTGT,0.859730,0,0,4579,False,...,0.201230,1,3.982090,9596,6208,7665,14,2,13,CD14+ monocyte


In [19]:
# Attach the 'consensus_cell_type' column of the annotation table to every
# cisTopic object (each sample and the merged object).
for key, cto in cistopic_obj_dict.items():
    cell_type_annot = cellannot[['consensus_cell_type']]
    cto.add_cell_data(cell_type_annot)

### Run models for quick visualization/clustering

In [20]:
from pycisTopic.lda_models import run_cgs_models_mallet

#### mallet models

In [21]:
# Raise the memory limit for mallet through the MALLET_MEMORY environment
# variable (the 1GB default is not enough for these datasets).
os.environ['MALLET_MEMORY'] = '100G'

In [22]:
# Directory (relative to wdir) where the fitted models are pickled.
f_mod_dir = 'models__consensus__mallet'
# exist_ok=True makes the call idempotent and avoids the exists()/makedirs() race.
os.makedirs(os.path.join(wdir, f_mod_dir), exist_ok=True)

# Scratch directory passed to mallet as the prefix for its temporary files.
f_mod_tmpdir = '/scratch/leuven/325/vsc32528/tmp/mallet_cellds'
os.makedirs(f_mod_tmpdir, exist_ok=True)

# Numbers of topics to fit: one grid for the individual samples and a wider
# grid (up to 100 topics) for the merged object.
n_topics = [2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
n_topics_merged = [2, 5, 10, 20, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100]
# Number of iterations per model; also part of the model pickle file name.
n_iter = 500

In [None]:
# mallet implementation
# Fit (or reload) the topic models for every cisTopic object. Results are
# collected in models_dict, keyed like cistopic_obj_dict, and cached as one
# pickle per object in f_mod_dir.
models_dict = {}
for key in cistopic_obj_dict.keys():
    f_mod = os.path.join(wdir, f_mod_dir, key + '__models_' + str(n_iter) + '_iter.pkl')

    # Cache hit: reuse the models from a previous run.
    if os.path.isfile(f_mod):
        print(f"Loading {f_mod}")
        with open(f_mod, 'rb') as f:
            models_dict[key] = pickle.load(f)
        continue

    print(f"Running {key}")
    model = run_cgs_models_mallet(
        'mallet',  # mallet executable (the log shows it invoked as `mallet train-topics`)
        cistopic_obj_dict[key],
        # the merged object gets the wider topic grid
        n_topics=n_topics_merged if key == 'merged' else n_topics,
        n_cpu=16,
        n_iter=n_iter,
        random_state=555,
        alpha=50,
        alpha_by_topic=True,
        eta=0.1,
        eta_by_topic=False,
        # per-object prefix so temporary files of different objects do not collide
        tmp_path=os.path.join(f_mod_tmpdir, key + '_'),
    )

    # Save. Write to a temporary file and rename it into place: the existence
    # of f_mod is what marks an object as done, and these runs are long, so an
    # interrupted dump must never leave a truncated pickle under the final name.
    f_mod_tmp = f_mod + '.tmp'
    with open(f_mod_tmp, 'wb') as f:
        pickle.dump(model, f)
    os.replace(f_mod_tmp, f_mod)

    print(f"Finished {key}")
    models_dict[key] = model

Loading /staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/data_freeze_Jun2021_cell_downsampled/atac_qc/jupyter/models__consensus__mallet/Broad_1__models_500_iter.pkl
Loading /staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/data_freeze_Jun2021_cell_downsampled/atac_qc/jupyter/models__consensus__mallet/Broad_2__models_500_iter.pkl
Loading /staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/data_freeze_Jun2021_cell_downsampled/atac_qc/jupyter/models__consensus__mallet/Broad_mito_1__models_500_iter.pkl
Loading /staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/data_freeze_Jun2021_cell_downsampled/atac_qc/jupyter/models__consensus__mallet/Broad_mito_2__models_500_iter.pkl
Loading /staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/data_freeze_Jun2021_cell_downsampled/atac_qc/jupyter/models__consensus__mallet/CNAG_1__models_500_iter.pkl
Loading /staging/leuven/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/d



2021-09-15 12:06:31,003 LDAMalletWrapper INFO     Training MALLET LDA with mallet train-topics --input /scratch/leuven/325/vsc32528/tmp/mallet_cellds/merged_corpus.mallet --num-topics 2  --alpha 50 --beta 0.1 --optimize-interval 0 --num-threads 16 --output-state /scratch/leuven/325/vsc32528/tmp/mallet_cellds/merged_44b1e6_state.mallet.gz --output-doc-topics /scratch/leuven/325/vsc32528/tmp/mallet_cellds/merged_44b1e6_doctopics.txt --output-topic-keys /scratch/leuven/325/vsc32528/tmp/mallet_cellds/merged_44b1e6_topickeys.txt --num-iterations 500 --inferencer-filename /scratch/leuven/325/vsc32528/tmp/mallet_cellds/merged_44b1e6_inferencer.mallet --doc-topics-threshold 0.0  --random-seed 555
2021-09-15 12:47:50,158 LDAMalletWrapper INFO     loading assigned topics from /scratch/leuven/325/vsc32528/tmp/mallet_cellds/merged_44b1e6_state.mallet.gz
2021-09-15 12:53:44,245 cisTopic     INFO     Model with 2 topics done!
2021-09-15 12:53:44,247 cisTopic     INFO     Running model with 5 topics
