# Loading the necessary packages

In [1]:
# Apply the depmapomics firecloud patches in the first cell, ahead of the other imports.
# NOTE(review): presumably this has to run before dalmatian is imported below — confirm.
import depmapomics.patch_firecloud
depmapomics.patch_firecloud.install_patches()

In [2]:
from __future__ import print_function

from depmapomics import constants
from depmapomics import env_config

from depmapomics import dm_omics
from depmapomics import mutations as omics_mut
from depmapomics import copynumbers as omics_cn
from depmapomics import fingerprinting as fp

from mgenepy import terra
import dalmatian as dm
from bokeh.plotting import output_notebook

# Load IPython's autoreload extension in mode 2, so edited modules are re-imported
# automatically before each cell runs.
%load_ext autoreload
%autoreload 2

# Send Bokeh plot output to the notebook.
output_notebook()

ModuleNotFoundError: No module named 'gumbo_client'

In [None]:
# Switch for the postprocessing cells below: True calls the dm_omics functions with both
# the WES and WGS workspaces; False calls omics_cn / omics_mut postProcess on the WGS
# workspace only.
isCCLE = True

# Run pipeline on Terra

We are using Dalmatian to send requests to Terra. See [our readme](https://github.com/broadinstitute/depmap_omics/blob/master/documentation/DepMap_processing_pipeline.md) for a detailed breakdown of the subtasks in our WGS pipeline.

# Loading new data

- Sample onboarding is automated in the [dogspa](https://github.com/broadinstitute/dogspa) repo.
- The [omics-wgs-pipeline](https://github.com/broadinstitute/omics-wgs-pipeline) will be where production WGS pipeline code is stored and eventually run using continuous delivery. So far, this only includes the realignment/preprocessing pipeline. The `refresh-legacy-terra-samples` command in that repo will populate the "legacy" WGS workspace with new alignment-ready BAMs and other sample data table columns needed for downstream workflows.

For non-internal users, please make sure that your workspace is correctly set up.

To set up your workspace, follow the instructions in the README page.

In [None]:
print("running Terra pipeline")
refwm = dm.WorkspaceManager(env_config.WGSWORKSPACE)
submission_id = refwm.create_submission("WGS_pipeline", constants.SAMPLESETNAME, 'sample_set', expression='this.samples')
await terra.waitForSubmission(env_config.WGSWORKSPACE, submission_id)

In [None]:
# Submit the "Aggregate_CN_seg_files" workflow on the 'all' entity and await the submission.
# NOTE(review): 'all' is presumably the sample set holding every sample — confirm.
submission_id = refwm.create_submission("Aggregate_CN_seg_files", 'all')
await terra.waitForSubmission(env_config.WGSWORKSPACE, submission_id)

In [None]:
# Submit the "aggregate_microsatellite_repeats" workflow on the 'all' entity and await
# the submission.
submission_id = refwm.create_submission("aggregate_microsatellite_repeats", 'all')
await terra.waitForSubmission(env_config.WGSWORKSPACE, submission_id)

### Save the workflow configurations used

In [None]:
# Save the WGS workspace's workflow configurations under this release's data directory.
wgs_config_dir = f"data/{constants.SAMPLESETNAME}/WGSconfig/"
terra.saveWorkspace(env_config.WGSWORKSPACE, wgs_config_dir)

# Postprocessing on local


### Copy Number

In [None]:
# Build the "PureCN" sample set: every WGS sample whose PureCN_loh value is
# neither missing nor the literal string "NA".
wgs_wm = dm.WorkspaceManager(env_config.WGSWORKSPACE)
wgs_samples = wgs_wm.get_samples()

purecn_loh = wgs_samples["PureCN_loh"]
has_purecn_loh = purecn_loh.notna() & (purecn_loh != "NA")
wgs_purecn = wgs_samples.loc[has_purecn_loh].index.tolist()

wgs_wm.update_sample_set(sample_set_id="PureCN", sample_ids=wgs_purecn)

In [None]:
if isCCLE:
    wespriosegs, wgspriosegs = await dm_omics.cnPostProcessing(samplesetname=constants.SAMPLESETNAME, wesrefworkspace=env_config.WESCNWORKSPACE, wgsrefworkspace=env_config.WGSWORKSPACE, dryrun=False, useCache=False)
else:
    segments, genecn, failed, purecn_segments, purecn_genecn, loh_status, feature_table = await omics_cn.postProcess(env_config.WGSWORKSPACE, sampleset=constants.SAMPLESETNAME)

### Somatic Mutations

In [None]:
if isCCLE:
    await dm_omics.mutationPostProcessing(wesrefworkspace=env_config.WESCNWORKSPACE, wgsrefworkspace=env_config.WGSWORKSPACE, run_guidemat=True, run_sv=True, mafcol="depmap_maf_25q2")
else:
    await omics_mut.postProcess(env_config.WGSWORKSPACE, samplesetname=constants.SAMPLESETNAME)

# Subset and upload

Based on release dates and embargo status in gumbo, subset and upload datasets for each release audience, and hand off to the portal team.

In [None]:
from depmap_omics_upload import tracker
from depmap_omics_upload import upload
from mgenepy.utils import helper as h

In [None]:
from datetime import date
import datetime
# Date passed as `today=` to the upload steps below.
# NOTE(review): hard-coded — confirm it matches the release being uploaded.
release_date = datetime.date(2023, 11, 3)

In [None]:
# Initialise the virtual datasets for this sample set; `virtual` is passed to every
# upload step below.
virtual = upload.initVirtualDatasets(samplesetname=constants.SAMPLESETNAME)

In [None]:
# Display `virtual` as the cell output for a visual check.
virtual

In [None]:
# Run the upload module's data-permission check before any upload step.
# NOTE(review): what exactly is verified is defined in depmap_omics_upload — confirm.
upload.checkDataPermission()

In [None]:
# Auxiliary tables: `virtual` supplies the taiga ids, release_date is used as `today`.
upload.uploadAuxTables(taiga_ids=virtual, today=release_date)

In [None]:
# Model-level matrices, using the same virtual ids and release date.
upload.makeModelLvMatrices(virtual_ids=virtual, today=release_date)

In [None]:
# PR-level matrices, using the same virtual ids and release date.
# NOTE(review): "PR" presumably means profile — confirm.
upload.makePRLvMatrices(virtual_ids=virtual, today=release_date)

In [None]:
# WES and WGS matrices, using the same virtual ids and release date.
upload.makeWESandWGSMatrices(virtual_ids=virtual, today=release_date)

In [None]:
# Last upload step: update the "eternal" dataset from `virtual`.
# NOTE(review): presumably the eternal dataset tracks the latest release — confirm.
upload.updateEternal(virtual=virtual)

# Managing release readmes

In [None]:
# One-time setup: uncomment to clone depmap-release-readmes next to this repository.
# ! cd .. && git clone https://github.com/broadinstitute/depmap-release-readmes.git && cd -

In [None]:
# Update the local depmap-release-readmes checkout (pull without auto-committing a merge).
! cd ../depmap-release-readmes && git pull --no-commit

In [None]:
# Run make_new_release.py for constants.RELEASE, then commit with the release name as
# the message and push. IPython substitutes $constants.RELEASE before the shell runs.
!cd ../depmap-release-readmes/ && python3 make_new_release.py $constants.RELEASE  && git add . && git commit -m $constants.RELEASE && git push

### Saving workspace configs

In [None]:
# Export the broad-firecloud-ccle/DepMap_WGS_CN workspace with terra-sync into
# data/<SAMPLESETNAME>/WGSconfig.
! terra-sync export broad-firecloud-ccle/DepMap_WGS_CN data/$constants.SAMPLESETNAME/WGSconfig

In [None]:
# Export the broad-firecloud-ccle/DepMap_hg38_RNAseq workspace with terra-sync into
# data/<SAMPLESETNAME>/RNAconfig.
! terra-sync export broad-firecloud-ccle/DepMap_hg38_RNAseq data/$constants.SAMPLESETNAME/RNAconfig

In [None]:
# Flatten the WGS export: move everything matched by */*/* up into WGSconfig itself,
# then delete the configs/ directory.
! cd data/$constants.SAMPLESETNAME/WGSconfig && mv */*/* . && rm -r configs/

In [None]:
# Flatten the RNA export: move everything matched by */*/* up into RNAconfig itself,
# then delete the configs/ directory.
! cd data/$constants.SAMPLESETNAME/RNAconfig && mv */*/* . && rm -r configs/