# PureCN Curation
Notebook to select PureCN absolute copy number calls for manual curation.

William Colgan (wcolgan@broadinstitute.org)

In [None]:
# Load packages

import os
import subprocess

import dalmatian as dm
import numpy as np
import pandas as pd
# Silence pandas' chained-assignment (SettingWithCopy) warning: a later cell
# assigns new columns onto a filtered subset of the sample table.
pd.options.mode.chained_assignment = None

In [None]:
# Pipeline parameters
# Workspace identifier handed to dalmatian's WorkspaceManager.
WORKSPACE = "broad-firecloud-ccle/DepMap_WES_CN_hg38"
# Calls whose goodness of fit (parsed from PureCN_comment) is below this value
# are selected for curation, unless they are flagged NON-ABERRANT.
MIN_GOF = 70
# Calls whose PureCN_ploidy exceeds this value are selected for curation.
MAX_PLOIDY = 5

## Select Calls to Curate
A call is selected when (goodness of fit < MIN_GOF and the call is not flagged non-aberrant) or when ploidy > MAX_PLOIDY.

In [None]:
# Open the workspace and fetch its sample table; the following cells use
# samples_df as a pandas DataFrame.
wm = dm.WorkspaceManager(WORKSPACE)
# NOTE(review): dalmatian's documented accessor appears to be get_samples();
# confirm that get_sample() exists in the installed dalmatian version.
samples_df = wm.get_sample()
# Offline alternative: load the sample table from a local TSV file instead.
#samples_df = pd.read_csv("~/Downloads/sample.tsv",sep = "\t")

In [None]:
# Goodness of fit: the first run of digits found in the PureCN comment.
# Comments without any digits (or missing comments) default to 100.
gof_match = samples_df.PureCN_comment.str.extract(r'([0-9]+)', expand=True)
samples_df['PureCN_gof'] = gof_match.fillna(100).astype(int)

# Flag samples whose comment contains the NON-ABERRANT marker.
# NOTE(review): a missing comment yields NaN from str.contains, which
# astype(bool) presumably converts to True -- confirm that is intended.
has_non_aberrant_tag = samples_df.PureCN_comment.str.contains("NON-ABERRANT")
samples_df['Non_aberrant'] = has_non_aberrant_tag.astype(bool)

In [None]:
# Select the calls that need manual review: a poor fit (goodness of fit below
# MIN_GOF) that is not flagged non-aberrant, or a ploidy above MAX_PLOIDY.
poor_fit = (samples_df.PureCN_gof < MIN_GOF) & ~samples_df.Non_aberrant
high_ploidy = samples_df.PureCN_ploidy > MAX_PLOIDY

# Take an explicit copy so the column assignments below write to an
# independent frame instead of relying on the chained-assignment warning
# being switched off globally.
to_curate = samples_df[poor_fit | high_ploidy].copy()

# Curation fields start out blank (presumably filled in by hand in the
# exported sheet); every exported row is marked as curated.
to_curate["PureCN_curated_solution"] = ""
to_curate["PureCN_failed"] = ""
to_curate["PureCN_curated"] = True

# Export only the columns needed for review. The DataFrame index is written
# as the first CSV column.
curation_columns = ['entity:sample_id', 'PureCN_ploidy', 'PureCN_comment',
                    'PureCN_curated', 'PureCN_curated_solution', 'PureCN_failed']
to_curate.loc[:, curation_columns].to_csv("~/Desktop/to_curate.csv")

## Download Solution PDFs

In [None]:
# Copy the PureCN solutions PDF of every selected sample into
# ~/Desktop/solutions/ for manual review.
solutions_dir = os.path.expanduser("~/Desktop/solutions/")
# gsutil needs an existing directory when copying several sources into it.
os.makedirs(solutions_dir, exist_ok=True)

# Samples without a recorded PDF are skipped (str.cat ignored missing values
# in the same way).
pdf_uris = to_curate.PureCN_solutions_pdf.dropna().astype(str).tolist()

if pdf_uris:
    # Pass an argument list rather than a shell string so that URIs containing
    # spaces or shell metacharacters are handed to gsutil verbatim; check=True
    # raises CalledProcessError instead of silently ignoring a failed copy.
    subprocess.run(["gsutil", "-m", "cp"] + pdf_uris + [solutions_dir], check=True)

## Update Sample Sheet

In [None]:
# Read the curation sheet back in; its first column is the DataFrame index
# that was written out with the sheet, so the join below aligns on that index.
curated = pd.read_csv("~/Desktop/to_curate.csv", index_col=0)
curation_cols = ['PureCN_curated', 'PureCN_curated_solution', 'PureCN_failed']

# Drop every curation column that is already present before joining:
# DataFrame.join raises on overlapping column names, and errors="ignore"
# keeps this working when the sample table has none of these columns yet.
# This also makes the cell safe to re-run.
samples_df = samples_df.drop(columns=curation_cols, errors="ignore").join(
    curated[curation_cols])

# Samples that are not in the sheet, and blank PureCN_failed entries,
# are recorded as False.
samples_df['PureCN_curated'] = samples_df['PureCN_curated'].fillna(False)
samples_df['PureCN_failed'] = samples_df['PureCN_failed'].fillna(False)

In [None]:
# Push the updated sample table to the workspace, leaving out the helper
# columns (PureCN_gof, Non_aberrant) that this notebook derived for selection.
upload_df = samples_df.drop(columns=['PureCN_gof', 'Non_aberrant'])
wm.upload_samples(upload_df)
# Offline alternative: write the same table to a local TSV file instead.
#samples_df.drop(['PureCN_gof','Non_aberrant'], axis = 1).to_csv("~/Desktop/sample.tsv",sep = "\t",index=False)