# Notebook 10: Upload Corrected HG002 Assemblies<a class="tocSkip">

**Upload the final reassembled fastas as a submission**
    
    
**The steps that we will take are:**
1. Import Statements & Global Variable Definitions
2. Read In Data Tables
3. Select Correct Assemblies To Upload
4. Upload QC Data
5. Upload Raw Fastas + GFAs
6. Extract QC Metrics 

# Import Statements & Global Variable Definitions

In [45]:
%%capture
# Install/upgrade the ssds client straight from GitHub; %%capture hides pip's output.
# NOTE(review): no tag or commit is pinned, so each run may pick up a different ssds version.
%pip install --upgrade --no-cache-dir git+https://github.com/DataBiosphere/ssds

In [1]:
import terra_notebook_utils as tnu
import terra_pandas as tp
import pandas as pd
import os
from ssds import deployment
from pprint import pprint
import gcsfs
import seaborn as sns
%matplotlib inline

examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.
  "found relative to the 'datapath' directory.".format(key))


## Function Definitions

In [2]:
def clean_quast_string(input_string):
    """Return the last space-delimited token of ``input_string``.

    The argument is coerced with ``str()``, trailing whitespace is stripped,
    and whatever follows the final single space character is returned (the
    whole stripped string when it contains no space).
    """
    text = str(input_string).rstrip()
    # Only the final field is kept, so one split from the right is enough.
    return text.rsplit(' ', 1)[-1]

## Global Variable Definitions

In [33]:
## Unique identifier for this submission (a UUID, fixed here so re-runs target the same submission)
submission_id       = "4B1C8C77-ACCA-4F82-899C-BB8C658C3613"

## Human-readable label that is appended to the submission id
submission_name     = "YEAR_1_ASSEMBLIES_CORRECTED_HG002"

## SSDS staging client and the bucket that submissions are written to
ds = deployment._GSStaging(google_billing_project="firecloud-cgl")
destintation_bucket = "gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf"

## Root of this submission: <bucket>/submissions/<id>--<name>
## (one more directory level may be added below this later)
submission_dirname  = f"{submission_id}--{submission_name}"
dest_full_path      = os.path.join(destintation_bucket, "submissions", submission_dirname)

## Set Environment Variables

In [4]:
# Billing project and workspace bucket come from the environment; the workspace
# name is the parent directory of the current working directory.
PROJECT = os.environ['WORKSPACE_NAMESPACE']
WORKSPACE = os.path.basename(os.path.dirname(os.getcwd()))
bucket = f"{os.environ['WORKSPACE_BUCKET']}/"

# Echo what was captured so a wrong workspace is noticed early
print(
    f"Billing project: {PROJECT}\n"
    f"Workspace: {WORKSPACE}\n"
    f"Workspace storage bucket: {bucket}"
)

Billing project: human-pangenome-ucsc
Workspace: HPRC_Reassembly
Workspace storage bucket: gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/


# Read In Data Tables

## Read In Original QC Samples

In [16]:
# Load the "qc_sample" table into a DataFrame via terra_pandas
qc_sample_df = tp.table_to_dataframe("qc_sample")

# Preview the first rows
qc_sample_df.head()

Unnamed: 0_level_0,asmgeneMaternalSummary,quastPaternalSummary,mat_ilmn,mat_misjoins,pat_misjoins,mat_final_fasta,mat_minigraph,child_ilmn,is_male,allResults,asmgenePaternalSummary,quastMaternalSummary,merquryQV,pat_ilmn,sample_name,dipcallVCF,dipcallBED,pat_minigraph,pat_final_fasta,yakSummary
qc_sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
HG002_downsampled,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG002,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...
HG002_full_v0.14,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/a...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/a...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/a...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/a...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/a...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/a...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG002,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/a...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/a...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/a...
HG00438,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,False,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG00438,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...
HG005,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG005,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...
HG00621,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG00621,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...


## Read In Corrected/Split QC Samples

In [17]:
# Load the "qc_split_sample" table into a DataFrame via terra_pandas
qc_split_sample_df = tp.table_to_dataframe("qc_split_sample")

# Preview the first rows
qc_split_sample_df.head()

Unnamed: 0_level_0,asmgeneMaternalSummary,quastPaternalSummary,mat_ilmn,mat_final_fasta,is_male,allResults,asmgenePaternalSummary,quastMaternalSummary,pat_ilmn,sample_name,dipcallVCF,dipcallBED,pat_final_fasta,yakSummary,child_ilmn,merquryQV
qc_split_sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
HG01123,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,False,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG01123,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/s...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,,
HG01358,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG01358,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/s...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...


## Read In Corrected HG002 Assemblies

In [18]:
# Load the "hg002_qc_sample" table into a DataFrame via terra_pandas
hg002_ass_sample_df = tp.table_to_dataframe("hg002_qc_sample")

# Preview the first rows
hg002_ass_sample_df.head()

Unnamed: 0_level_0,asmgeneMaternalSummary,quastPaternalSummary,mat_ilmn,mat_misjoins,pat_misjoins,mat_final_fasta,mat_minigraph,child_ilmn,is_male,allResults,asmgenePaternalSummary,quastMaternalSummary,merquryQV,pat_ilmn,sample_name,dipcallVCF,dipcallBED,pat_minigraph,pat_final_fasta,yakSummary
hg002_qc_sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
HG002_chem_v2,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/3...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/3...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,gs://fc-7acb866c-0074-455e-b6ac-ac5b3f4aa76b/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/3...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/3...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/3...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/3...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG002,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/3...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/3...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,gs://fc-7acb866c-0074-455e-b6ac-ac5b3f4aa76b/3...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/3...
HG002_full_v0.14.1,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,gs://fc-7acb866c-0074-455e-b6ac-ac5b3f4aa76b/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG002,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,gs://fc-7acb866c-0074-455e-b6ac-ac5b3f4aa76b/3...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...


In [19]:
## Discard the HG002_chem_v2 row so only the final assembly we will use remains
hg002_ass_sample_df = hg002_ass_sample_df.drop(index=['HG002_chem_v2'])
hg002_ass_sample_df

Unnamed: 0_level_0,asmgeneMaternalSummary,quastPaternalSummary,mat_ilmn,mat_misjoins,pat_misjoins,mat_final_fasta,mat_minigraph,child_ilmn,is_male,allResults,asmgenePaternalSummary,quastMaternalSummary,merquryQV,pat_ilmn,sample_name,dipcallVCF,dipcallBED,pat_minigraph,pat_final_fasta,yakSummary
hg002_qc_sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
HG002_full_v0.14.1,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,gs://fc-7acb866c-0074-455e-b6ac-ac5b3f4aa76b/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG002,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/d...,gs://fc-7acb866c-0074-455e-b6ac-ac5b3f4aa76b/3...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...


# Select Correct Assemblies To Upload

In [20]:
## Check how many rows we have before dropping any samples
len(qc_sample_df.index)

48

In [21]:
## These samples are taken from the split / corrected-HG002 tables instead
replaced_sample_ids = ["HG01123", "HG01358", "HG002_downsampled", "HG002_full_v0.14"]
qc_sample_df.drop(index=replaced_sample_ids, inplace=True)

In [22]:
## Check how many rows we have now that we dropped four rows
len(qc_sample_df.index)

44

**Join the dataframes to get final samples for upload**

In [24]:
## Stack the split-sample rows under the remaining original QC rows.
## qc_split_sample_df has no mat_misjoins / pat_misjoins / mat_minigraph /
## pat_minigraph columns (compare the two table previews), so those cells are
## NaN for its rows.
upload_df = pd.concat([qc_sample_df, qc_split_sample_df], sort=False)

In [25]:
## Append the corrected HG002 row.
## NOTE: upload_df is rebuilt from itself here, so re-running only this cell
## appends the HG002 row a second time.
upload_df = pd.concat([upload_df, hg002_ass_sample_df], sort=False)

In [26]:
## Check number of rows one last time
## (should be 47 now: 44 kept + 2 split samples + 1 corrected HG002)
len(upload_df.index)

47

In [27]:
## Show the combined table for a visual check before anything is copied
upload_df

Unnamed: 0,asmgeneMaternalSummary,quastPaternalSummary,mat_ilmn,mat_misjoins,pat_misjoins,mat_final_fasta,mat_minigraph,child_ilmn,is_male,allResults,asmgenePaternalSummary,quastMaternalSummary,merquryQV,pat_ilmn,sample_name,dipcallVCF,dipcallBED,pat_minigraph,pat_final_fasta,yakSummary
HG00438,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,False,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG00438,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...
HG005,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG005,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...
HG00621,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG00621,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...
HG00673,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG00673,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...
HG00733,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,False,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG00733,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...
HG00735,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,False,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG00735,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...
HG00741,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,False,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG00741,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...
HG01071,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,False,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG01071,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...
HG01106,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG01106,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...
HG01109,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/9...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/b...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,True,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,[gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/...,HG01109,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/c...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...


## Upload Directly From GCP w/ SSDS

In [34]:
## Walk the upload table and copy the maternal + paternal fastas of the
## corrected HG002 assembly into the submission
for index, row in upload_df.iterrows():

    ## Every row other than the corrected HG002 assembly is skipped
    if index != "HG002_full_v0.14.1":
        continue

    sample_id   = index
    sample_name = row['sample_name']

    ## Both source paths are read before anything is copied
    mat_fa_fp, pat_fa_fp = row['mat_final_fasta'], row['pat_final_fasta']

    ## File names are kept as-is; only the directory layout is new
    mat_fa_fn, pat_fa_fn = map(os.path.basename, (mat_fa_fp, pat_fa_fp))

    subm_dir      = f"{sample_name}/assemblies/year1_freeze_assembly_v2.1"
    mat_subm_path = f"{subm_dir}/{mat_fa_fn}"
    pat_subm_path = f"{subm_dir}/{pat_fa_fn}"

    ds.copy(mat_fa_fp, submission_id, submission_name, mat_subm_path)
    ds.copy(pat_fa_fp, submission_id, submission_name, pat_subm_path)

2021-04-06 04:52:24::INFO  Copied gs://fc-7acb866c-0074-455e-b6ac-ac5b3f4aa76b/7b8c202c-a3e2-49fc-a4b3-7cd364667f08/finalizeAssembly/b10c043d-9add-4484-b6a9-466ec9ecdaaf/call-renameContigsAddMT/HG002.maternal.f1_assembly_v2.1.fa.gz to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/HG002.maternal.f1_assembly_v2.1.fa.gz
2021-04-06 04:52:36::INFO  Copied gs://fc-7acb866c-0074-455e-b6ac-ac5b3f4aa76b/35e446d1-11cd-42f7-a1cc-88eb89a25fe9/finalizeAssembly/4df23175-966c-4346-8b6c-f34509d64724/call-renameContigsAddMT/HG002.paternal.f1_assembly_v2.1.fa.gz to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/HG002.paternal.f1_assembly_v2.1.fa.gz


# Upload QC Data
## Download Then Extract QC Tarballs

In [37]:
! mkdir hg002_qc_data
%cd hg002_qc_data

/home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data


In [38]:
## Take a separate copy of upload_df for the QC steps
qc_upload_df = upload_df.copy()

In [39]:
## Download, unpack and rename the QC tarball -- only for the corrected HG002 row
for index, row in qc_upload_df.iterrows():
    
    if row.name == "HG002_full_v0.14.1":
        sample_name = row['sample_name']

        ## Copy down QC results
        results_tar_fp = row['allResults']
        ! gsutil cp {results_tar_fp} .

        ## Unzip and remove original tar.gz file
        qc_tar_fn = os.path.basename(results_tar_fp)
        ! tar xzvf {qc_tar_fn}
        ! rm {qc_tar_fn}

        ## Folder name = everything before the FIRST '.', so
        ## HG002_StandardQC.tar.gz -> HG002_StandardQC (os.path.splitext would
        ## only strip the trailing ".gz"). str.index raises ValueError if the
        ## file name contains no '.'.
        filename     = os.path.basename(qc_tar_fn)
        index_of_dot = filename.index('.')
        folder_name  = filename[:index_of_dot]

        ## Move to a more standard name
        ## NOTE(review): the {sample_name}/ parent directory is not created in
        ## any visible cell, and the `ls` output further down still lists
        ## HG002_StandardQC, so this mv appears not to take effect. The upload
        ## step passes the original folder name -- confirm before changing it.
        ! mv {folder_name} {sample_name}/assembly_qc

Copying gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e3ef38c8-3438-4acb-9785-d51698a2d6b7/standardQualityControl/bf6eedec-4207-440b-bb95-191d32d61d5b/call-consolidate/glob-86aa2126671e0b3e99d2a48a23fb36d9/HG002_StandardQC.tar.gz...
- [1 files][ 14.1 GiB/ 14.1 GiB]   39.3 MiB/s                                   
Operation completed over 1 objects/14.1 GiB.                                     
HG002_StandardQC/
HG002_StandardQC/dipcall_v0.1/
HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.hap2.bam
HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.hap1.sam.gz.log
HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.hap1.sam.gz
HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.pair.vcf.gz
HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.hap1.bed
HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.dip.bed
HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.dip.vcf.gz
HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.hap2.var.gz
HG002_StandardQC/dipcall_v0.1/HG002.f1_asse

HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap1.sam.gz.log
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap1.sam.gz
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.pair.vcf.gz
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap1.bed
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.dip.bed
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.dip.vcf.gz
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap2.var.gz
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap1.var.gz.vst
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap2.sam.gz
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap2.var.gz.vst
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap2.paf.gz.log
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap2.paf.gz
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap1.var.gz
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap2.bed
HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap1.bam
HG002_StandardQC/dipcal

In [40]:
## Count the entries in the working directory
! ls | wc -l

1


## Update Submission

In [43]:
## List what is in the working directory before uploading
! ls

HG002_StandardQC


In [72]:
! ssds staging upload \
    --deployment gcp \
    --submission-id 4B1C8C77-ACCA-4F82-899C-BB8C658C3613 \
    --subdir HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc \
    HG002_StandardQC

INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/quast/pat/report.pdf to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/quast/pat/report.pdf
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/quast/pat/transposed_report.txt to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/quast/pat/transposed_report.txt
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/quast/pat/report.tex to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/ass

INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/quast/mat/basic_stats/NGx_plot.pdf to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/quast/mat/basic_stats/NGx_plot.pdf
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/quast/mat/basic_stats/Nx_plot.pdf to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/quast/mat/basic_stats/Nx_plot.pdf
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/quast/mat/basic_stats/HG002.maternal.f1_assembly_v2.1_GC_content_plot.pdf to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C36

INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap1.paf.gz.log to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/dipcall_v0.2/HG002.f1_assembly_v2.1.hap1.paf.gz.log
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/dipcall_v0.2/HG002.f1_assembly_v2.1.hap1.sam.gz.log to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/dipcall_v0.2/HG002.f1_assembly_v2.1.hap1.sam.gz.log
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.altHap.maternal.hapmers.spectra-cn.fl.png to gs://fc-4310e7

INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.asm.100_20000.switches.txt to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/merqury/HG002.f1_assembly_v2.1.merqury.asm.100_20000.switches.txt
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.altHap.contig.sizes to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/merqury/HG002.f1_assembly_v2.1.merqury.altHap.contig.sizes
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.asm.100_20000.pha

INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.asm.only.hist to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/merqury/HG002.f1_assembly_v2.1.merqury.asm.only.hist
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.asm.continuity.N.png to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/merqury/HG002.f1_assembly_v2.1.merqury.asm.continuity.N.png
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.altHap.qv to gs://fc-4310e737-a388-4a10-8

INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.altHap.maternal.hapmers.tdf to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/merqury/HG002.f1_assembly_v2.1.merqury.altHap.maternal.hapmers.tdf
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.altHap.paternal.hapmers.spectra-cn.st.png to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/merqury/HG002.f1_assembly_v2.1.merqury.altHap.paternal.hapmers.spectra-cn.st.png
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG

INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.altHap.maternal.hapmers.spectra-hap-cn.hist to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/merqury/HG002.f1_assembly_v2.1.merqury.altHap.maternal.hapmers.spectra-hap-cn.hist
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/merqury/HG002.f1_assembly_v2.1.merqury.asm.paternal.hapmers.spectra-hap-cn.hist to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/merqury/HG002.f1_assembly_v2.1.merqury.asm.paternal.hapmers.spectra-hap-cn.hist
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_da

INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.pair.vcf.gz to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/dipcall_v0.1/HG002.f1_assembly_v2.1.pair.vcf.gz
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.hap1.bam to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/year1_freeze_assembly_v2.1/assembly_qc/dipcall_v0.1/HG002.f1_assembly_v2.1.hap1.bam
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_qc_data/HG002_StandardQC/dipcall_v0.1/HG002.f1_assembly_v2.1.hap2.sam.gz.log to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77

# Upload Raw Fastas + GFAs
## Pull Raw Fastas From Sample Table

In [48]:
%cd ..

/home/jupyter-user/notebooks/HPRC_Reassembly/edit


In [49]:
! mkdir hg002_raw
%cd hg002_raw

/home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_raw


In [50]:
# Load the "sample" data table into a DataFrame via terra_pandas
# (presumably the current Terra workspace's table -- confirm against terra_pandas docs).
sample_df = tp.table_to_dataframe("sample")

# Display the row for the raw HG002 assembly entry; its FASTA paths are read in a later cell.
sample_df.loc["HG002_full_v0.14.1"]

QCstats                         gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/e...
mat_ilmn                                                                      NaN
hifi                                                                          NaN
paternal_id                                                                 HG003
matYak                          gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/y...
maternal_id                                                                 HG004
patYak                          gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/y...
nanopore                                                                      NaN
pat_ilmn                                                                      NaN
maternalFastaGz                 gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/H...
paternalFastaGz                 gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/H...
maternalContigGfaTarGz                                                        NaN
paternalContigGf

In [51]:
!mkdir hifiasm_v0.14_raw
%cd hifiasm_v0.14_raw

/home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_raw/hifiasm_v0.14_raw


In [56]:
mat_fa_fp = sample_df.loc["HG002_full_v0.14.1", 'maternalFastaGz']
pat_fa_fp = sample_df.loc["HG002_full_v0.14.1", 'paternalFastaGz']

! gsutil cp {mat_fa_fp} .
! gsutil cp {pat_fa_fp} .

Copying gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/HG002_FULL_V0.14.1/HG002-full-0.14.1.mat.fa.gz...
\ [1 files][852.4 MiB/852.4 MiB]   41.0 MiB/s                                   
Operation completed over 1 objects/852.4 MiB.                                    
Copying gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/HG002_FULL_V0.14.1/HG002-full-0.14.1.pat.fa.gz...
- [1 files][818.5 MiB/818.5 MiB]   50.1 MiB/s                                   
Operation completed over 1 objects/818.5 MiB.                                    


## Manually Pull In GFAs

In [91]:
! ls

HG002-full-0.14.1.mat.fa.gz  HG002-full-0.14.1.pat.fa.gz


In [92]:
# Copy the hifiasm v0.14.1 GFA files from the earlier "HG002-ASSEMBLY" submission into
# three local subdirectories. `-u firecloud-cgl` names the project billed for the requests.
#
# NOTE(review): the recorded output of this cell shows "CommandException: No URLs matched"
# for the hap1 lowQ.bed copy, so that file may not exist at the source -- confirm whether
# the lowQ.bed files are expected to be part of the upload.
#
# The cell finishes inside HG002.raw_unitig_gfa; the `%cd ../..` in the next section
# relies on that to get back to hg002_raw.

# hap1 primary-contig files go into the "pat" directory.
!mkdir HG002.pat.contig_gfa
%cd HG002.pat.contig_gfa

! gsutil -u firecloud-cgl cp gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.hap1.p_ctg.gfa .
! gsutil -u firecloud-cgl cp gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.hap1.p_ctg.lowQ.bed .
! gsutil -u firecloud-cgl cp gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.hap1.p_ctg.noseq.gfa .

# hap2 primary-contig files go into the "mat" directory.
%cd ..
!mkdir HG002.mat.contig_gfa
%cd HG002.mat.contig_gfa

! gsutil -u firecloud-cgl cp gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.hap2.p_ctg.gfa .
! gsutil -u firecloud-cgl cp gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.hap2.p_ctg.lowQ.bed .
! gsutil -u firecloud-cgl cp gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.hap2.p_ctg.noseq.gfa .
    
# Diploid raw-unitig graphs (with and without sequence) go into their own directory.
%cd ..
!mkdir HG002.raw_unitig_gfa
%cd HG002.raw_unitig_gfa

! gsutil -u firecloud-cgl cp gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.dip.r_utg.gfa .
! gsutil -u firecloud-cgl cp gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.dip.r_utg.noseq.gfa .

/home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_raw/hifiasm_v0.14.1_raw/HG002.pat.contig_gfa
Copying gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.hap1.p_ctg.gfa...
/ [1 files][  2.8 GiB/  2.8 GiB]   40.5 MiB/s                                   
Operation completed over 1 objects/2.8 GiB.                                      
CommandException: No URLs matched: gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.hap1.p_ctg.lowQ.bed
Copying gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4DE3A234-8903-44C7-8BEF-86C5AF73F5B1--HG002-ASSEMBLY/HG002-full-0.14.1.hap1.p_ctg.noseq.gfa...
\ [1 files][ 82.2 MiB/ 82.2 MiB]                                                
Operation completed over 1 objects/82.2 MiB.                                     
/home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_raw/hifiasm_v0.14.1_

## Update Submission

In [93]:
%cd ../..
! ls

/home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_raw
hifiasm_v0.14.1_raw


In [64]:
! mv hifiasm_v0.14_raw hifiasm_v0.14.1_raw

In [94]:
! ssds staging upload \
    --deployment gcp \
    --submission-id 4B1C8C77-ACCA-4F82-899C-BB8C658C3613 \
    --subdir HG002/assemblies/hifiasm_v0.14.1_raw \
    hifiasm_v0.14.1_raw

INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_raw/hifiasm_v0.14.1_raw/HG002-full-0.14.1.mat.fa.gz to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/hifiasm_v0.14.1_raw/HG002-full-0.14.1.mat.fa.gz
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_raw/hifiasm_v0.14.1_raw/HG002-full-0.14.1.pat.fa.gz to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/hifiasm_v0.14.1_raw/HG002-full-0.14.1.pat.fa.gz
INFO:ssds.storage:Copied /home/jupyter-user/notebooks/HPRC_Reassembly/edit/hg002_raw/hifiasm_v0.14.1_raw/HG002.mat.contig_gfa/HG002-full-0.14.1.hap2.p_ctg.gfa to gs://fc-4310e737-a388-4a10-8c9e-babe06aaf0cf/submissions/4B1C8C77-ACCA-4F82-899C-BB8C658C3613--YEAR_1_ASSEMBLIES_CORRECTED_HG002/HG002/assemblies/hifiasm_v0.14.1_raw/HG002.mat.co

# Extract QC Metrics

## Add QUAST Info

In [73]:
# Work on a copy so the metric extraction below does not modify upload_df.
qc_extract_df = upload_df.copy()

In [74]:
# Start the metrics table from the sample identifier and the dipcall output columns.
metric_df = qc_extract_df[['sample_name', 'dipcallBED', 'dipcallVCF']].copy()

# Pre-create one empty-string column per haplotype and QUAST metric
# (maternal columns first, then paternal); the next cell fills them in.
for haplotype in ("mat", "pat"):
    for quast_metric in ("num_contigs", "total_len", "N50", "L50"):
        metric_df[haplotype + "_" + quast_metric] = ""

In [75]:
# Fill the QUAST columns of metric_df from each sample's maternal and paternal QUAST summaries.
#
# Each summary is read with a tab separator and only the first column of the selected row
# is used; clean_quast_string() then keeps the last space-separated token of that text.
# The row positions are tied to the layout of the summary file.
# TODO confirm they still hold if the QUAST version or report options change.
QUAST_ROW_POSITIONS = {
    "num_contigs": 13,
    "total_len":   7,
    "N50":         18,
    "L50":         22,
}

# metric_df column prefix -> qc_extract_df column holding the path of that haplotype's summary.
QUAST_SUMMARY_COLUMNS = {
    "mat": "quastMaternalSummary",
    "pat": "quastPaternalSummary",
}

for index, row in qc_extract_df.iterrows():
    for haplotype, summary_column in QUAST_SUMMARY_COLUMNS.items():
        quast_df = pd.read_csv(row[summary_column], sep='\t')

        for metric_name, row_position in QUAST_ROW_POSITIONS.items():
            # Assign with a single .loc call. The previous chained form
            # (metric_df[col][index] = ...) writes through an intermediate Series, which
            # raises SettingWithCopyWarning and is not applied under pandas Copy-on-Write.
            metric_df.loc[index, haplotype + "_" + metric_name] = clean_quast_string(
                quast_df.iloc[row_position].values[0]
            )

## Add asmgene Info

In [76]:
# Pre-create the empty-string asmgene columns (maternal first, then paternal);
# the next cell fills them in.
for haplotype in ("mat", "pat"):
    for asmgene_metric in ("full_sgl", "full_dup", "frag"):
        metric_df[haplotype + "_" + asmgene_metric] = ""

In [77]:
# Fill the asmgene columns of metric_df from each sample's maternal and paternal summaries.
#
# For each metric, the row whose 'Metric' column equals the metric name is selected and the
# value at column position 3 of the first matching row is stored.
# NOTE(review): which quantity column position 3 holds is not visible here -- confirm
# against the asmgene summary header.

# metric_df column prefix -> qc_extract_df column holding the path of that haplotype's summary.
ASMGENE_SUMMARY_COLUMNS = {
    "mat": "asmgeneMaternalSummary",
    "pat": "asmgenePaternalSummary",
}
ASMGENE_METRICS = ("full_sgl", "full_dup", "frag")

for index, row in qc_extract_df.iterrows():
    for haplotype, summary_column in ASMGENE_SUMMARY_COLUMNS.items():
        asm_df = pd.read_csv(row[summary_column], sep='\t')

        for metric_name in ASMGENE_METRICS:
            matching_rows = asm_df[asm_df['Metric'] == metric_name]

            # Assign with a single .loc call. The previous chained form
            # (metric_df[col][index] = ...) writes through an intermediate Series, which
            # raises SettingWithCopyWarning and is not applied under pandas Copy-on-Write.
            metric_df.loc[index, haplotype + "_" + metric_name] = matching_rows.iloc[:, 3].values[0]

## Add YAK Info

In [78]:
# Pre-create the empty-string YAK columns (maternal first, then paternal);
# the next cell fills them in.
for haplotype in ("mat", "pat"):
    for yak_metric in ("switch_err_rate", "hamming_err_rate", "qv"):
        metric_df[haplotype + "_" + yak_metric] = ""

In [79]:
# Fill the YAK columns of metric_df from each sample's yak summary file.
# One summary file provides both the maternal and the paternal values.
#
# Every entry below is (row position, tab-separated field position) within the summary,
# which is read with header=None so that row 0 is the file's first line.
# The positions are tied to the layout of the summary -- TODO confirm if that layout changes.

# Layout when the summary starts with '# mat qv' (QV was calculated; child Illumina data available).
YAK_POSITIONS_WITH_QV = {
    "mat_switch_err_rate":  (11, 3),
    "mat_hamming_err_rate": (12, 3),
    "mat_qv":               (4, 2),
    "pat_switch_err_rate":  (15, 3),
    "pat_hamming_err_rate": (16, 3),
    "pat_qv":               (9, 2),
}

# Layout when QV was not calculated (no child Illumina data); the qv columns get "Not_Calc".
YAK_POSITIONS_WITHOUT_QV = {
    "mat_switch_err_rate":  (1, 3),
    "mat_hamming_err_rate": (2, 3),
    "pat_switch_err_rate":  (5, 3),
    "pat_hamming_err_rate": (6, 3),
}


def _yak_field(summary_df, row_position, field_position):
    """Return one tab-separated field from the first column of a yak summary row."""
    return summary_df.iloc[row_position, :].values[0].split('\t')[field_position]


for index, row in qc_extract_df.iterrows():
    yak_fp = row['yakSummary']
    yak_df = pd.read_csv(yak_fp, sep=',', header=None)

    has_qv = yak_df.iloc[0, 0] == '# mat qv'
    positions = YAK_POSITIONS_WITH_QV if has_qv else YAK_POSITIONS_WITHOUT_QV

    for column_name, (row_position, field_position) in positions.items():
        # Assign with a single .loc call. The previous chained form
        # (metric_df[col][index] = ...) writes through an intermediate Series, which
        # raises SettingWithCopyWarning and is not applied under pandas Copy-on-Write.
        metric_df.loc[index, column_name] = _yak_field(yak_df, row_position, field_position)

    if not has_qv:
        metric_df.loc[index, "mat_qv"] = "Not_Calc"
        metric_df.loc[index, "pat_qv"] = "Not_Calc"

In [80]:
metric_df

Unnamed: 0,sample_name,dipcallBED,dipcallVCF,mat_num_contigs,mat_total_len,mat_N50,mat_L50,pat_num_contigs,pat_total_len,pat_N50,...,mat_frag,pat_full_sgl,pat_full_dup,pat_frag,mat_switch_err_rate,mat_hamming_err_rate,mat_qv,pat_switch_err_rate,pat_hamming_err_rate,pat_qv
HG00438,HG00438,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,259,3035929804,54936949,19,278,3025358313,48061544,...,10,34727,131,9,0.009714,0.010295,52.836,0.009502,0.008803,52.845
HG005,HG005,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/5...,541,3031523002,69736411,17,811,2942569812,58303677,...,9,34667,129,9,0.006806,0.005658,53.090,0.002831,0.002081,52.982
HG00621,HG00621,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,254,3023109422,50294217,19,295,2906296479,54673245,...,8,34679,142,8,0.007039,0.005985,52.838,0.004673,0.005548,53.015
HG00673,HG00673,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,353,3053794682,29077043,35,436,2926307565,34843587,...,11,34593,189,14,0.009603,0.009333,53.151,0.003116,0.002701,53.352
HG00733,HG00733,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/0...,617,3026801692,44647564,24,755,3043880851,40994157,...,11,34722,149,11,0.009791,0.009964,54.823,0.00976,0.00933,54.558
HG00735,HG00735,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,251,3038195502,56474489,18,321,3034561332,53422923,...,7,34671,145,11,0.00665,0.00712,52.720,0.009743,0.00895,52.711
HG00741,HG00741,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,307,3037308484,41001116,23,311,3031070223,51040418,...,16,34706,130,12,0.005852,0.005426,52.511,0.010897,0.012607,52.363
HG01071,HG01071,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,238,3012820263,50125412,17,367,3059213820,55592407,...,14,34606,162,17,0.009084,0.011651,51.285,0.003776,0.004007,51.147
HG01106,HG01106,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,283,3036387987,47714433,20,310,2928126274,57173280,...,7,34735,136,8,0.004466,0.009752,53.394,0.003519,0.003244,53.751
HG01109,HG01109,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/7...,420,3036685266,32308382,32,543,2924871631,30220240,...,16,34672,141,16,0.007593,0.015466,51.365,0.002921,0.004164,51.575


## Copy Metrics As CSV To Bucket

In [81]:
# File name of the QC metrics CSV, written to the current working directory.
output_metric_fn = "2021_04_06_Y1_assemblies_QC.csv"
# index=False leaves the DataFrame index out of the file; the sample identifiers are
# still written through the sample_name column.
metric_df.to_csv(output_metric_fn, index=False)

In [82]:
# Copy the CSV to the workspace bucket. `bucket` already ends with "/" (see
# "Set Environment Variables"), so the file name is appended directly.
! gsutil cp {output_metric_fn} {bucket}{output_metric_fn}

Copying file://2021_04_06_Y1_assemblies_QC.csv [Content-Type=text/csv]...
/ [1 files][ 28.5 KiB/ 28.5 KiB]                                                
Operation completed over 1 objects/28.5 KiB.                                     


In [84]:
bucket

'gs://fc-0c2122a8-6725-4199-b90e-828ab006078f/'