# Example usage

In [1]:
import demux_cellraft_updated as demux

## 1) Demuxing sequencing reads

**Inputs:** 

    1) List of fastq files to demux
        
    2) Output directory where files will be saved
    
    Optional:
    
    3) List of barcodes used for PCR1 indexing. The defaults are:
        ['ACAAGT', 'AAGCAA', 'TCCTGT', 'TGGTCC', 'ATGACC', 'CAGCTT', 'GGATAC', 'GGCTTG', 'other']
        
**Outputs:**

    1) Individual Fastq files for each assigned barcode. 
    
    Naming convention: each output file name starts with all characters before the first underscore in the input fastq file name, followed by the barcode name taken from the following mapping:
        {'ACAAGT': '130',
         'AAGCAA': '131',
         'TCCTGT': '145',
         'TGGTCC': '133',
         'ATGACC': '142',
         'CAGCTT': '143',
         'GGATAC': '144',
         'GGCTTG': '132',
         'other': 'other'}
         
    2) Summary file with count of reads assigned to each barcode (Filename_barcode_metrics.csv)
    
    3) Master summary file with count of reads assigned to each barcode for every fastq sample included in the list of input files (all_metrics_summary.csv)

In [2]:
# Directory holding the raw reads; the trailing slash matters because
# the file paths below are built by plain string concatenation.
fastq_dir = "input_data/"

# Full paths of the fastq files to demultiplex.
fastq_files = [
    fastq_dir + fastq_name
    for fastq_name in (
        'SG8_S31_L003_R1_001.fastq.gz',
        'SG9_S42_L004_R1_001.fastq.gz',
    )
]

# Demux every listed fastq file; results are saved under output_data/.
demux.master(fastq_files, 'output_data/')

## 2) Assign gRNA identity

**Inputs:**

    1) Manifest with sample IDs and the PCR1 barcodes used: a tab-separated text file with two columns labeled Sample and Index.
    Multiple indices are separated with commas.
    
        Example: 
        Sample  Index
        SG8     142,143,144,145
        SG9     130,131,132,133
        
    2) Directory where the demuxed files are saved (the output directory passed to demux.master above)
    
**Outputs:**
    
    Pandas dataframe containing read counts for all gRNAs identified in the sequencing reads. 
    Also includes counts of total reads in the library, reads with a gRNA properly assigned, and reads that did not contain a gRNA insert. 
    
    

In [3]:
import cellraft_guide_id_updated as guideid

# Tab-separated manifest with Sample and Index columns (see the inputs above).
sample_manifest = 'input_data/sample_manifest.txt'

# Build the gRNA read-count table from the demuxed files in output_data/.
summary = guideid.process_all_samples(sample_manifest, 'output_data/')

# Save the table alongside the demux output.
summary_csv = 'output_data/gRNA_assignment_summary.csv'
summary.to_csv(summary_csv)