# We need to count the edits across 3'UTRs to compare bulk data to single cell.
- Just like for single cell and bulk, we're converting (aggr) edit BED files to BAM files and using those as inputs to featureCounts. 
- Using 10X three_prime_utr annotations, generate a SAF file using just 3'UTRs and count edits along these regions only.
- Just so we have everything, we're counting edits in every single dataset, resulting in a single counts.txt file.

In [1]:
%matplotlib inline

import glob
import os
import pandas as pd
import gffutils
import pysam
import pybedtools
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
from collections import OrderedDict
from tqdm import tnrange, tqdm_notebook

pd.set_option('display.max_columns', 50)

# Important! Filter the edit file for conf score
- Since we're counting the number of edits found, we'll need to set a cutoff at which we call sites "edited." 

In [2]:
# Minimum conf score an edit site must reach to be kept and counted as "edited";
# the value is also embedded in the output directory and file names.
conf=0.9

In [3]:
# Every directory used below sits under this one groups directory.
groups_dir = '/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep'
# Directory that is globbed for the edit BED files.
input_dir = os.path.join(groups_dir, 'combined_outputs_w_cov_info')
# Results directory; its name carries the confidence cutoff.
output_dir = os.path.join(groups_dir, 'edit_featurecounts{}'.format(conf))
# Intermediate files live in a tmp folder inside the results directory.
tmp_dir = os.path.join(output_dir, 'tmp')

# Convert BED to BAM
- every 'alignment' is an edit site. 
- `.fx` means that 'chr' has been prefixed to the Ensembl-style chromosome names. We don't want those files here (we want the non-fx files), but there is no simple glob pattern that matches `*.bed` while excluding `*.fx.bed`. Instead, we glob the `.fx.bed` files and derive the matching non-fx file name from each one, which assumes this naming scheme holds for every file.

In [4]:
# Collect the non-fx BED file that pairs with every '.fx.bed' file in input_dir.
all_bed_files = []
fx_bed_files = sorted(glob.glob(os.path.join(input_dir, '*.fx.bed')))
for fx_bed in fx_bed_files:
    # The non-fx file shares the name, minus the '.fx' part of the suffix.
    nonfx_bed = fx_bed.replace('.fx.bed', '.bed')
    all_bed_files.append(nonfx_bed)
    # Stop as soon as an expected non-fx file is missing.
    assert os.path.exists(nonfx_bed)
print(len(all_bed_files))
all_bed_files[:3]

220


['/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/combined_outputs_w_cov_info/APOBEC-STAMP_possorted_genome_bam-APOBEC_STAMP_Apo_filtered_lenti_common_expression_barcodes.txt.bed',
 '/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/combined_outputs_w_cov_info/APOBEC-STAMP_possorted_genome_bam_MD-1_barcodes.tsv.bed',
 '/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/combined_outputs_w_cov_info/APOBEC-STAMP_possorted_genome_bam_MD-2_barcodes.tsv.bed']

In [5]:
def filter_bed(input_bed, output_bed, conf):
    """
    Write the rows of a BED file whose conf score is at least ``conf``.

    The input is read as headerless, tab-separated text with the columns
    chrom, start, end, conf, frac and strand. Rows that pass the cutoff are
    written to ``output_bed``, again tab-separated and without header or index.
    """
    column_names = ['chrom', 'start', 'end', 'conf', 'frac', 'strand']
    edits = pd.read_csv(input_bed, sep='\t', names=column_names)
    passing = edits.loc[edits['conf'] >= conf]
    passing.to_csv(output_bed, sep='\t', header=False, index=False)

# Filter each BED file by conf score, convert it to BAM with bedToBam, and sort the BAM with samtools sort.

In [6]:
genome = '/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/inputs/refdata-cellranger-hg19_lenti_common-3.0.0/star/chrNameLength.txt'
progress = tnrange(len(all_bed_files))
for bed in all_bed_files:
    output_bam = os.path.join(tmp_dir, os.path.basename(bed).replace('.bed','.bam'))
    output_sorted_bam = os.path.join(tmp_dir, os.path.basename(bed).replace('.bed','.sorted.bam'))
    filter_fn = os.path.join(tmp_dir, os.path.basename(bed) + ".{}.bed".format(conf))
    filter_bed(bed, filter_fn, conf)
    cmd = 'bedToBam '
    cmd += '-i {} '.format(filter_fn)
    cmd += '-g {} '.format(genome)
    cmd += '> {}'.format(output_bam)
    # print(cmd)  # debug
    ! $cmd
    sort_cmd = 'samtools sort {} > {}'.format(output_bam, output_sorted_bam)
    ! $sort_cmd
    # print(sort_cmd)
    progress.update(1)

HBox(children=(IntProgress(value=0, max=220), HTML(value='')))

  
  if (await self.run_code(code, result,  async_=asy)):


# Run featureCounts to get the number of edits assigned to each gene.

In [7]:
# Gene annotation GTF from the cellranger hg19_lenti_common reference directory;
# it is the featureCounts -a annotation and the source of the 3'UTR subset.
gtf = '/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/inputs/refdata-cellranger-hg19_lenti_common-3.0.0/genes/genes.gtf'

In [8]:
cmd = 'module load subreadfeaturecounts;featureCounts '
cmd += '-a {} '.format(gtf)
cmd += '-s 1 '
# cmd += '-O '
cmd += '-o {}'.format(os.path.join(output_dir, 'counts_at_conf_{}.txt '.format(conf)))
cmd += '-R CORE '
cmd += os.path.join(tmp_dir, '*.sorted.bam')
cmd += ' > counts.log 2>&1'

print("Command: [{}]".format(cmd))
print("Writing to: {}".format(os.path.join(output_dir, 'counts_at_conf_{}.txt '.format(conf))))

! $cmd

Command: [module load subreadfeaturecounts;featureCounts -a /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/inputs/refdata-cellranger-hg19_lenti_common-3.0.0/genes/genes.gtf -s 1 -o /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/counts_at_conf_0.9.txt -R CORE /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/*.sorted.bam > counts.log 2>&1]
Writing to: /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/counts_at_conf_0.9.txt 


# Let's run featureCounts using 3utrs also, in addition to the whole gene.

### First make an annotation SAF file from our GTF file by simply grepping for three_prime_utr regions.

In [9]:
# Destination (inside tmp_dir) for the subset of the GTF used to build the 3'UTR annotation.
gtf_3utr_file = os.path.join(tmp_dir, 'genes.three_prime_utr_only.gtf')

# Copy every GTF line containing the text 'three_prime_utr' (matched anywhere
# in the line, not only in the feature column) into that file.
! grep --color 'three_prime_utr' $gtf > $gtf_3utr_file

In [10]:
# Names for the nine tab-separated GTF fields. Each one is unique because
# pandas warns about, and newer versions reject, repeated entries in `names`.
gtf_columns = ['chrom', 'src', 'region', 'start', 'end', 'score', 'strand', 'frame', 'attr']
gtf_3utr = pd.read_csv(gtf_3utr_file, names=gtf_columns, sep='\t')

# Pull the gene_id value out of the attribute field. The raw string keeps the
# regex escapes valid, and expand=False yields a Series for the single group.
gtf_3utr['geneid'] = gtf_3utr['attr'].str.extract(r'gene_id "([\w\d\.]+)"', expand=False)

# Keep only the identifier, chromosome, coordinates and strand, in the
# column order used for the SAF annotation.
gtf_3utr = gtf_3utr[['geneid', 'chrom', 'start', 'end', 'strand']]
gtf_3utr.head()

  return _read(filepath_or_buffer, kwds)


Unnamed: 0,geneid,chrom,start,end,strand
0,ENSG00000237683,1,137621,138529,-
1,ENSG00000237683,1,134901,135802,-
2,ENSG00000235249,1,368598,368634,+
3,ENSG00000185097,1,621059,621095,-
4,ENSG00000187634,1,879534,879955,+


In [11]:
# Save the 3'UTR table as tab-separated text with neither index nor header row.
saf_path = os.path.join(input_dir, 'genes.three_prime_utr_only.SAF')
gtf_3utr.to_csv(saf_path, sep='\t', header=False, index=False)

In [12]:
# Path of the 3'UTR-only SAF annotation, used as the featureCounts -a input.
saf = os.path.join(input_dir, 'genes.three_prime_utr_only.SAF')

### Run featureCounts

In [13]:
cmd = 'module load subreadfeaturecounts;featureCounts '
cmd += '-a {} '.format(saf)
cmd += '-F SAF '
cmd += '-s 1 '
cmd += '-o {}'.format(os.path.join(output_dir, 'counts_at_conf_{}.three_prime_utr.txt '.format(conf)))
cmd += '-R CORE '
cmd += os.path.join(tmp_dir, '*.sorted.bam')
cmd += ' > three_prime_utr_counts.log 2>&1'

print("Command is [{}]".format(cmd))
print("Writing to: {}".format(os.path.join(output_dir, 'counts_at_conf_{}.three_prime_utr.txt '.format(conf))))

! $cmd

Command is [module load subreadfeaturecounts;featureCounts -a /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/combined_outputs_w_cov_info/genes.three_prime_utr_only.SAF -F SAF -s 1 -o /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/counts_at_conf_0.9.three_prime_utr.txt -R CORE /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/*.sorted.bam > three_prime_utr_counts.log 2>&1]
Writing to: /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/counts_at_conf_0.9.three_prime_utr.txt 


### One of these groups had a really low gene assignment %, let's check it out
- The offending sample is the 10X Apo aggregate of all cells
- Looking at the alignments on IGV, mostly seems to be: annotated as 'intronic', on opposite strand, or possibly in an unannotated UTR/exon

In [14]:
# Load the tab-separated '.summary' table that sits next to the whole-gene counts file.
summary_file = os.path.join(output_dir, 'counts_at_conf_{}.txt.summary'.format(conf))
df = pd.read_table(summary_file)
# Optional: shorten the column labels by dropping the tmp_dir prefix.
# df.columns = [c.replace(tmp_dir, '') for c in df.columns]
df

Unnamed: 0,Status,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam-APOBEC_STAMP_Apo_filtered_lenti_common_expression_barcodes.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam_MD-1_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam_MD-2_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam_MD-5_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam_MD-APOBEC_STAMP_Apo_filtered_lenti_common_expression_barcodes.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam_MD.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/For_Motif_ALL_edits_barcodes_0_1.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/For_Motif_Background_APO_plus_RBFOX2-TIA1_APO_edits_barcodes_4_5.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-APOBEC-STAMP_possorted_genome_bam_MD-19_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-APOBEC-STAMP_possorted_genome_bam_MD-20_barcod
es.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-RBFOX2-STAMP_possorted_genome_bam_MD-21_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-RBFOX2-STAMP_possorted_genome_bam_MD-22_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-RBFOX2-STAMP_possorted_genome_bam_MD-23_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-RBFOX2-STAMP_possorted_genome_bam_MD-24_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-RBFOX2-STAMP_possorted_genome_bam_MD-25_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-RBFOX2-STAMP_possorted_genome_bam_MD-26_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-RBFOX2-STAMP_possorted_genome_bam_MD-30_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-RBFOX2-STAMP_possorted_genome_bam_MD-31_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/HEK-NPC-RBFOX2-STAMP_possorted_genome_bam_MD-32_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_possorted_genome_bam_MD-33_barcodes.
tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_possorted_genome_bam_MD-34_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-0_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-27_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-28_barcodes.tsv.sorted.bam,...,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-6_barcodes_noCS.rand50.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-6_barcodes_noCS.rand500.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-6_barcodes_noCS.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-7_barcodes.tsv.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-7_barcodes
_noCS.rand10.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-7_barcodes_noCS.rand100.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-7_barcodes_noCS.rand200.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-7_barcodes_noCS.rand300.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-7_barcodes_noCS.rand50.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-7_barcodes_noCS.rand500.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RBFOX2-TIA1-STAMP_round2E_read1_feature_FB_possorted_genome_bam_MD-7_barcodes_noCS.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam-RPS2_STAMP_Apo_filtered_lenti_common_expression_barcodes.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD-RPS2_STAMP_Apo_filtered_lenti_common_expression_barcodes.txt.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_fea
turecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.bamdownsampled.08-1.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.bamdownsampled.08-2.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.bamdownsampled.08-3.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.bamdownsampled.08-4.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.bamdownsampled.08-5.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.bamdownsampled.08-6.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.bamdownsampled.08-7.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.bamdownsampled.08-8.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.bamdownsampled.08-9.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.bamdownsampled.1-0.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.downsamp
led60M.sorted.bam,/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/RPS2-STAMP_possorted_genome_bam_MD.sorted.bam
0,Assigned,10304,6583,6583,6580,6560,7894,38449,9165,4819,4824,4909,9996,4735,6354,990,17791,2740,5910,10357,50014,43257,67919,15639,98953,...,6605,40392,82749,60046,1182,8698,13857,17489,5862,23035,54264,53642,45462,12424,12424,12424,12424,12424,12424,12424,12424,12424,13513,13513,25933
1,Unassigned_Unmapped,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Unassigned_MappingQuality,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Unassigned_Chimera,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Unassigned_FragmentLength,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Unassigned_Duplicate,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Unassigned_MultiMapping,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Unassigned_Secondary,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,Unassigned_Nonjunction,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Unassigned_NoFeatures,0,0,0,0,0,109100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,15238,15238,15238,15238,15238,15238,15238,15238,15238,18936,18936,110722


In [15]:
# List every column label of the summary table, one per line.
for column_label in df.columns:
    print(column_label)

Status
/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam-APOBEC_STAMP_Apo_filtered_lenti_common_expression_barcodes.txt.sorted.bam
/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam_MD-1_barcodes.tsv.sorted.bam
/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam_MD-2_barcodes.tsv.sorted.bam
/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam_MD-5_barcodes.tsv.sorted.bam
/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/edit_featurecounts0.9/tmp/APOBEC-STAMP_possorted_genome_bam_MD-APOBEC_STAMP_Apo_fi