1. Confirm where the reads are on the gene (plot, from most 3' UTR to least 3' UTR). Can we support this type of study scRNA-seq wise: https://www.ncbi.nlm.nih.gov/pubmed/30290838 ?
2. Look at the RNA and find deviations from the reference.
3. Look at the DNA and filter out all somatic/germline mutations from the RNA editing.
4. Looking at germline/somatic mutations. Can we confirm patient identity with the germline mutations?
5. What's the ratio of germline/somatic mutations? Can we see somatic mutations in RNA?
6. Can we see any spatial differences in cancers with various subclones and 3' UTR mRNA editing?

Interesting reading: https://www.ncbi.nlm.nih.gov/pubmed/24289319

In [1]:
import pysam
import collections
import time
from lru_cache import lru_cache
from multiprocessing import Pool
import shelve

# BAM file that the filtering pass reads from.
bam_filename = "5k_pbmc_protein_v3_calmd.bam"
# BAM file that receives the reads surviving every filter; the pileup step
# reads this file back.
result_filename = "5k_pbmc_protein_v3_calmd_filtered.bam"
# Path prefix of the shelve files written by the pileup step (one per
# reference, named "<prefix>-<reference>").
pileup_shelve = "./pileups/pileup_shelve"

# Stop the filtering pass once the read index equals this value. Read indices
# start at 0, so -1 never matches and the whole input is processed.
limit = -1

# Filter out reads

In [None]:
# Bookkeeping for the filtering pass: how many reads survive each stage.
flag_counter = collections.Counter()  # SAM flag value -> number of reads
total_reads = 0
# NOTE: despite its name this counts reads that are neither secondary nor
# supplementary. Reads flagged as duplicates (0x400) are NOT removed here.
after_duplicates = 0
after_genes_only = 0
good_reads = 0
genes = set()  # every distinct value of the 'GN' tag seen on a kept read

# SAM flag bits 0x100 (secondary) and 0x800 (supplementary), i.e. 0x900.
SECONDARY_OR_SUPPLEMENTARY = 0x100 | 0x800
# CIGAR operation code 3 is 'N': a skipped region of the reference (intron).
CIGAR_REF_SKIP = 3
# Introns longer than this are most probably an alignment issue.
MAX_INTRON_LENGTH = 10000

# Open input and output files
with pysam.AlignmentFile(bam_filename, "rb") as f:
    with pysam.AlignmentFile(result_filename, "wb", template=f) as g:
        for i, read in enumerate(f):

            # Stop after the limit has been reached
            if i == limit:
                break

            total_reads += 1
            flag_counter[read.flag] += 1

            # Skip all secondary and supplementary alignments
            if read.flag & SECONDARY_OR_SUPPLEMENTARY:
                continue

            after_duplicates += 1

            # Skip all reads that do not map to a gene. Only the tag lookup
            # sits inside the try so that no other KeyError can be swallowed.
            try:
                gene = read.get_tag('GN')
            except KeyError:
                continue

            genes.add(gene)
            after_genes_only += 1

            # Skip reads containing an implausibly long intron. cigartuples
            # is None for a read without a CIGAR string, hence the fallback.
            if any(
                code == CIGAR_REF_SKIP and length > MAX_INTRON_LENGTH
                for code, length in (read.cigartuples or ())
            ):
                continue

            # Write rest of reads to file
            good_reads += 1
            g.write(read)

In [None]:
# Report how many reads were left after each filtering stage.
print(f"Reads flags: {flag_counter}")
print(f"Total reads: {total_reads}")
print(f"Reads after secondary alignment removal: {after_duplicates}")
print(f"Reads after discarding whatever didn't map to a gene: {after_genes_only}")
print(f"Reads after discarding all reads with long introns: {good_reads}")

```
Reads flags: Counter({0: 59179732, 256: 42396755, 16: 42268302, 4: 33264525, 1024: 25415291, 1040: 16208331, 1028: 14491834, 272: 12184627})
Total reads: 245409397
Reads after secondary alignment removal: 190828015
Reads after discarding whatever didn't map to a gene: 83212367
Reads after discarding all reads with long introns: 82786316
```

In [None]:
# Share of all input reads that ended up in the filtered BAM.
print(f"Fraction of reads written to filtered file {good_reads / total_reads}")

```
Fraction of reads written to filtered file 0.33733963333115563
```

In [None]:
# Number of distinct 'GN' tag values collected during filtering.
print(f"Number of genes {len(genes)}")

```
Number of genes 22766
```

In [None]:
!samtools index $result_filename

# Find RNA editing events, SNPs and SNVs

In [2]:
"""
This LRU cache will cache the lookups we have created.
The custom version of the cache will ensure that we don't end up
thrashing by marking the cache full if there is less than 1 GiB
of free memory left on the machine.
"""

GB = 1024**3

@lru_cache(use_memory_up_to=(1 * GB))
def get_read_pos_to_reference_lookup(alignment):
    """Build a read-position -> reference-base table for one alignment.

    The table is filled from ``alignment.get_aligned_pairs(with_seq=True)``,
    which yields ``(read position, reference position, reference base)``
    triples; the reference position is not kept. If several triples share a
    read position, the last one wins. Results are memoised per alignment by
    the surrounding cache decorator.
    """
    lookup = {}

    for read_position, _reference_position, reference_base in alignment.get_aligned_pairs(with_seq=True):
        lookup[read_position] = reference_base

    return lookup

In [3]:
def compare_read_to_reference_at_position(alignment, pos):
    """Return ``(read base, reference base)`` at read position ``pos``.

    The read base is taken from ``alignment.seq`` and the reference base from
    the cached per-alignment lookup table.

    Returns ``None`` (not a tuple) when indexing the read sequence raises
    ``TypeError``, for example when ``pos`` or the sequence itself is ``None``.
    """
    try:
        observed = alignment.seq[pos]
    except TypeError:
        # No usable read base at this position: report that to the caller.
        return None

    expected = get_read_pos_to_reference_lookup(alignment)[pos]
    return observed, expected

In [4]:
def do_pileup(reference):
    """Record positions on one reference where reads disagree with the genome.

    Walks the pileup of ``reference`` in the filtered BAM and, for every
    column, counts per ``(strand, reference base, read base)`` how many reads
    differ from the reference. A column is reported when it has more than 5
    usable reads and its most frequent disagreement occurs more than 3 times
    and in more than 10% of those reads.

    Reported columns are printed and stored in the shelve file
    ``"<pileup_shelve>-<reference>"`` under the key ``str(column.pos)`` with
    the value ``(total, disagreement)``.

    Args:
        reference: Name of the contig to scan.

    Returns:
        None. Results are only available through the shelve file and stdout.
    """
    shelve_path = "{}-{}".format(pileup_shelve, reference)

    with pysam.AlignmentFile(result_filename, "rb") as f, shelve.open(shelve_path) as p:
        # This creates a 'pileup', i.e. a list of objects mapping
        # coordinates to reads that overlap with those coordinates.
        # A column is for a specific coordinate in a range, and each
        # column has a number of reads.
        for column in f.pileup(contig=reference):
            disagreement = collections.Counter()
            total = 0

            # Get reads for a position
            for read in column.pileups:
                # Deletions and reference skips carry no read base here.
                if read.is_del or read.is_refskip:
                    continue

                # Get how the read we get differs from the standard genome.
                comparison = compare_read_to_reference_at_position(read.alignment, read.query_position)

                # The helper returns None when it cannot read a base; such a
                # read is skipped instead of crashing on tuple unpacking.
                if comparison is None:
                    continue

                read_base, reference_base = comparison
                total += 1

                # Save differences for the particular position into a counter.
                if read_base != reference_base:
                    strand = "-" if read.alignment.is_reverse else "+"

                    disagreement[(strand, reference_base, read_base)] += 1

            # Nothing to report without enough coverage or any disagreement.
            if not disagreement or total <= 5:
                continue

            max_disagreement = max(disagreement.values())

            # If we have found a substantial number of differences, store
            # them and print them out together with the cache statistics.
            if max_disagreement > 3 and max_disagreement > 0.1 * total:
                p[str(column.pos)] = (total, disagreement)
                print(column.reference_name, column.pos, total, disagreement)
                print(get_read_pos_to_reference_lookup.cache_info())

In [None]:
# Many of the references in the BAM are scaffolds we do not really care about.
# To keep the cores busy with useful work only, the references are listed
# explicitly instead of being read from the file, which would look like:
#
#     with pysam.AlignmentFile(result_filename, "rb") as f:
#         references = f.references

# Chromosomes 1-22 plus MT, X and Y, in lexicographic order of their names.
references = tuple(sorted([str(number) for number in range(1, 23)] + ["MT", "X", "Y"]))
print("References:", references)

# Run one pileup per reference on a pool of worker processes.
with Pool() as pool:
    pool.map(do_pileup, references)

References: ('1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '3', '4', '5', '6', '7', '8', '9', 'MT', 'X', 'Y')
15 21298756 9 Counter({('-', 't', 'C'): 9})
14 20261458 6 Counter({('-', 't', 'C'): 4})
CacheInfo(hits=3357, misses=43, maxsize=None, currsize=43)
CacheInfo(hits=3355, misses=46, maxsize=None, currsize=46)
14 20261522 6 Counter({('-', 't', 'C'): 6})
CacheInfo(hits=3764, misses=46, maxsize=None, currsize=46)
1 778988 47 Counter({('+', 'c', 'G'): 6})
CacheInfo(hits=5964, misses=107, maxsize=None, currsize=107)
1 779046 32 Counter({('+', 'g', 'A'): 5})
CacheInfo(hits=8777, misses=118, maxsize=None, currsize=118)
20 279286 7 Counter({('-', 'c', 'G'): 4})
CacheInfo(hits=9813, misses=124, maxsize=None, currsize=124)
12 190375 8 Counter({('-', 'g', 'A'): 5})
CacheInfo(hits=11216, misses=138, maxsize=None, currsize=138)
20 297170 7 Counter({('+', 'a', 'G'): 7})
CacheInfo(hits=10823, misses=140, maxsize=None, currsize=140)
20 297171 6 Counter({(

15 22825210 21 Counter({('+', 'c', 'T'): 12})
CacheInfo(hits=159598, misses=1921, maxsize=None, currsize=1921)
CacheInfo(hits=185884, misses=2130, maxsize=None, currsize=2130)
19 541684 13 Counter({('+', 'c', 'T'): 6})
CacheInfo(hits=188280, misses=2189, maxsize=None, currsize=2189)
12 297125 9 Counter({('-', 'g', 'A'): 7})
CacheInfo(hits=189489, misses=2270, maxsize=None, currsize=2270)
18 263717 28 Counter({('-', 't', 'C'): 18})
CacheInfo(hits=162782, misses=1952, maxsize=None, currsize=1952)
10 843009 23 Counter({('-', 'g', 'C'): 4})
CacheInfo(hits=166642, misses=2038, maxsize=None, currsize=2038)
10 843012 22 Counter({('-', 'c', 'T'): 4})
CacheInfo(hits=166706, misses=2038, maxsize=None, currsize=2038)
10 843013 23 Counter({('-', 'a', 'G'): 4})
CacheInfo(hits=166729, misses=2038, maxsize=None, currsize=2038)
22 17112636 196 Counter({('+', 'a', 'C'): 20})
CacheInfo(hits=196300, misses=2514, maxsize=None, currsize=2514)
22 17112640 195 Counter({('+', 'c', 'G'): 21})
CacheInfo(hits=19

12 564621 54 Counter({('-', 'a', 'G'): 26})
CacheInfo(hits=358423, misses=4276, maxsize=None, currsize=4276)
12 645109 16 Counter({('+', 'a', 'G'): 16})
CacheInfo(hits=367760, misses=4377, maxsize=None, currsize=4377)
15 24954985 51 Counter({('+', 'c', 'G'): 30, ('+', 'c', 'A'): 1})
CacheInfo(hits=381234, misses=4511, maxsize=None, currsize=4511)
12 753474 7 Counter({('+', 't', 'G'): 4})
21 6122793 20 Counter({('+', 'c', 'T'): 8})
CacheInfo(hits=384589, misses=4572, maxsize=None, currsize=4572)
CacheInfo(hits=393666, misses=4587, maxsize=None, currsize=4587)
20 435525 21 Counter({('-', 't', 'C'): 11})
CacheInfo(hits=383653, misses=4523, maxsize=None, currsize=4523)
12 753822 11 Counter({('+', 't', 'C'): 11})
CacheInfo(hits=386894, misses=4602, maxsize=None, currsize=4602)
16 85841 39 Counter({('+', 'g', 'A'): 7})
CacheInfo(hits=395984, misses=4566, maxsize=None, currsize=4566)
3 4361468 35 Counter({('-', 't', 'G'): 35})
CacheInfo(hits=380858, misses=4507, maxsize=None, currsize=4507)
2

22 17180456 6 Counter({('-', 'c', 'G'): 6})
CacheInfo(hits=696740, misses=8550, maxsize=None, currsize=8550)
13 19672261 163 Counter({('+', 'a', 'G'): 40})
CacheInfo(hits=557573, misses=6538, maxsize=None, currsize=6538)
22 17180873 6 Counter({('-', 't', 'C'): 5})
CacheInfo(hits=701680, misses=8613, maxsize=None, currsize=8613)
2 3500915 10 Counter({('-', 'a', 'G'): 5})
22 17180900 11 Counter({('-', 'c', 'T'): 10})
CacheInfo(hits=701904, misses=8619, maxsize=None, currsize=8619)
CacheInfo(hits=705898, misses=8267, maxsize=None, currsize=8267)
12 991690 17 Counter({('+', 'g', 'C'): 8})
CacheInfo(hits=704212, misses=8480, maxsize=None, currsize=8480)
10 3137982 111 Counter({('-', 'c', 'T'): 52})
CacheInfo(hits=721763, misses=8694, maxsize=None, currsize=8694)
22 17181902 9 Counter({('-', 'a', 'G'): 9})
CacheInfo(hits=706793, misses=8674, maxsize=None, currsize=8674)
10 3138034 59 Counter({('-', 't', 'C'): 59})
CacheInfo(hits=726086, misses=8714, maxsize=None, currsize=8714)
3 5218758 6 C

12 2824705 40 Counter({('+', 'c', 'T'): 17})
CacheInfo(hits=1024564, misses=12295, maxsize=None, currsize=12295)
10 3776930 95 Counter({('-', 'c', 'T'): 19})
CacheInfo(hits=1091750, misses=13570, maxsize=None, currsize=13570)
10 3776931 98 Counter({('-', 'c', 'T'): 15, ('-', 'c', 'A'): 1})
CacheInfo(hits=1091848, misses=13570, maxsize=None, currsize=13570)
10 3776932 79 Counter({('-', 't', 'C'): 10})
CacheInfo(hits=1091922, misses=13575, maxsize=None, currsize=13575)
1 1223250 34 Counter({('-', 'a', 'G'): 34})
CacheInfo(hits=1141516, misses=13229, maxsize=None, currsize=13229)
21 6491704 36 Counter({('-', 'c', 'T'): 16, ('-', 'c', 'G'): 1})
CacheInfo(hits=1076570, misses=12596, maxsize=None, currsize=12596)
2 3576509 1603 Counter({('+', 'g', 'A'): 231})
CacheInfo(hits=1067839, misses=13735, maxsize=None, currsize=13735)
12 3606766 75 Counter({('-', 't', 'C'): 33})
CacheInfo(hits=1111122, misses=13361, maxsize=None, currsize=13361)
12 3606778 47 Counter({('-', 't', 'C'): 5})
CacheInfo(h

15 28111712 6 Counter({('-', 'c', 'T'): 6})
CacheInfo(hits=1196851, misses=13940, maxsize=None, currsize=13940)
17 1574766 43 Counter({('-', 'a', 'C'): 42})
CacheInfo(hits=1448872, misses=17470, maxsize=None, currsize=17470)
3 9478139 7 Counter({('+', 't', 'A'): 4})
CacheInfo(hits=1388273, misses=16606, maxsize=None, currsize=16606)
3 9478140 7 Counter({('+', 'c', 'A'): 4})
CacheInfo(hits=1388280, misses=16606, maxsize=None, currsize=16606)
3 9478142 6 Counter({('+', 't', 'A'): 4})
CacheInfo(hits=1388292, misses=16606, maxsize=None, currsize=16606)
17 1585378 25 Counter({('-', 't', 'C'): 4})
CacheInfo(hits=1458377, misses=17591, maxsize=None, currsize=17591)
3 9649504 25 Counter({('+', 'a', 'G'): 6})
CacheInfo(hits=1399261, misses=16761, maxsize=None, currsize=16761)
17 1585440 54 Counter({('-', 't', 'C'): 6})
CacheInfo(hits=1460922, misses=17634, maxsize=None, currsize=17634)
17 1585441 55 Counter({('-', 't', 'C'): 7})
CacheInfo(hits=1460976, misses=17635, maxsize=None, currsize=17635

20 1393086 45 Counter({('-', 'a', 'C'): 8, ('-', 'a', 'T'): 4})
CacheInfo(hits=1775303, misses=20963, maxsize=None, currsize=20963)
20 1393091 23 Counter({('-', 'g', 'T'): 4})
CacheInfo(hits=1775461, misses=20963, maxsize=None, currsize=20963)
12 6389567 10 Counter({('+', 'a', 'G'): 8})
CacheInfo(hits=1858161, misses=22187, maxsize=None, currsize=22187)
17 1779717 14 Counter({('-', 'g', 'A'): 5})
CacheInfo(hits=1929686, misses=23231, maxsize=None, currsize=23231)
17 1780826 48 Counter({('-', 't', 'C'): 7})
CacheInfo(hits=1936093, misses=23328, maxsize=None, currsize=23328)
17 1781121 26 Counter({('-', 't', 'C'): 4})
CacheInfo(hits=1942666, misses=23392, maxsize=None, currsize=23392)
17 1781123 23 Counter({('-', 't', 'C'): 4})
CacheInfo(hits=1942714, misses=23392, maxsize=None, currsize=23392)
17 1781147 23 Counter({('-', 't', 'C'): 7})
CacheInfo(hits=1943271, misses=23393, maxsize=None, currsize=23393)
17 1827973 8 Counter({('-', 't', 'C'): 8})
CacheInfo(hits=1960687, misses=23594, max

CacheInfo(hits=2282553, misses=27204, maxsize=None, currsize=27204)
15 33852029 6 Counter({('-', 't', 'A'): 6})
CacheInfo(hits=2227033, misses=26266, maxsize=None, currsize=26266)
19 896505 28 Counter({('-', 'a', 'G'): 8})
CacheInfo(hits=2331389, misses=27197, maxsize=None, currsize=27197)
15 34084019 8 Counter({('-', 'g', 'T'): 5})
CacheInfo(hits=2257962, misses=26630, maxsize=None, currsize=26630)
15 34084020 9 Counter({('-', 'a', 'T'): 6})
CacheInfo(hits=2257971, misses=26630, maxsize=None, currsize=26630)
10 3781053 8 Counter({('-', 'a', 'G'): 8})
CacheInfo(hits=2456029, misses=29834, maxsize=None, currsize=29834)
13 21178774 14 Counter({('+', 'g', 'A'): 5})
CacheInfo(hits=2223981, misses=26090, maxsize=None, currsize=26090)
13 21372639 86 Counter({('-', 'g', 'A'): 39})
CacheInfo(hits=2236417, misses=26292, maxsize=None, currsize=26292)
17 2300053 13 Counter({('-', 't', 'C'): 6})
CacheInfo(hits=2470139, misses=29509, maxsize=None, currsize=29509)
17 2300158 14 Counter({('-', 'a', '

16 1314363 55 Counter({('+', 'a', 'G'): 27})
CacheInfo(hits=2521254, misses=29474, maxsize=None, currsize=29474)
12 6493200 12 Counter({('-', 'c', 'T'): 12})
CacheInfo(hits=2673372, misses=31814, maxsize=None, currsize=31814)
16 1316306 10 Counter({('+', 't', 'A'): 10})
CacheInfo(hits=2529664, misses=29572, maxsize=None, currsize=29572)
16 1316423 33 Counter({('+', 'g', 'T'): 21})
CacheInfo(hits=2531890, misses=29609, maxsize=None, currsize=29609)
12 6529921 8 Counter({('+', 'a', 'G'): 8})
CacheInfo(hits=2682390, misses=31920, maxsize=None, currsize=31920)
17 2415771 20 Counter({('-', 'g', 'A'): 12})
CacheInfo(hits=2732832, misses=32648, maxsize=None, currsize=32648)
1 1407231 75 Counter({('-', 'g', 'C'): 74})
CacheInfo(hits=2659397, misses=30973, maxsize=None, currsize=30973)
3 9836098 60 Counter({('+', 'g', 'A'): 22})
CacheInfo(hits=2724515, misses=32408, maxsize=None, currsize=32408)
17 2417276 51 Counter({('-', 't', 'C'): 9})
CacheInfo(hits=2752261, misses=32901, maxsize=None, curr

19 1037932 42 Counter({('+', 'g', 'C'): 42})
CacheInfo(hits=2906010, misses=34070, maxsize=None, currsize=34070)
20 2653289 26 Counter({('+', 't', 'C'): 13})
CacheInfo(hits=2902204, misses=34359, maxsize=None, currsize=34359)
19 1037986 81 Counter({('+', 'a', 'G'): 77})
CacheInfo(hits=2910277, misses=34166, maxsize=None, currsize=34166)
13 25097965 27 Counter({('+', 't', 'C'): 25})
CacheInfo(hits=2780921, misses=32854, maxsize=None, currsize=32854)
20 2653520 7 Counter({('+', 'c', 'A'): 7})
CacheInfo(hits=2907681, misses=34412, maxsize=None, currsize=34412)
17 3037425 17 Counter({('+', 'c', 'A'): 17})
CacheInfo(hits=3000974, misses=35861, maxsize=None, currsize=35861)
13 25247311 50 Counter({('-', 'g', 'A'): 49, ('-', 'g', 'C'): 1})
CacheInfo(hits=2788738, misses=32971, maxsize=None, currsize=32971)
17 3037464 69 Counter({('+', 'a', 'G'): 68})
CacheInfo(hits=3002551, misses=35913, maxsize=None, currsize=35913)
3 9963707 106 Counter({('-', 't', 'C'): 66, ('-', 't', 'A'): 1})
CacheInfo(h

CacheInfo(hits=3405725, misses=41149, maxsize=None, currsize=41149)
15 34343129 32 Counter({('-', 'g', 'C'): 4})
CacheInfo(hits=3278077, misses=38629, maxsize=None, currsize=38629)
10 6089002 12 Counter({('+', 'c', 'G'): 5})
CacheInfo(hits=3438192, misses=41523, maxsize=None, currsize=41523)
10 6089007 16 Counter({('+', 'c', 'G'): 4})
CacheInfo(hits=3438263, misses=41527, maxsize=None, currsize=41527)
10 6089019 41 Counter({('+', 'c', 'G'): 8})
CacheInfo(hits=3438597, misses=41552, maxsize=None, currsize=41552)
10 6089032 76 Counter({('+', 'c', 'G'): 25})
CacheInfo(hits=3439249, misses=41587, maxsize=None, currsize=41587)
19 1065563 17 Counter({('+', 'g', 'C'): 17})
CacheInfo(hits=3286187, misses=38517, maxsize=None, currsize=38517)
19 1067177 10 Counter({('+', 'c', 'T'): 4})
CacheInfo(hits=3286568, misses=38529, maxsize=None, currsize=38529)
19 1067178 10 Counter({('+', 'c', 'A'): 4})
CacheInfo(hits=3286578, misses=38529, maxsize=None, currsize=38529)
19 1067179 12 Counter({('+', 'g',

CacheInfo(hits=3718381, misses=43992, maxsize=None, currsize=43992)
17 3896818 194 Counter({('-', 'g', 'C'): 20})
CacheInfo(hits=3711436, misses=44338, maxsize=None, currsize=44338)
17 3896824 186 Counter({('-', 'g', 'T'): 20})
CacheInfo(hits=3712563, misses=44347, maxsize=None, currsize=44347)
17 3896827 182 Counter({('-', 'c', 'T'): 19})
CacheInfo(hits=3713106, misses=44349, maxsize=None, currsize=44349)
17 3896831 171 Counter({('-', 't', 'C'): 18})
CacheInfo(hits=3713810, misses=44351, maxsize=None, currsize=44351)
17 3896923 36 Counter({('-', 'c', 'T'): 34})
CacheInfo(hits=3721931, misses=44394, maxsize=None, currsize=44394)
3 10151790 103 Counter({('+', 'a', 'G'): 18})
CacheInfo(hits=3737796, misses=44271, maxsize=None, currsize=44271)
10 6427192 25 Counter({('-', 'g', 'T'): 13})
CacheInfo(hits=3730399, misses=45044, maxsize=None, currsize=45044)
3 10151824 123 Counter({('+', 'a', 'G'): 122})
CacheInfo(hits=3741728, misses=44328, maxsize=None, currsize=44328)
17 3923874 43 Counter

3 11557373 23 Counter({('+', 'a', 'G'): 7, ('+', 'a', 'T'): 1})
CacheInfo(hits=4086160, misses=48385, maxsize=None, currsize=48385)
22 20708084 49 Counter({('-', 'c', 'T'): 18})
CacheInfo(hits=3783442, misses=44854, maxsize=None, currsize=44854)
15 34553932 18 Counter({('-', 't', 'C'): 7})
CacheInfo(hits=4013280, misses=47212, maxsize=None, currsize=47212)
15 34853351 7 Counter({('-', 'c', 'T'): 7})
CacheInfo(hits=4029746, misses=47432, maxsize=None, currsize=47432)
15 34853938 8 Counter({('-', 't', 'A'): 8})
CacheInfo(hits=4036234, misses=47509, maxsize=None, currsize=47509)
15 34855851 21 Counter({('-', 'g', 'T'): 7})
CacheInfo(hits=4046127, misses=47627, maxsize=None, currsize=47627)
15 34855862 15 Counter({('-', 'c', 'G'): 5})
CacheInfo(hits=4046318, misses=47628, maxsize=None, currsize=47628)
15 34856806 47 Counter({('-', 'c', 'T'): 46})
CacheInfo(hits=4066432, misses=47897, maxsize=None, currsize=47897)
3 11790443 12 Counter({('-', 'c', 'T'): 5})
CacheInfo(hits=4149417, misses=49

CacheInfo(hits=4230594, misses=49513, maxsize=None, currsize=49513)
1 1754600 12 Counter({('-', 'g', 'T'): 9})
CacheInfo(hits=4320014, misses=50658, maxsize=None, currsize=50658)
22 21469513 87 Counter({('+', 'c', 'T'): 85})
CacheInfo(hits=4200206, misses=49821, maxsize=None, currsize=49821)
22 21469642 176 Counter({('+', 't', 'C'): 139})
CacheInfo(hits=4204926, misses=49913, maxsize=None, currsize=49913)
11 406472 10 Counter({('-', 'a', 'C'): 10})
CacheInfo(hits=4249672, misses=49572, maxsize=None, currsize=49572)
11 406482 10 Counter({('-', 't', 'C'): 10})
CacheInfo(hits=4249779, misses=49573, maxsize=None, currsize=49573)
15 40035478 32 Counter({('+', 'g', 'A'): 6})
CacheInfo(hits=4567130, misses=53900, maxsize=None, currsize=53900)
15 40035488 34 Counter({('+', 'a', 'T'): 14})
CacheInfo(hits=4567502, misses=53900, maxsize=None, currsize=53900)
15 40035496 23 Counter({('+', 'c', 'G'): 12})
CacheInfo(hits=4567762, misses=53901, maxsize=None, currsize=53901)
1 1785577 181 Counter({('-

CacheInfo(hits=4752661, misses=57542, maxsize=None, currsize=57542)
20 3926777 37 Counter({('+', 'g', 'A'): 17})
CacheInfo(hits=4984777, misses=59130, maxsize=None, currsize=59130)
10 11330251 22 Counter({('+', 'g', 'C'): 22})
CacheInfo(hits=4768492, misses=57246, maxsize=None, currsize=57246)
18 3594455 37 Counter({('+', 't', 'G'): 4})
CacheInfo(hits=4933648, misses=57450, maxsize=None, currsize=57450)
20 3929758 38 Counter({('+', 't', 'C'): 21})
CacheInfo(hits=5020246, misses=59563, maxsize=None, currsize=59563)
20 3932475 10 Counter({('-', 'g', 'T'): 6})
CacheInfo(hits=5028405, misses=59654, maxsize=None, currsize=59654)
18 5236926 47 Counter({('-', 'g', 'A'): 23})
CacheInfo(hits=4964212, misses=57810, maxsize=None, currsize=57810)
18 5237049 13 Counter({('-', 't', 'G'): 7})
CacheInfo(hits=4968154, misses=57840, maxsize=None, currsize=57840)
18 5237162 12 Counter({('-', 'c', 'G'): 7})
CacheInfo(hits=4969566, misses=57857, maxsize=None, currsize=57857)
18 5238828 16 Counter({('+', 'c

CacheInfo(hits=5429572, misses=64593, maxsize=None, currsize=64593)
20 4782222 6 Counter({('-', 'g', 'A'): 6})
CacheInfo(hits=5430553, misses=64604, maxsize=None, currsize=64604)
20 4782437 12 Counter({('-', 'g', 'C'): 12})
CacheInfo(hits=5431684, misses=64620, maxsize=None, currsize=64620)
20 4782461 10 Counter({('-', 'g', 'A'): 10})
CacheInfo(hits=5431977, misses=64623, maxsize=None, currsize=64623)
17 4937572 542 Counter({('-', 'g', 'A'): 261})
CacheInfo(hits=5581572, misses=66765, maxsize=None, currsize=66765)
11 567255 11 Counter({('-', 'g', 'A'): 5})
CacheInfo(hits=5151312, misses=60056, maxsize=None, currsize=60056)
12 6568286 15 Counter({('-', 'g', 'C'): 4})
CacheInfo(hits=5011309, misses=59106, maxsize=None, currsize=59106)
12 6570156 52 Counter({('-', 'c', 'T'): 8})
CacheInfo(hits=5014195, misses=59177, maxsize=None, currsize=59177)
20 4784035 16 Counter({('-', 't', 'C'): 16})
CacheInfo(hits=5466396, misses=65032, maxsize=None, currsize=65032)
19 1228191 112 Counter({('+', 't

10 12250344 477 Counter({('+', 'a', 'G'): 475})
CacheInfo(hits=5616436, misses=67876, maxsize=None, currsize=67876)
10 12250350 515 Counter({('+', 'a', 'G'): 262})
CacheInfo(hits=5619463, misses=67933, maxsize=None, currsize=67933)
15 40088005 60 Counter({('-', 't', 'A'): 58})
CacheInfo(hits=6043112, misses=71387, maxsize=None, currsize=71387)
12 6650512 19 Counter({('-', 't', 'C'): 19})
CacheInfo(hits=5403088, misses=63806, maxsize=None, currsize=63806)
20 5115452 32 Counter({('-', 'g', 'C'): 15})
CacheInfo(hits=5998176, misses=71316, maxsize=None, currsize=71316)
15 40284425 30 Counter({('-', 'g', 'C'): 4})
CacheInfo(hits=6066525, misses=71659, maxsize=None, currsize=71659)
15 40284427 30 Counter({('-', 't', 'C'): 4})
CacheInfo(hits=6066585, misses=71659, maxsize=None, currsize=71659)
15 40284429 28 Counter({('-', 'a', 'T'): 4})
CacheInfo(hits=6066639, misses=71660, maxsize=None, currsize=71660)
15 40284430 27 Counter({('-', 'a', 'G'): 4})
CacheInfo(hits=6066666, misses=71660, maxsiz

How many hours will it take to run the RNA modifications/editing script?

In [None]:
# Extrapolated runtime in hours: 82786316 (the number of reads written to the
# filtered BAM) divided by a 300000-read slice, times 28.73 (presumably the
# seconds measured for that slice — TODO confirm), divided by 3600 s per hour.
(82786316/(1300000 - 1000000) * 28.734182119369507) / 3600

In [None]:
# Scratch cell: show the aligned pairs of whatever `read` is currently bound to
# (presumably the last read left over from an earlier loop — verify).
read.get_aligned_pairs(with_seq=True)