In [None]:
"""

Date: June 3, 2022

Author: Karine Choquet

This script will produce datasets for Figures 1 and S1 of the splicing order manuscript



"""

In [1]:
import numpy as np
import pandas as pd
import pysam
import seaborn as sns
sns.set_style("white")
sns.set_style("ticks")

import matplotlib.pyplot as plt
import re
%matplotlib inline
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.rc('xtick', labelsize=14) 
plt.rc('ytick', labelsize=14)

import matplotlib.ticker as ticker
from matplotlib.ticker import ScalarFormatter

import math

import pybedtools
from pybedtools import BedTool

from collections import Counter

import random


import scipy
from scipy import stats
from scipy.stats import ttest_ind, ttest_ind_from_stats
from scipy.special import stdtr
from scipy.stats import chi2_contingency

import itertools

from statsmodels.stats.multitest import multipletests

from more_itertools import consecutive_groups

from tqdm import tqdm

In [2]:
# Read the hg38 intron annotation and keep one row per
# (gene, intron_total, strand) combination.
hg38_intron_df = (
    pd.read_table("/path/to/annotation_files/hg38_all_intron_features.txt")
    .loc[:, ['gene', 'intron_total', 'strand']]
    .drop_duplicates()
    .reset_index(drop=True)
)



## Splicing order and poly(A) tail length

In [3]:
# Per-read splicing status tables, one file per replicate
chr1_multi_introns_df = pd.read_table("/path/to/K562_chr_rep1_hg38_multi_introns_isoforms_df.RefSeq.txt")
chr2_multi_introns_df = pd.read_table("/path/to/K562_chr_rep2_hg38_multi_introns_isoforms_df.RefSeq.txt")

# Stack both replicates, then attach intron_total by matching gene and strand
chr_multi_introns_df = pd.concat(
    [chr1_multi_introns_df, chr2_multi_introns_df],
    ignore_index=True,
).merge(hg38_intron_df, on=['gene', 'strand'])

In [4]:
# Read the splicing order paths table and keep only the rows with rank 1
path_df_nodup = pd.read_table('/path/to/TableS3_K562_splicing_order_paths.txt')
path_df_nodup_top = (
    path_df_nodup
    .loc[path_df_nodup['rank'].eq(1)]
    .reset_index(drop=True)
)

In [5]:
# One row per distinct intron group among the rank-1 paths, annotated with the
# gene's intron_total and strand from the intron annotation.
regions_df = (
    path_df_nodup_top
    .loc[:, ['gene', 'gene_name', 'analyzed_introns', 'n_analyzed_introns']]
    .drop_duplicates()
    .reset_index(drop=True)
    .merge(hg38_intron_df, on='gene')
)

In [7]:
# Change intron number from genomic position to position in transcript
def reset_intron_numbers(introns_of_interest, n_introns_gene, strand):
    """Renumber a "_"-separated list of intron numbers according to strand.

    Parameters
    ----------
    introns_of_interest : str
        Intron numbers joined by "_" (e.g. "0_1_2"). Every field must parse
        as an integer.
    n_introns_gene : int
        Value each intron number is subtracted from on the "-" strand.
    strand : str
        Either "+" or "-".

    Returns
    -------
    str
        The renumbered introns, in the same order as the input and joined by
        "_". On "+" each number ``i`` becomes ``i + 1``; on "-" it becomes
        ``n_introns_gene - i``.

    Raises
    ------
    ValueError
        If ``strand`` is neither "+" nor "-", or if a field of
        ``introns_of_interest`` is not an integer.
    """
    intron_numbers = [int(i) for i in introns_of_interest.split("_")]

    if strand == "-":
        renumbered = [n_introns_gene - i for i in intron_numbers]
    elif strand == "+":
        renumbered = [i + 1 for i in intron_numbers]
    else:
        # Previously this case fell through and failed with an
        # UnboundLocalError on the return statement.
        raise ValueError("strand must be '+' or '-', got {!r}".format(strand))

    return "_".join(str(i) for i in renumbered)

In [8]:
# Change intron number
# Walk the three input columns in parallel instead of a row-wise
# DataFrame.apply(axis=1): no per-row Series is built, and an empty table
# simply yields an empty column.
chr_multi_introns_df['new_intron_numbers'] = [
    reset_intron_numbers(intron_numbers, intron_total, strand)
    for intron_numbers, intron_total, strand in zip(
        chr_multi_introns_df['intron_numbers'],
        chr_multi_introns_df['intron_total'],
        chr_multi_introns_df['strand'],
    )
]

In [9]:
# Get splicing status only for introns within intron group
def code_intron_status(df, n_introns, analyzed_introns_list):
    """Extract, for every read, the splice status of the analyzed introns.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per read with the columns ``read``, ``new_intron_numbers``
        ("_"-separated integers) and ``splice_status`` ("_"-separated status
        codes, one per entry of ``new_intron_numbers``). Any index is accepted.
    n_introns : int
        Unused; kept so existing calls keep working.
    analyzed_introns_list : list of int
        Intron numbers whose status should be kept.

    Returns
    -------
    pandas.DataFrame
        Columns ``['readname'] + analyzed_introns_list``, one row per read.
        The status values are taken in the order in which the analyzed introns
        appear in the read's ``new_intron_numbers``. An empty ``df`` yields an
        empty frame with those columns.

    Raises
    ------
    ValueError
        If the rows do not produce exactly one value per analyzed intron
        (raised by the DataFrame constructor on a column-count mismatch).
    """
    results_list = []

    # Iterate the needed columns directly: independent of the index labels and
    # without a full row lookup per field.
    for readname, intron_numbers, splice_status_introns in zip(
            df['read'], df['new_intron_numbers'], df['splice_status']):

        intron_numbers_list = [int(a) for a in intron_numbers.split("_")]
        splice_status_list_introns = splice_status_introns.split("_")

        # Positions within the read that belong to the intron group
        introns_of_interest_pos = [pos for pos, intron in enumerate(intron_numbers_list)
                                   if intron in analyzed_introns_list]
        splice_status_list_sub = [splice_status_list_introns[pos] for pos in introns_of_interest_pos]

        results_list.append([readname] + splice_status_list_sub)

    # Passing the columns to the constructor keeps the labels even when there
    # are no rows (assigning .columns afterwards fails on an empty frame).
    results_df = pd.DataFrame(results_list,
                              columns=['readname'] + list(analyzed_introns_list))

    return results_df
            
    
    

In [10]:
# Apply function above to each intron group
mat_list = []

# Positional row numbers of the reads of every gene, computed once so the loop
# does not rescan the whole read table for each intron group.
read_rows_by_gene = chr_multi_introns_df.groupby('gene', sort=False).indices

progress = tqdm(range(len(regions_df)))
for i in progress:
    region = regions_df.loc[i]
    gene = region['gene']
    analyzed_introns = region['analyzed_introns']
    n_analyzed_introns = region['n_analyzed_introns']
    intron_total = region['intron_total']
    strand = region['strand']

    # Show the current group in the progress bar rather than printing a line
    # per iteration.
    progress.set_postfix_str("{} {}".format(gene, analyzed_introns))

    analyzed_introns_list = [int(a) for a in analyzed_introns.split("_")]

    # in case the list contains non-consecutive numbers, fill it in
    new_analyzed_introns_list = list(range(min(analyzed_introns_list), max(analyzed_introns_list)+1))
    if strand == "-":
        new_analyzed_introns_list.reverse()
    new_analyzed_introns = "_".join(str(a) for a in new_analyzed_introns_list)

    # Reads of this gene whose intron string contains the filled-in span or the
    # analyzed introns as written. Plain substring match, no regex needed.
    # NOTE(review): a substring match is not anchored on "_" boundaries;
    # confirm that read intron strings cannot produce partial-number matches.
    gene_reads = chr_multi_introns_df.iloc[read_rows_by_gene.get(gene, [])]
    read_introns = gene_reads['new_intron_numbers']
    covers_group = (read_introns.str.contains(new_analyzed_introns, regex=False) |
                    read_introns.str.contains(analyzed_introns, regex=False))
    df = gene_reads[covers_group].reset_index(drop=True)

    if df.empty:
        # No read covers this intron group: nothing to add.
        continue

    mat = code_intron_status(df, intron_total+1, analyzed_introns_list)

    # Join the per-intron statuses with "_" in analyzed-intron order; works for
    # any number of analyzed introns, not only 3 or 4.
    splice_status = mat[analyzed_introns_list[0]]
    for intron in analyzed_introns_list[1:]:
        splice_status = splice_status + '_' + mat[intron]

    mat_final = mat[['readname']].copy()
    mat_final['splice_status'] = splice_status
    mat_final['gene'] = gene
    mat_final['analyzed_introns'] = analyzed_introns
    mat_final['n_analyzed_introns'] = n_analyzed_introns
    mat_list.append(mat_final)

mat_all = pd.concat(mat_list)

  0%|          | 0/669 [00:00<?, ?it/s]

NM_000018.3 10_11_12_13


  0%|          | 1/669 [00:01<21:02,  1.89s/it]

NM_000018.3 11_12_13_14


  0%|          | 2/669 [00:03<18:31,  1.67s/it]

NM_000018.3 13_14_15_17


  0%|          | 3/669 [00:04<17:40,  1.59s/it]

NM_000018.3 15_17_18_19


  1%|          | 4/669 [00:06<17:14,  1.56s/it]

NM_000018.3 8_9_10_11


  1%|          | 5/669 [00:07<16:27,  1.49s/it]

NM_000034.3 10_11_12_13


  1%|          | 6/669 [00:09<17:54,  1.62s/it]

NM_000034.3 8_9_10


  1%|          | 7/669 [00:11<19:08,  1.73s/it]

NM_000034.3 9_10_11


  1%|          | 8/669 [00:13<19:51,  1.80s/it]

NM_000048.3 7_8_9


  1%|▏         | 9/669 [00:14<18:14,  1.66s/it]

NM_000117.2 2_3_4_5


  1%|▏         | 10/669 [00:16<17:38,  1.61s/it]

NM_000146.3 1_2_3


  2%|▏         | 11/669 [00:18<18:23,  1.68s/it]

NM_000181.3 7_6_5


  2%|▏         | 12/669 [00:19<17:53,  1.63s/it]

NM_000190.3 10_11_12_13


  2%|▏         | 13/669 [00:21<18:37,  1.70s/it]

NM_000190.3 1_3_4


  2%|▏         | 14/669 [00:23<17:59,  1.65s/it]

NM_000190.3 3_4_6


  2%|▏         | 15/669 [00:24<18:04,  1.66s/it]

NM_000190.3 8_9_10_11


  2%|▏         | 16/669 [00:26<17:48,  1.64s/it]

NM_000190.3 9_10_11_12


  3%|▎         | 17/669 [00:27<17:35,  1.62s/it]

NM_000270.3 2_3_4


  3%|▎         | 18/669 [00:29<16:57,  1.56s/it]

NM_000284.3 4_5_6


  3%|▎         | 19/669 [00:30<16:24,  1.51s/it]

NM_000284.3 5_6_7


  3%|▎         | 20/669 [00:32<15:55,  1.47s/it]

NM_000374.4 2_3_4_5


  3%|▎         | 21/669 [00:33<16:42,  1.55s/it]

NM_000374.4 3_4_5_6


  3%|▎         | 22/669 [00:35<17:21,  1.61s/it]

NM_000661.4 5_4_3


  3%|▎         | 23/669 [00:37<17:15,  1.60s/it]

NM_000661.4 6_5_4


  4%|▎         | 24/669 [00:38<17:20,  1.61s/it]

NM_000661.4 7_6_5


  4%|▎         | 25/669 [00:40<17:10,  1.60s/it]

NM_000852.3 1_2_3_4


  4%|▍         | 26/669 [00:41<16:21,  1.53s/it]

NM_000852.3 2_3_4_5


  4%|▍         | 27/669 [00:43<16:07,  1.51s/it]

NM_000852.3 3_4_5_6


  4%|▍         | 28/669 [00:45<17:27,  1.63s/it]

NM_000858.5 2_3_4


  4%|▍         | 29/669 [00:46<16:21,  1.53s/it]

NM_000858.5 3_4_5_6


  4%|▍         | 30/669 [00:47<15:59,  1.50s/it]

NM_000858.5 5_6_7


  5%|▍         | 31/669 [00:49<15:54,  1.50s/it]

NM_000884.2 4_3_2


  5%|▍         | 32/669 [00:50<15:48,  1.49s/it]

NM_000967.3 6_5_4_3


  5%|▍         | 33/669 [00:52<16:44,  1.58s/it]

NM_000967.3 7_6_5_4


  5%|▌         | 34/669 [00:54<17:04,  1.61s/it]

NM_000967.3 8_7_6_5


  5%|▌         | 35/669 [00:56<17:30,  1.66s/it]

NM_000968.3 4_3_2


  5%|▌         | 36/669 [00:57<16:53,  1.60s/it]

NM_000968.3 6_5_4_3


  6%|▌         | 37/669 [00:59<16:49,  1.60s/it]

NM_000968.3 8_7_6_5


  6%|▌         | 38/669 [01:00<16:18,  1.55s/it]

NM_000968.3 9_8_7_6


  6%|▌         | 39/669 [01:02<16:08,  1.54s/it]

NM_000971.3 4_3_2_1


  6%|▌         | 40/669 [01:03<15:58,  1.52s/it]

NM_000971.3 5_4_3_2


  6%|▌         | 41/669 [01:05<15:45,  1.51s/it]

NM_000971.3 6_5_4_3


  6%|▋         | 42/669 [01:06<15:31,  1.49s/it]

NM_000972.2 2_3_4_5


  6%|▋         | 43/669 [01:08<16:13,  1.56s/it]

NM_000972.2 3_4_5_6


  7%|▋         | 44/669 [01:09<16:30,  1.58s/it]

NM_000972.2 4_5_6_7


  7%|▋         | 45/669 [01:11<16:38,  1.60s/it]

NM_000973.4 4_3_2


  7%|▋         | 46/669 [01:13<17:56,  1.73s/it]

NM_000973.4 5_4_3


  7%|▋         | 47/669 [01:15<18:54,  1.82s/it]

NM_000976.3 4_3_2_1


  7%|▋         | 48/669 [01:17<17:54,  1.73s/it]

NM_000976.3 5_4_3_2


  7%|▋         | 49/669 [01:18<17:35,  1.70s/it]

NM_000976.3 6_5_4_3


  7%|▋         | 50/669 [01:20<17:20,  1.68s/it]

NM_000977.3 1_2_3_4


  8%|▊         | 51/669 [01:21<16:48,  1.63s/it]

NM_000977.3 2_3_4_5


  8%|▊         | 52/669 [01:23<17:06,  1.66s/it]

NM_000978.3 3_2_1


  8%|▊         | 53/669 [01:27<24:35,  2.40s/it]

NM_000978.3 4_3_2


  8%|▊         | 54/669 [01:30<26:53,  2.62s/it]

NM_000979.3 4_3_2_1


  8%|▊         | 55/669 [01:32<24:22,  2.38s/it]

NM_000979.3 5_4_3_2


  8%|▊         | 56/669 [01:34<23:35,  2.31s/it]

NM_000980.3 2_3_4


  9%|▊         | 57/669 [01:36<20:35,  2.02s/it]

NM_000981.3 1_2_3


  9%|▊         | 58/669 [01:37<18:23,  1.81s/it]

NM_000982.3 3_4_5


  9%|▉         | 59/669 [01:38<16:52,  1.66s/it]

NM_000985.4 5_4_3_2


  9%|▉         | 60/669 [01:40<15:53,  1.57s/it]

NM_000985.4 6_5_4_3


  9%|▉         | 61/669 [01:41<15:11,  1.50s/it]

NM_000990.4 1_2_3


  9%|▉         | 62/669 [01:42<14:32,  1.44s/it]

NM_000990.4 2_3_4


  9%|▉         | 63/669 [01:44<14:21,  1.42s/it]

NM_000991.4 2_3_4


 10%|▉         | 64/669 [01:46<15:52,  1.57s/it]

NM_000998.4 1_2_3


 10%|▉         | 65/669 [01:47<15:36,  1.55s/it]

NM_001001852.3 3_4_5


 10%|▉         | 66/669 [01:48<14:51,  1.48s/it]

NM_001002.3 4_3_2_1


 10%|█         | 67/669 [01:50<14:35,  1.46s/it]

NM_001002.3 5_4_3_2


 10%|█         | 68/669 [01:51<15:01,  1.50s/it]

NM_001002.3 7_6_5_4


 10%|█         | 69/669 [01:53<15:19,  1.53s/it]

NM_001002251.2 1_2_3


 10%|█         | 70/669 [01:54<14:31,  1.45s/it]

NM_001002252.2 1_2_3


 11%|█         | 71/669 [01:56<13:57,  1.40s/it]

NM_001002252.2 2_3_4


 11%|█         | 72/669 [01:57<13:35,  1.37s/it]

NM_001002258.4 3_2_1


 11%|█         | 73/669 [01:59<15:14,  1.53s/it]

NM_001003.2 1_2_3


 11%|█         | 74/669 [02:01<16:24,  1.66s/it]

NM_001003713.2 3_2_1


 11%|█         | 75/669 [02:02<15:31,  1.57s/it]

NM_001004.3 1_2_3_4


 11%|█▏        | 76/669 [02:04<16:09,  1.64s/it]

NM_001005.4 3_4_5


 12%|█▏        | 77/669 [02:06<17:00,  1.72s/it]

NM_001005.4 4_5_6


 12%|█▏        | 78/669 [02:08<16:49,  1.71s/it]

NM_001005290.3 5_4_3


 12%|█▏        | 79/669 [02:09<15:36,  1.59s/it]

NM_001005920.2 5_4_3


 12%|█▏        | 80/669 [02:10<14:47,  1.51s/it]

NM_001005920.2 8_7_6


 12%|█▏        | 81/669 [02:11<14:07,  1.44s/it]

NM_001007.4 6_5_4


 12%|█▏        | 82/669 [02:13<14:48,  1.51s/it]

NM_001008563.4 23_24_25


 12%|█▏        | 83/669 [02:14<14:06,  1.44s/it]

NM_001008695.1 3_2_1


 13%|█▎        | 84/669 [02:16<13:40,  1.40s/it]

NM_001009939.2 7_8_9


 13%|█▎        | 85/669 [02:17<13:18,  1.37s/it]

NM_001009996.2 10_9_7


 13%|█▎        | 86/669 [02:18<12:56,  1.33s/it]

NM_001009996.2 11_10_9


 13%|█▎        | 87/669 [02:20<12:41,  1.31s/it]

NM_001009998.3 14_15_16


 13%|█▎        | 88/669 [02:21<12:30,  1.29s/it]

NM_001010.2 4_3_2


 13%|█▎        | 89/669 [02:22<13:36,  1.41s/it]

NM_001011.3 4_5_6


 13%|█▎        | 90/669 [02:24<13:15,  1.37s/it]

NM_001012.1 1_2_3_4


 14%|█▎        | 91/669 [02:25<13:42,  1.42s/it]

NM_001012.1 2_3_4_5


 14%|█▍        | 92/669 [02:29<21:35,  2.24s/it]

NM_001014433.2 3_2_1


 14%|█▍        | 93/669 [02:31<20:41,  2.15s/it]

NM_001014433.2 5_4_3_2


 14%|█▍        | 94/669 [02:33<19:19,  2.02s/it]

NM_001014443.2 8_9_10_11


 14%|█▍        | 95/669 [02:34<17:25,  1.82s/it]

NM_001014837.1 3_2_1


 14%|█▍        | 96/669 [02:36<16:33,  1.73s/it]

NM_001014838.1 3_2_1


 14%|█▍        | 97/669 [02:38<15:59,  1.68s/it]

NM_001014840.1 4_3_2_1


 15%|█▍        | 98/669 [02:39<15:36,  1.64s/it]

NM_001015.4 1_2_3_4


 15%|█▍        | 99/669 [02:41<16:25,  1.73s/it]

NM_001016.3 1_2_3_4


 15%|█▍        | 100/669 [02:43<15:45,  1.66s/it]

NM_001017919.1 2_6_7


 15%|█▌        | 101/669 [02:44<14:55,  1.58s/it]

NM_001017963.2 11_10_9_8


 15%|█▌        | 102/669 [02:46<15:23,  1.63s/it]

NM_001017964.1 4_3_2_1


 15%|█▌        | 103/669 [02:47<15:57,  1.69s/it]

NM_001018.4 1_2_3


 16%|█▌        | 104/669 [02:49<15:04,  1.60s/it]

NM_001018837.1 3_4_5


 16%|█▌        | 105/669 [02:50<14:33,  1.55s/it]

NM_001020.5 4_3_2


 16%|█▌        | 106/669 [02:52<16:15,  1.73s/it]

NM_001021.4 3_2_1


 16%|█▌        | 107/669 [02:54<15:31,  1.66s/it]

NM_001021.4 4_3_2


 16%|█▌        | 108/669 [02:55<15:12,  1.63s/it]

NM_001023.3 3_2_1


 16%|█▋        | 109/669 [02:57<16:01,  1.72s/it]

NM_001024.3 1_2_3_4


 16%|█▋        | 110/669 [02:59<16:21,  1.76s/it]

NM_001024.3 2_3_4_5


 17%|█▋        | 111/669 [03:01<17:34,  1.89s/it]

NM_001024226.1 2_3_4


 17%|█▋        | 112/669 [03:03<16:19,  1.76s/it]

NM_001024382.1 1_3_4


 17%|█▋        | 113/669 [03:04<15:41,  1.69s/it]

NM_001024662.2 3_2_1


 17%|█▋        | 114/669 [03:06<15:38,  1.69s/it]

NM_001025205.1 8_9_10


 17%|█▋        | 115/669 [03:08<14:52,  1.61s/it]

NM_001025248.1 4_5_6


 17%|█▋        | 116/669 [03:09<14:17,  1.55s/it]

NM_001029.3 1_2_3


 17%|█▋        | 117/669 [03:10<13:56,  1.52s/it]

NM_001029991.1 8_9_11


 18%|█▊        | 118/669 [03:12<13:46,  1.50s/it]

NM_001029991.1 9_11_12


 18%|█▊        | 119/669 [03:13<13:45,  1.50s/it]

NM_001031.4 1_2_3


 18%|█▊        | 120/669 [03:15<13:44,  1.50s/it]

NM_001031677.3 3_2_1


 18%|█▊        | 121/669 [03:16<13:41,  1.50s/it]

NM_001031684.2 6_5_4_3


 18%|█▊        | 122/669 [03:18<14:13,  1.56s/it]

NM_001032291.2 5_4_3


 18%|█▊        | 123/669 [03:19<13:48,  1.52s/it]

NM_001033044.3 6_5_4_3


 19%|█▊        | 124/669 [03:21<13:46,  1.52s/it]

NM_001033678.3 4_3_2


 19%|█▊        | 125/669 [03:22<13:25,  1.48s/it]

NM_001033678.3 5_4_3


 19%|█▉        | 126/669 [03:24<13:00,  1.44s/it]

NM_001034116.1 7_6_5


 19%|█▉        | 127/669 [03:25<12:53,  1.43s/it]

NM_001034116.1 9_8_7


 19%|█▉        | 128/669 [03:27<12:56,  1.44s/it]

NM_001035006.2 4_3_2_1


 19%|█▉        | 129/669 [03:28<12:43,  1.41s/it]

NM_001037663.1 3_4_5_6


 19%|█▉        | 130/669 [03:29<12:51,  1.43s/it]

NM_001037811.2 5_4_2


 20%|█▉        | 131/669 [03:31<12:40,  1.41s/it]

NM_001039457.2 1_2_3_4


 20%|█▉        | 132/669 [03:33<13:47,  1.54s/it]

NM_001039457.2 2_3_4_5


 20%|█▉        | 133/669 [03:35<14:51,  1.66s/it]

NM_001039457.2 3_4_5_6


 20%|██        | 134/669 [03:37<15:37,  1.75s/it]

NM_001039465.1 4_5_6


 20%|██        | 135/669 [03:38<15:19,  1.72s/it]

NM_001039465.1 5_6_7


 20%|██        | 136/669 [03:40<15:20,  1.73s/it]

NM_001039707.1 7_6_5


 20%|██        | 137/669 [03:41<14:25,  1.63s/it]

NM_001039847.2 4_5_6


 21%|██        | 138/669 [03:43<13:44,  1.55s/it]

NM_001039848.2 4_5_6


 21%|██        | 139/669 [03:44<13:22,  1.51s/it]

NM_001040437.2 1_2_3


 21%|██        | 140/669 [03:46<13:24,  1.52s/it]

NM_001042529.2 1_2_3


 21%|██        | 141/669 [03:47<13:15,  1.51s/it]

NM_001042529.2 2_3_4


 21%|██        | 142/669 [03:49<13:02,  1.48s/it]

NM_001042532.3 2_3_4


 21%|██▏       | 143/669 [03:50<13:00,  1.48s/it]

NM_001042559.2 17_16_15


 22%|██▏       | 144/669 [03:52<12:59,  1.49s/it]

NM_001042559.2 18_17_16


 22%|██▏       | 145/669 [03:55<19:08,  2.19s/it]

NM_001042559.2 19_18_17


 22%|██▏       | 146/669 [03:57<17:47,  2.04s/it]

NM_001048171.1 8_7_6


 22%|██▏       | 147/669 [03:58<15:49,  1.82s/it]

NM_001048241.2 2_3_4


 22%|██▏       | 148/669 [04:00<14:30,  1.67s/it]

NM_001071.2 4_5_6


 22%|██▏       | 149/669 [04:01<13:35,  1.57s/it]

NM_001078175.2 6_7_12


 22%|██▏       | 150/669 [04:02<13:12,  1.53s/it]

NM_001098533.2 7_8_9


 23%|██▎       | 151/669 [04:04<12:34,  1.46s/it]

NM_001098533.2 8_9_10


 23%|██▎       | 152/669 [04:05<12:05,  1.40s/it]

NM_001101.3 3_2_1


 23%|██▎       | 153/669 [04:07<12:34,  1.46s/it]

NM_001101.3 4_3_2


 23%|██▎       | 154/669 [04:08<13:03,  1.52s/it]

NM_001101.3 5_4_3


 23%|██▎       | 155/669 [04:10<13:28,  1.57s/it]

NM_001101654.1 13_12_11


 23%|██▎       | 156/669 [04:11<12:41,  1.48s/it]

NM_001113182.2 10_11_12


 23%|██▎       | 157/669 [04:13<12:30,  1.47s/it]

NM_001113182.2 5_7_8


 24%|██▎       | 158/669 [04:14<12:07,  1.42s/it]

NM_001127218.2 7_6_5


 24%|██▍       | 159/669 [04:15<11:49,  1.39s/it]

NM_001127229.1 3_2_1


 24%|██▍       | 160/669 [04:17<11:35,  1.37s/it]

NM_001127230.1 3_2_1


 24%|██▍       | 161/669 [04:18<11:51,  1.40s/it]

NM_001128852.1 17_18_19


 24%|██▍       | 162/669 [04:20<11:49,  1.40s/it]

NM_001128853.1 17_18_19


 24%|██▍       | 163/669 [04:21<11:45,  1.39s/it]

NM_001130964.1 11_8_7


 25%|██▍       | 164/669 [04:22<11:28,  1.36s/it]

NM_001130964.1 13_12_11


 25%|██▍       | 165/669 [04:24<11:25,  1.36s/it]

NM_001130964.1 8_7_6


 25%|██▍       | 166/669 [04:25<11:16,  1.35s/it]

NM_001134231.1 9_8_7_5


 25%|██▍       | 167/669 [04:26<11:16,  1.35s/it]

NM_001135653.1 12_11_10_9


 25%|██▌       | 168/669 [04:28<11:24,  1.37s/it]

NM_001135653.1 13_12_11_10


 25%|██▌       | 169/669 [04:29<11:30,  1.38s/it]

NM_001135653.1 14_13_12_11


 25%|██▌       | 170/669 [04:31<11:41,  1.41s/it]

NM_001135653.1 15_14_13_12


 26%|██▌       | 171/669 [04:32<11:56,  1.44s/it]

NM_001135654.1 12_11_10_9


 26%|██▌       | 172/669 [04:33<11:32,  1.39s/it]

NM_001135821.1 4_6_7_9


 26%|██▌       | 173/669 [04:35<11:20,  1.37s/it]

NM_001135821.1 6_7_9_10


 26%|██▌       | 174/669 [04:36<11:11,  1.36s/it]

NM_001135861.2 1_2_3


 26%|██▌       | 175/669 [04:37<10:57,  1.33s/it]

NM_001136017.3 4_3_2


 26%|██▋       | 176/669 [04:39<10:53,  1.33s/it]

NM_001136035.2 10_9_8


 26%|██▋       | 177/669 [04:40<10:53,  1.33s/it]

NM_001136035.2 15_11_10


 27%|██▋       | 178/669 [04:41<11:04,  1.35s/it]

NM_001142350.1 3_4_5


 27%|██▋       | 179/669 [04:43<10:55,  1.34s/it]

NM_001142463.2 12_13_14


 27%|██▋       | 180/669 [04:44<10:42,  1.31s/it]

NM_001142601.1 1_2_4


 27%|██▋       | 181/669 [04:45<10:42,  1.32s/it]

NM_001142601.1 2_4_5


 27%|██▋       | 182/669 [04:46<10:40,  1.31s/it]

NM_001142777.1 2_3_4


 27%|██▋       | 183/669 [04:48<10:27,  1.29s/it]

NM_001142853.2 3_2_1


 28%|██▊       | 184/669 [04:49<10:27,  1.29s/it]

NM_001143679.1 1_2_3_5


 28%|██▊       | 185/669 [04:50<10:35,  1.31s/it]

NM_001143679.1 2_3_5_6


 28%|██▊       | 186/669 [04:52<10:46,  1.34s/it]

NM_001143681.1 3_5_6


 28%|██▊       | 187/669 [04:53<10:52,  1.35s/it]

NM_001143780.2 10_9_8_7


 28%|██▊       | 188/669 [04:55<10:55,  1.36s/it]

NM_001143780.2 7_6_5


 28%|██▊       | 189/669 [04:56<10:56,  1.37s/it]

NM_001143780.2 8_7_6


 28%|██▊       | 190/669 [04:57<10:54,  1.37s/it]

NM_001144012.2 2_3_4_5


 29%|██▊       | 191/669 [04:59<10:41,  1.34s/it]

NM_001144012.2 3_4_5_6


 29%|██▊       | 192/669 [05:00<11:00,  1.38s/it]

NM_001144831.1 5_4_3


 29%|██▉       | 193/669 [05:01<11:02,  1.39s/it]

NM_001144831.1 6_5_4


 29%|██▉       | 194/669 [05:03<11:02,  1.40s/it]

NM_001144831.1 7_6_5


 29%|██▉       | 195/669 [05:04<11:02,  1.40s/it]

NM_001145408.1 8_9_10


 29%|██▉       | 196/669 [05:06<10:59,  1.39s/it]

NM_001145408.1 9_10_11_12


 29%|██▉       | 197/669 [05:07<10:54,  1.39s/it]

NM_001152.4 1_2_3


 30%|██▉       | 198/669 [05:08<11:01,  1.41s/it]

NM_001159390.1 1_2_3


 30%|██▉       | 199/669 [05:10<10:43,  1.37s/it]

NM_001159936.1 4_3_1


 30%|██▉       | 200/669 [05:11<10:29,  1.34s/it]

NM_001160305.2 3_4_5


 30%|███       | 201/669 [05:12<10:13,  1.31s/it]

NM_001160389.1 4_3_2


 30%|███       | 202/669 [05:14<10:03,  1.29s/it]

NM_001160389.1 5_4_3


 30%|███       | 203/669 [05:15<09:53,  1.27s/it]

NM_001163560.2 8_7_6


 30%|███       | 204/669 [05:16<09:44,  1.26s/it]

NM_001166102.1 2_4_5


 31%|███       | 205/669 [05:17<09:58,  1.29s/it]

NM_001166102.1 4_5_6


 31%|███       | 206/669 [05:19<10:07,  1.31s/it]

NM_001166102.1 5_6_7_8


 31%|███       | 207/669 [05:20<10:14,  1.33s/it]

NM_001166356.1 6_7_8


 31%|███       | 208/669 [05:21<10:07,  1.32s/it]

NM_001177842.1 12_11_10


 31%|███       | 209/669 [05:23<09:58,  1.30s/it]

NM_001185092.1 1_2_3_4


 31%|███▏      | 210/669 [05:24<09:52,  1.29s/it]

NM_001185094.1 2_3_4


 32%|███▏      | 211/669 [05:25<09:41,  1.27s/it]

NM_001185181.2 1_2_3


 32%|███▏      | 212/669 [05:26<09:50,  1.29s/it]

NM_001190326.1 2_4_10


 32%|███▏      | 213/669 [05:28<10:25,  1.37s/it]

NM_001194954.1 5_6_15


 32%|███▏      | 214/669 [05:29<10:30,  1.38s/it]

NM_001195057.1 3_2_1


 32%|███▏      | 215/669 [05:31<10:12,  1.35s/it]

NM_001195218.1 4_3_2_1


 32%|███▏      | 216/669 [05:32<10:02,  1.33s/it]

NM_001195218.1 6_5_4_3


 32%|███▏      | 217/669 [05:33<10:04,  1.34s/it]

NM_001195218.1 7_6_5_4


 33%|███▎      | 218/669 [05:35<10:01,  1.33s/it]

NM_001195446.1 5_4_3


 33%|███▎      | 219/669 [05:36<10:32,  1.41s/it]

NM_001198719.1 8_7_6


 33%|███▎      | 220/669 [05:38<10:13,  1.37s/it]

NM_001199120.1 1_2_3


 33%|███▎      | 221/669 [05:39<10:10,  1.36s/it]

NM_001199121.1 1_2_3


 33%|███▎      | 222/669 [05:40<10:07,  1.36s/it]

NM_001199341.1 4_3_2_1


 33%|███▎      | 223/669 [05:41<09:53,  1.33s/it]

NM_001199344.1 4_3_2_1


 33%|███▎      | 224/669 [05:43<09:42,  1.31s/it]

NM_001199345.1 4_3_2_1


 34%|███▎      | 225/669 [05:44<09:33,  1.29s/it]

NM_001199797.1 3_2_1


 34%|███▍      | 226/669 [05:45<09:24,  1.28s/it]

NM_001199954.1 3_2_1


 34%|███▍      | 227/669 [05:46<09:21,  1.27s/it]

NM_001199954.1 4_3_2


 34%|███▍      | 228/669 [05:48<09:40,  1.32s/it]

NM_001199954.1 5_4_3


 34%|███▍      | 229/669 [05:49<09:56,  1.35s/it]

NM_001203247.1 11_10_9


 34%|███▍      | 230/669 [05:51<09:48,  1.34s/it]

NM_001203247.1 12_11_10


 35%|███▍      | 231/669 [05:52<09:36,  1.32s/it]

NM_001203247.1 13_12_11


 35%|███▍      | 232/669 [05:53<09:29,  1.30s/it]

NM_001204510.1 3_4_5_6


 35%|███▍      | 233/669 [05:55<09:42,  1.34s/it]

NM_001204510.1 4_5_6_7


 35%|███▍      | 234/669 [05:56<09:48,  1.35s/it]

NM_001242597.1 1_2_3_4


 35%|███▌      | 235/669 [05:57<09:48,  1.36s/it]

NM_001242597.1 2_3_4_5


 35%|███▌      | 236/669 [05:59<09:48,  1.36s/it]

NM_001242854.1 7_5_4


 35%|███▌      | 237/669 [06:00<09:37,  1.34s/it]

NM_001242855.1 4_3_2


 36%|███▌      | 238/669 [06:01<09:37,  1.34s/it]

NM_001243131.1 1_2_3


 36%|███▌      | 239/669 [06:03<09:47,  1.37s/it]

NM_001243156.1 12_10_9_8


 36%|███▌      | 240/669 [06:04<09:40,  1.35s/it]

NM_001243156.1 8_7_6


 36%|███▌      | 241/669 [06:05<09:39,  1.35s/it]

NM_001243156.1 9_8_7


 36%|███▌      | 242/669 [06:07<09:28,  1.33s/it]

NM_001243159.1 11_9_8


 36%|███▋      | 243/669 [06:08<09:19,  1.31s/it]

NM_001244249.1 1_2_3


 36%|███▋      | 244/669 [06:09<09:22,  1.32s/it]

NM_001244249.1 2_3_4


 37%|███▋      | 245/669 [06:11<09:29,  1.34s/it]

NM_001255.2 8_9_10


 37%|███▋      | 246/669 [06:12<09:32,  1.35s/it]

NM_001256120.1 7_6_5


 37%|███▋      | 247/669 [06:13<09:32,  1.36s/it]

NM_001256269.1 12_13_14


 37%|███▋      | 248/669 [06:15<09:31,  1.36s/it]

NM_001256399.1 5_4_3_2


 37%|███▋      | 249/669 [06:16<09:29,  1.36s/it]

NM_001256399.1 6_5_4_3


 37%|███▋      | 250/669 [06:18<09:29,  1.36s/it]

NM_001256402.1 4_3_2


 38%|███▊      | 251/669 [06:19<09:14,  1.33s/it]

NM_001256402.1 5_4_3


 38%|███▊      | 252/669 [06:20<09:18,  1.34s/it]

NM_001256534.1 4_3_2_1


 38%|███▊      | 253/669 [06:22<09:27,  1.37s/it]

NM_001256534.1 5_4_3_2


 38%|███▊      | 254/669 [06:23<09:33,  1.38s/it]

NM_001256534.1 6_5_4_3


 38%|███▊      | 255/669 [06:24<09:40,  1.40s/it]

NM_001256534.1 7_6_5_4


 38%|███▊      | 256/669 [06:26<09:52,  1.43s/it]

NM_001256577.2 1_2_3_4


 38%|███▊      | 257/669 [06:28<10:01,  1.46s/it]

NM_001256799.2 2_3_4_5


 39%|███▊      | 258/669 [06:29<10:24,  1.52s/it]

NM_001256799.2 3_4_5_6


 39%|███▊      | 259/669 [06:31<10:45,  1.57s/it]

NM_001256799.2 4_5_6_7


 39%|███▉      | 260/669 [06:33<11:05,  1.63s/it]

NM_001257293.1 10_7_5


 39%|███▉      | 261/669 [06:34<10:25,  1.53s/it]

NM_001257293.1 11_10_7


 39%|███▉      | 262/669 [06:35<09:59,  1.47s/it]

NM_001257293.1 12_11_10


 39%|███▉      | 263/669 [06:37<09:32,  1.41s/it]

NM_001257994.1 10_9_8_7


 39%|███▉      | 264/669 [06:38<09:23,  1.39s/it]

NM_001257994.1 11_10_9_8


 40%|███▉      | 265/669 [06:39<09:34,  1.42s/it]

NM_001258208.1 10_11_12


 40%|███▉      | 266/669 [06:41<09:27,  1.41s/it]

NM_001258384.2 10_9_8_7


 40%|███▉      | 267/669 [06:42<09:17,  1.39s/it]

NM_001258461.1 7_6_5_4


 40%|████      | 268/669 [06:43<09:08,  1.37s/it]

NM_001258461.1 8_7_6_5


 40%|████      | 269/669 [06:45<08:57,  1.34s/it]

NM_001258461.1 9_8_7_6


 40%|████      | 270/669 [06:46<08:51,  1.33s/it]

NM_001261834.1 10_9_8


 41%|████      | 271/669 [06:47<09:09,  1.38s/it]

NM_001261834.1 7_6_5_4


 41%|████      | 272/669 [06:49<09:12,  1.39s/it]

NM_001261834.1 9_8_7_6


 41%|████      | 273/669 [06:51<09:36,  1.46s/it]

NM_001267809.1 12_11_10_9


 41%|████      | 274/669 [06:52<09:27,  1.44s/it]

NM_001267809.1 13_12_11_10


 41%|████      | 275/669 [06:53<09:19,  1.42s/it]

NM_001270481.1 2_3_4


 41%|████▏     | 276/669 [06:55<09:03,  1.38s/it]

NM_001270491.1 3_4_5_6


 41%|████▏     | 277/669 [06:56<09:07,  1.40s/it]

NM_001271006.1 4_3_2_1


 42%|████▏     | 278/669 [06:57<08:54,  1.37s/it]

NM_001271007.1 3_2_1


 42%|████▏     | 279/669 [06:59<08:50,  1.36s/it]

NM_001271285.1 1_2_3


 42%|████▏     | 280/669 [07:00<08:46,  1.35s/it]

NM_001271681.1 5_4_3


 42%|████▏     | 281/669 [07:01<08:42,  1.35s/it]

NM_001271828.1 2_3_4


 42%|████▏     | 282/669 [07:03<08:34,  1.33s/it]

NM_001271969.1 9_10_11


 42%|████▏     | 283/669 [07:04<08:51,  1.38s/it]

NM_001276405.1 10_9_7


 42%|████▏     | 284/669 [07:05<08:33,  1.33s/it]

NM_001276405.1 11_10_9


 43%|████▎     | 285/669 [07:07<08:20,  1.30s/it]

NM_001277764.1 5_3_2


 43%|████▎     | 286/669 [07:08<08:14,  1.29s/it]

NM_001278197.1 3_5_7_8


 43%|████▎     | 287/669 [07:09<08:10,  1.29s/it]

NM_001278217.1 1_3_6


 43%|████▎     | 288/669 [07:10<08:05,  1.28s/it]

NM_001278217.1 3_6_7


 43%|████▎     | 289/669 [07:12<08:03,  1.27s/it]

NM_001278639.1 3_4_5


 43%|████▎     | 290/669 [07:13<08:24,  1.33s/it]

NM_001278640.1 3_4_5


 43%|████▎     | 291/669 [07:15<08:38,  1.37s/it]

NM_001278785.1 1_2_3


 44%|████▎     | 292/669 [07:16<08:29,  1.35s/it]

NM_001278786.1 1_2_3


 44%|████▍     | 293/669 [07:17<08:23,  1.34s/it]

NM_001282112.1 17_15_13


 44%|████▍     | 294/669 [07:19<08:23,  1.34s/it]

NM_001282280.1 7_6_5


 44%|████▍     | 295/669 [07:20<08:24,  1.35s/it]

NM_001282385.1 3_4_5


 44%|████▍     | 296/669 [07:21<08:43,  1.40s/it]

NM_001282385.1 4_5_6


 44%|████▍     | 297/669 [07:23<08:53,  1.43s/it]

NM_001282907.1 11_10_9


 45%|████▍     | 298/669 [07:24<08:53,  1.44s/it]

NM_001284377.1 2_3_4_5


 45%|████▍     | 299/669 [07:26<08:46,  1.42s/it]

NM_001284377.1 4_5_6


 45%|████▍     | 300/669 [07:27<08:46,  1.43s/it]

NM_001284378.1 2_3_4


 45%|████▍     | 301/669 [07:29<08:44,  1.42s/it]

NM_001284379.1 2_3_4


 45%|████▌     | 302/669 [07:30<08:41,  1.42s/it]

NM_001286077.1 7_6_5


 45%|████▌     | 303/669 [07:31<08:44,  1.43s/it]

NM_001286077.1 8_7_6


 45%|████▌     | 304/669 [07:33<08:42,  1.43s/it]

NM_001286082.1 3_2_1


 46%|████▌     | 305/669 [07:34<08:41,  1.43s/it]

NM_001286134.1 7_8_9


 46%|████▌     | 306/669 [07:36<08:36,  1.42s/it]

NM_001286216.1 10_9_6


 46%|████▌     | 307/669 [07:37<08:32,  1.42s/it]

NM_001286216.1 11_10_9


 46%|████▌     | 308/669 [07:39<08:39,  1.44s/it]

NM_001286216.1 9_6_5


 46%|████▌     | 309/669 [07:40<08:34,  1.43s/it]

NM_001286272.1 3_2_1


 46%|████▋     | 310/669 [07:41<08:34,  1.43s/it]

NM_001286272.1 4_3_2


 46%|████▋     | 311/669 [07:43<08:27,  1.42s/it]

NM_001286451.1 3_2_1


 47%|████▋     | 312/669 [07:44<08:21,  1.41s/it]

NM_001287032.1 6_5_2


 47%|████▋     | 313/669 [07:46<08:19,  1.40s/it]

NM_001287188.1 10_9_8


 47%|████▋     | 314/669 [07:47<08:41,  1.47s/it]

NM_001287188.1 9_8_7


 47%|████▋     | 315/669 [07:49<08:57,  1.52s/it]

NM_001287343.1 1_2_3


 47%|████▋     | 316/669 [07:50<08:28,  1.44s/it]

NM_001287387.1 3_2_1


 47%|████▋     | 317/669 [07:51<08:07,  1.39s/it]

NM_001287482.1 1_2_3


 48%|████▊     | 318/669 [07:53<08:08,  1.39s/it]

NM_001287485.1 1_2_3


 48%|████▊     | 319/669 [07:54<07:56,  1.36s/it]

NM_001287486.1 1_2_3


 48%|████▊     | 320/669 [07:55<07:50,  1.35s/it]

NM_001290137.1 2_3_4_5


 48%|████▊     | 321/669 [07:57<07:37,  1.31s/it]

NM_001290137.1 6_7_8


 48%|████▊     | 322/669 [07:58<07:31,  1.30s/it]

NM_001291428.1 1_2_3


 48%|████▊     | 323/669 [07:59<07:22,  1.28s/it]

NM_001291428.1 2_3_5


 48%|████▊     | 324/669 [08:00<07:15,  1.26s/it]

NM_001293197.1 1_3_4_5


 49%|████▊     | 325/669 [08:02<07:21,  1.28s/it]

NM_001294333.1 1_2_3_4


 49%|████▊     | 326/669 [08:03<08:11,  1.43s/it]

NM_001294333.1 2_3_4_5


 49%|████▉     | 327/669 [08:05<08:36,  1.51s/it]

NM_001297604.1 4_3_2


 49%|████▉     | 328/669 [08:07<08:17,  1.46s/it]

NM_001300796.1 1_2_3


 49%|████▉     | 329/669 [08:08<08:11,  1.45s/it]

NM_001300797.1 1_2_3


 49%|████▉     | 330/669 [08:09<08:08,  1.44s/it]

NM_001300981.1 1_2_3_4


 49%|████▉     | 331/669 [08:11<08:22,  1.49s/it]

NM_001301020.1 1_3_4_5


 50%|████▉     | 332/669 [08:15<12:30,  2.23s/it]

NM_001301339.1 3_2_1


 50%|████▉     | 333/669 [08:16<11:21,  2.03s/it]

NM_001303252.1 7_6_3


 50%|████▉     | 334/669 [08:18<10:13,  1.83s/it]

NM_001303624.1 1_2_3_4


 50%|█████     | 335/669 [08:20<10:46,  1.93s/it]

NM_001303624.1 2_3_4_5


 50%|█████     | 336/669 [08:22<11:26,  2.06s/it]

NM_001303625.1 1_2_3_4


 50%|█████     | 337/669 [08:24<10:44,  1.94s/it]

NM_001303626.1 1_2_3_4


 51%|█████     | 338/669 [08:26<11:01,  2.00s/it]

NM_001303626.1 2_3_4_5


 51%|█████     | 339/669 [08:29<11:32,  2.10s/it]

NM_001304288.1 2_3_4


 51%|█████     | 340/669 [08:30<10:20,  1.89s/it]

NM_001304288.1 3_4_5_6


 51%|█████     | 341/669 [08:31<09:34,  1.75s/it]

NM_001313972.1 5_4_3_2


 51%|█████     | 342/669 [08:33<09:12,  1.69s/it]

NM_001313972.1 6_5_4_3


 51%|█████▏    | 343/669 [08:34<08:57,  1.65s/it]

NM_001317062.1 3_2_1


 51%|█████▏    | 344/669 [08:36<08:32,  1.58s/it]

NM_001317783.1 5_4_3


 52%|█████▏    | 345/669 [08:37<08:13,  1.52s/it]

NM_001317975.1 3_2_1


 52%|█████▏    | 346/669 [08:39<08:04,  1.50s/it]

NM_001317996.1 17_18_19_20


 52%|█████▏    | 347/669 [08:40<07:47,  1.45s/it]

NM_001318786.1 2_3_4


 52%|█████▏    | 348/669 [08:42<08:19,  1.56s/it]

NM_001318788.1 2_3_4


 52%|█████▏    | 349/669 [08:44<08:40,  1.63s/it]

NM_001318794.1 2_3_4


 52%|█████▏    | 350/669 [08:45<08:13,  1.55s/it]

NM_001318797.1 2_3_4


 52%|█████▏    | 351/669 [08:47<08:35,  1.62s/it]

NM_001318876.1 2_3_4_5


 53%|█████▎    | 352/669 [08:48<08:26,  1.60s/it]

NM_001318876.1 3_4_5_6


 53%|█████▎    | 353/669 [08:50<08:20,  1.59s/it]

NM_001318876.1 4_5_6_7


 53%|█████▎    | 354/669 [08:51<08:18,  1.58s/it]

NM_001319165.1 1_2_3_4


 53%|█████▎    | 355/669 [08:53<08:04,  1.54s/it]

NM_001319165.1 2_3_4_5


 53%|█████▎    | 356/669 [08:54<07:53,  1.51s/it]

NM_001319166.1 3_4_5


 53%|█████▎    | 357/669 [08:56<07:39,  1.47s/it]

NM_001319167.1 3_4_5


 54%|█████▎    | 358/669 [08:57<07:33,  1.46s/it]

NM_001320.6 3_4_5


 54%|█████▎    | 359/669 [08:59<07:50,  1.52s/it]

NM_001320.6 4_5_6


 54%|█████▍    | 360/669 [09:00<08:01,  1.56s/it]

NM_001320137.1 3_2_1


 54%|█████▍    | 361/669 [09:02<07:43,  1.51s/it]

NM_001320214.1 1_4_5


 54%|█████▍    | 362/669 [09:03<07:44,  1.51s/it]

NM_001320304.1 3_4_5_6


 54%|█████▍    | 363/669 [09:05<07:36,  1.49s/it]

NM_001320304.1 4_5_6_7


 54%|█████▍    | 364/669 [09:06<07:28,  1.47s/it]

NM_001320305.1 5_6_7


 55%|█████▍    | 365/669 [09:08<07:25,  1.46s/it]

NM_001320306.1 3_4_5_6


 55%|█████▍    | 366/669 [09:09<07:20,  1.45s/it]

NM_001320307.1 1_3_4


 55%|█████▍    | 367/669 [09:10<07:10,  1.42s/it]

NM_001320309.1 2_3_4_5


 55%|█████▌    | 368/669 [09:12<07:08,  1.42s/it]

NM_001320310.1 1_3_4


 55%|█████▌    | 369/669 [09:13<07:10,  1.43s/it]

NM_001320311.1 1_3_4


 55%|█████▌    | 370/669 [09:15<07:11,  1.44s/it]

NM_001320311.1 3_4_5_6


 55%|█████▌    | 371/669 [09:16<07:17,  1.47s/it]

NM_001320311.1 4_5_6_7


 56%|█████▌    | 372/669 [09:18<07:16,  1.47s/it]

NM_001320417.1 6_4_3


 56%|█████▌    | 373/669 [09:19<07:05,  1.44s/it]

NM_001320595.1 10_9_8_7


 56%|█████▌    | 374/669 [09:23<11:14,  2.29s/it]

NM_001320595.1 12_11_10_9


 56%|█████▌    | 375/669 [09:25<10:25,  2.13s/it]

NM_001320595.1 13_12_11_10


 56%|█████▌    | 376/669 [09:27<09:48,  2.01s/it]

NM_001321111.1 4_3_2


 56%|█████▋    | 377/669 [09:28<08:57,  1.84s/it]

NM_001321170.1 7_9_10


 57%|█████▋    | 378/669 [09:30<08:17,  1.71s/it]

NM_001321170.1 9_10_11


 57%|█████▋    | 379/669 [09:31<07:48,  1.62s/it]

NM_001321241.1 10_9_8_7


 57%|█████▋    | 380/669 [09:33<07:28,  1.55s/it]

NM_001321241.1 8_7_6


 57%|█████▋    | 381/669 [09:34<07:12,  1.50s/it]

NM_001321335.1 1_2_3


 57%|█████▋    | 382/669 [09:35<06:58,  1.46s/it]

NM_001321382.1 5_4_3


 57%|█████▋    | 383/669 [09:37<06:41,  1.41s/it]

NM_001321728.1 3_4_5


 57%|█████▋    | 384/669 [09:38<06:30,  1.37s/it]

NM_001321732.1 5_4_3


 58%|█████▊    | 385/669 [09:39<06:25,  1.36s/it]

NM_001321844.1 3_4_5_6


 58%|█████▊    | 386/669 [09:41<06:20,  1.34s/it]

NM_001322434.1 6_7_8


 58%|█████▊    | 387/669 [09:42<06:17,  1.34s/it]

NM_001322434.1 7_8_9


 58%|█████▊    | 388/669 [09:43<06:16,  1.34s/it]

NM_001323918.1 8_7_6


 58%|█████▊    | 389/669 [09:44<06:09,  1.32s/it]

NM_001324081.1 5_4_3


 58%|█████▊    | 390/669 [09:46<06:10,  1.33s/it]

NM_001363.4 12_13_14


 58%|█████▊    | 391/669 [09:47<06:07,  1.32s/it]

NM_001382.3 7_6_5


 59%|█████▊    | 392/669 [09:48<06:09,  1.33s/it]

NM_001382.3 8_7_6


 59%|█████▊    | 393/669 [09:50<06:10,  1.34s/it]

NM_001384.4 1_2_3_4


 59%|█████▉    | 394/669 [09:51<06:06,  1.33s/it]

NM_001402.5 5_4_3_2


 59%|█████▉    | 395/669 [09:53<06:08,  1.35s/it]

NM_001402.5 6_5_4_3


 59%|█████▉    | 396/669 [09:54<06:10,  1.36s/it]

NM_001416.3 6_7_8_9


 59%|█████▉    | 397/669 [09:55<06:19,  1.40s/it]

NM_001416.3 7_8_9_10


 59%|█████▉    | 398/669 [09:57<06:24,  1.42s/it]

NM_001517.4 9_10_12


 60%|█████▉    | 399/669 [09:58<06:12,  1.38s/it]

NM_001641.3 1_2_3


 60%|█████▉    | 400/669 [09:59<06:04,  1.36s/it]

NM_001686.3 5_4_2_1


 60%|█████▉    | 401/669 [10:01<06:39,  1.49s/it]

NM_001686.3 6_5_4_2


 60%|██████    | 402/669 [10:03<07:10,  1.61s/it]

NM_001686.3 8_6_5


 60%|██████    | 403/669 [10:05<07:41,  1.73s/it]

NM_001823.4 5_4_3_2


 60%|██████    | 404/669 [10:07<07:30,  1.70s/it]

NM_001823.4 6_5_4_3


 61%|██████    | 405/669 [10:08<07:23,  1.68s/it]

NM_001823.4 7_6_5_4


 61%|██████    | 406/669 [10:10<07:06,  1.62s/it]

NM_001862.2 1_2_3


 61%|██████    | 407/669 [10:12<07:15,  1.66s/it]

NM_001916.4 1_2_3_4


 61%|██████    | 408/669 [10:13<06:59,  1.61s/it]

NM_001916.4 3_4_5


 61%|██████    | 409/669 [10:15<06:52,  1.59s/it]

NM_001924.3 1_2_3


 61%|██████▏   | 410/669 [10:16<06:25,  1.49s/it]

NM_001967.3 1_2_3_4


 61%|██████▏   | 411/669 [10:18<06:28,  1.51s/it]

NM_001967.3 3_4_5_6


 62%|██████▏   | 412/669 [10:19<06:36,  1.54s/it]

NM_001967.3 4_5_6_7


 62%|██████▏   | 413/669 [10:21<06:46,  1.59s/it]

NM_001967.3 5_6_7_8


 62%|██████▏   | 414/669 [10:23<07:27,  1.75s/it]

NM_001967.3 7_8_9_10


 62%|██████▏   | 415/669 [10:25<07:27,  1.76s/it]

NM_001997.4 4_3_2_1


 62%|██████▏   | 416/669 [10:26<07:12,  1.71s/it]

NM_002106.3 3_2_1


 62%|██████▏   | 417/669 [10:28<06:57,  1.66s/it]

NM_002106.3 4_3_2


 62%|██████▏   | 418/669 [10:29<06:45,  1.61s/it]

NM_002136.3 1_2_3


 63%|██████▎   | 419/669 [10:31<06:21,  1.53s/it]

NM_002136.3 3_4_5


 63%|██████▎   | 420/669 [10:32<06:05,  1.47s/it]

NM_002136.3 4_5_6_7


 63%|██████▎   | 421/669 [10:33<05:53,  1.43s/it]

NM_002136.3 5_6_7_8


 63%|██████▎   | 422/669 [10:35<05:45,  1.40s/it]

NM_002136.3 6_7_8_9


 63%|██████▎   | 423/669 [10:36<05:39,  1.38s/it]

NM_002157.2 1_2_3


 63%|██████▎   | 424/669 [10:37<05:31,  1.35s/it]

NM_002295.5 2_3_4


 64%|██████▎   | 425/669 [10:39<05:27,  1.34s/it]

NM_002295.5 3_4_5_6


 64%|██████▎   | 426/669 [10:40<05:26,  1.34s/it]

NM_002513.2 3_2_1


 64%|██████▍   | 427/669 [10:41<05:20,  1.32s/it]

NM_002688.5 8_9_10


 64%|██████▍   | 428/669 [10:43<05:19,  1.33s/it]

NM_002792.3 6_5_4_3


 64%|██████▍   | 429/669 [10:44<05:22,  1.35s/it]

NM_002796.2 1_2_3_4


 64%|██████▍   | 430/669 [10:45<05:23,  1.35s/it]

NM_002796.2 2_3_4_5


 64%|██████▍   | 431/669 [10:47<05:26,  1.37s/it]

NM_002796.2 3_4_5_6


 65%|██████▍   | 432/669 [10:48<05:29,  1.39s/it]

NM_002880.3 15_14_13


 65%|██████▍   | 433/669 [10:50<05:21,  1.36s/it]

NM_002916.3 10_9_8


 65%|██████▍   | 434/669 [10:51<05:19,  1.36s/it]

NM_002917.1 4_3_2


 65%|██████▌   | 435/669 [10:52<05:09,  1.32s/it]

NM_002917.1 5_4_3


 65%|██████▌   | 436/669 [10:53<05:05,  1.31s/it]

NM_002952.3 6_5_4_3


 65%|██████▌   | 437/669 [10:55<05:31,  1.43s/it]

NM_003211.4 3_5_6


 65%|██████▌   | 438/669 [10:56<05:18,  1.38s/it]

NM_003259.3 8_9_10


 66%|██████▌   | 439/669 [10:58<05:08,  1.34s/it]

NM_003321.4 7_6_5_4


 66%|██████▌   | 440/669 [10:59<05:32,  1.45s/it]

NM_003321.4 9_8_7


 66%|██████▌   | 441/669 [11:01<05:50,  1.54s/it]

NM_003755.3 10_9_7


 66%|██████▌   | 442/669 [11:02<05:34,  1.47s/it]

NM_003755.3 7_6_5


 66%|██████▌   | 443/669 [11:04<05:27,  1.45s/it]

NM_003801.3 6_8_9


 66%|██████▋   | 444/669 [11:05<05:16,  1.41s/it]

NM_003801.3 8_9_10


 67%|██████▋   | 445/669 [11:06<05:08,  1.38s/it]

NM_003819.3 12_11_10_9


 67%|██████▋   | 446/669 [11:08<05:02,  1.36s/it]

NM_003819.3 13_12_11_10


 67%|██████▋   | 447/669 [11:09<04:58,  1.35s/it]

NM_003907.2 7_12_13


 67%|██████▋   | 448/669 [11:10<04:50,  1.32s/it]

NM_003968.3 14_13_12


 67%|██████▋   | 449/669 [11:11<04:42,  1.28s/it]

NM_004083.5 3_2_1


 67%|██████▋   | 450/669 [11:13<04:38,  1.27s/it]

NM_004152.3 1_3_4_5


 67%|██████▋   | 451/669 [11:14<04:56,  1.36s/it]

NM_004178.4 4_5_6


 68%|██████▊   | 452/669 [11:16<04:52,  1.35s/it]

NM_004178.4 6_7_8


 68%|██████▊   | 453/669 [11:17<04:46,  1.33s/it]

NM_004260.3 17_16_15


 68%|██████▊   | 454/669 [11:18<04:40,  1.30s/it]

NM_004295.3 1_2_3


 68%|██████▊   | 455/669 [11:19<04:41,  1.31s/it]

NM_004295.3 2_3_4


 68%|██████▊   | 456/669 [11:21<04:44,  1.34s/it]

NM_004295.3 3_4_5_6


 68%|██████▊   | 457/669 [11:22<04:49,  1.36s/it]

NM_004300.3 3_4_5


 68%|██████▊   | 458/669 [11:24<04:43,  1.35s/it]

NM_004489.4 10_9_8


 69%|██████▊   | 459/669 [11:25<04:38,  1.33s/it]

NM_004489.4 6_5_4


 69%|██████▉   | 460/669 [11:26<04:38,  1.33s/it]

NM_004489.4 8_7_6_5


 69%|██████▉   | 461/669 [11:28<04:38,  1.34s/it]

NM_004489.4 9_8_7_6


 69%|██████▉   | 462/669 [11:29<04:31,  1.31s/it]

NM_004493.2 5_4_2


 69%|██████▉   | 463/669 [11:30<04:27,  1.30s/it]

NM_004548.2 1_2_3


 69%|██████▉   | 464/669 [11:31<04:28,  1.31s/it]

NM_004551.2 1_2_3


 70%|██████▉   | 465/669 [11:33<04:29,  1.32s/it]

NM_004582.3 1_2_3


 70%|██████▉   | 466/669 [11:34<04:24,  1.30s/it]

NM_004582.3 2_3_4


 70%|██████▉   | 467/669 [11:35<04:21,  1.29s/it]

NM_004582.3 4_5_6_7


 70%|██████▉   | 468/669 [11:37<04:18,  1.29s/it]

NM_004582.3 6_7_8


 70%|███████   | 469/669 [11:38<04:33,  1.37s/it]

NM_004640.6 10_9_8_7


 70%|███████   | 470/669 [11:40<04:56,  1.49s/it]

NM_004640.6 7_6_1


 70%|███████   | 471/669 [11:41<04:43,  1.43s/it]

NM_004640.6 9_8_7_6


 71%|███████   | 472/669 [11:43<05:02,  1.53s/it]

NM_004699.3 7_8_9


 71%|███████   | 473/669 [11:44<04:46,  1.46s/it]

NM_004708.3 3_4_5


 71%|███████   | 474/669 [11:46<04:37,  1.42s/it]

NM_004889.3 3_2_1


 71%|███████   | 475/669 [11:47<04:30,  1.39s/it]

NM_004939.2 21_22_23


 71%|███████   | 476/669 [11:48<04:20,  1.35s/it]

NM_004965.6 4_3_2


 71%|███████▏  | 477/669 [11:49<04:14,  1.32s/it]

NM_004990.3 18_19_20


 71%|███████▏  | 478/669 [11:51<04:12,  1.32s/it]

NM_005015.3 2_3_4_5


 72%|███████▏  | 479/669 [11:52<04:10,  1.32s/it]

NM_005324.4 3_2_1


 72%|███████▏  | 480/669 [11:53<04:11,  1.33s/it]

NM_005381.2 9_8_7


 72%|███████▏  | 481/669 [11:55<04:09,  1.33s/it]

NM_005455.4 10_9_8


 72%|███████▏  | 482/669 [11:56<04:04,  1.31s/it]

NM_005507.2 3_2_1


 72%|███████▏  | 483/669 [11:57<04:07,  1.33s/it]

NM_005510.3 3_2_1


 72%|███████▏  | 484/669 [11:59<04:02,  1.31s/it]

NM_005510.3 5_4_3_2


 72%|███████▏  | 485/669 [12:00<04:03,  1.32s/it]

NM_005510.3 6_5_4_3


 73%|███████▎  | 486/669 [12:01<04:02,  1.32s/it]

NM_005517.3 1_2_3_4


 73%|███████▎  | 487/669 [12:03<04:12,  1.39s/it]

NM_005517.3 2_3_4_5


 73%|███████▎  | 488/669 [12:04<04:21,  1.45s/it]

NM_005679.3 11_9_8


 73%|███████▎  | 489/669 [12:06<04:17,  1.43s/it]

NM_005679.3 9_8_7


 73%|███████▎  | 490/669 [12:07<04:13,  1.42s/it]

NM_005729.3 3_4_5


 73%|███████▎  | 491/669 [12:09<04:15,  1.43s/it]

NM_005762.2 14_15_16


 74%|███████▎  | 492/669 [12:11<04:36,  1.56s/it]

NM_005801.3 1_2_3


 74%|███████▎  | 493/669 [12:12<04:44,  1.62s/it]

NM_005804.3 6_5_4_2


 74%|███████▍  | 494/669 [12:14<04:32,  1.56s/it]

NM_005804.3 8_7_6


 74%|███████▍  | 495/669 [12:15<04:26,  1.53s/it]

NM_005861.3 1_3_4_5


 74%|███████▍  | 496/669 [12:17<04:14,  1.47s/it]

NM_005911.5 1_2_3_4


 74%|███████▍  | 497/669 [12:19<04:47,  1.67s/it]

NM_005911.5 4_5_6_7


 74%|███████▍  | 498/669 [12:21<05:14,  1.84s/it]

NM_005911.5 5_6_7_8


 75%|███████▍  | 499/669 [12:23<05:37,  1.98s/it]

NM_005984.4 4_3_2_1


 75%|███████▍  | 500/669 [12:25<05:08,  1.82s/it]

NM_006088.5 1_2_3


 75%|███████▍  | 501/669 [12:26<04:46,  1.71s/it]

NM_006098.4 5_4_3


 75%|███████▌  | 502/669 [12:28<04:53,  1.76s/it]

NM_006098.4 7_6_5_4


 75%|███████▌  | 503/669 [12:30<04:55,  1.78s/it]

NM_006325.4 1_2_3_4


 75%|███████▌  | 504/669 [12:31<04:32,  1.65s/it]

NM_006331.7 3_4_5


 75%|███████▌  | 505/669 [12:32<04:12,  1.54s/it]

NM_006351.3 12_11_10


 76%|███████▌  | 506/669 [12:34<03:57,  1.46s/it]

NM_006384.3 5_3_2


 76%|███████▌  | 507/669 [12:35<03:45,  1.39s/it]

NM_006392.3 3_5_6_7


 76%|███████▌  | 508/669 [12:36<03:46,  1.41s/it]

NM_006392.3 5_6_7_8


 76%|███████▌  | 509/669 [12:38<03:44,  1.41s/it]

NM_006392.3 6_7_8_9


 76%|███████▌  | 510/669 [12:39<03:44,  1.41s/it]

NM_006392.3 7_8_9_10


 76%|███████▋  | 511/669 [12:41<03:44,  1.42s/it]

NM_006392.3 8_9_10_11


 77%|███████▋  | 512/669 [12:42<03:45,  1.44s/it]

NM_006461.3 22_21_20


 77%|███████▋  | 513/669 [12:43<03:39,  1.40s/it]

NM_006461.3 23_22_21


 77%|███████▋  | 514/669 [12:45<03:37,  1.40s/it]

NM_006472.5 3_2_1


 77%|███████▋  | 515/669 [12:47<03:48,  1.48s/it]

NM_006480.4 5_6_7


 77%|███████▋  | 516/669 [12:48<03:44,  1.47s/it]

NM_006597.5 5_4_3_2


 77%|███████▋  | 517/669 [12:51<05:07,  2.02s/it]

NM_006597.5 6_5_4_3


 77%|███████▋  | 518/669 [12:54<05:51,  2.33s/it]

NM_006597.5 7_6_5_4


 78%|███████▊  | 519/669 [12:57<06:15,  2.50s/it]

NM_006597.5 8_7_6_5


 78%|███████▊  | 520/669 [13:00<06:37,  2.67s/it]

NM_006694.3 3_2_1


 78%|███████▊  | 521/669 [13:02<05:51,  2.37s/it]

NM_006712.4 3_2_1


 78%|███████▊  | 522/669 [13:03<05:07,  2.09s/it]

NM_006712.4 4_3_2


 78%|███████▊  | 523/669 [13:05<04:38,  1.91s/it]

NM_006743.4 2_3_4_5


 78%|███████▊  | 524/669 [13:06<04:20,  1.80s/it]

NM_006743.4 3_4_5_6


 78%|███████▊  | 525/669 [13:08<04:14,  1.76s/it]

NM_006755.1 4_5_6


 79%|███████▊  | 526/669 [13:10<04:07,  1.73s/it]

NM_006764.4 8_7_6_5


 79%|███████▉  | 527/669 [13:11<04:01,  1.70s/it]

NM_006799.3 3_4_5


 79%|███████▉  | 528/669 [13:13<03:53,  1.65s/it]

NM_006899.4 11_10_9


 79%|███████▉  | 529/669 [13:14<03:44,  1.60s/it]

NM_006925.4 1_4_5


 79%|███████▉  | 530/669 [13:16<03:43,  1.60s/it]

NM_007057.3 8_3_2


 79%|███████▉  | 531/669 [13:17<03:34,  1.55s/it]

NM_007099.3 3_4_5


 80%|███████▉  | 532/669 [13:19<03:26,  1.50s/it]

NM_007103.3 1_2_4_5


 80%|███████▉  | 533/669 [13:20<03:28,  1.53s/it]

NM_007104.4 1_2_3_4


 80%|███████▉  | 534/669 [13:23<03:49,  1.70s/it]

NM_007104.4 2_3_4_5


 80%|███████▉  | 535/669 [13:25<04:26,  1.99s/it]

NM_007209.3 3_2_1


 80%|████████  | 536/669 [13:27<04:06,  1.85s/it]

NM_007260.2 7_8_9


 80%|████████  | 537/669 [13:28<03:47,  1.73s/it]

NM_012071.3 2_3_4_5


 80%|████████  | 538/669 [13:30<03:35,  1.64s/it]

NM_012071.3 3_4_5_6


 81%|████████  | 539/669 [13:31<03:23,  1.57s/it]

NM_012071.3 4_5_6_7


 81%|████████  | 540/669 [13:32<03:16,  1.52s/it]

NM_012079.5 10_7_6


 81%|████████  | 541/669 [13:34<03:12,  1.51s/it]

NM_012162.3 5_4_3_2


 81%|████████  | 542/669 [13:35<03:08,  1.48s/it]

NM_012162.3 6_5_4_3


 81%|████████  | 543/669 [13:37<03:07,  1.49s/it]

NM_012162.3 8_7_6


 81%|████████▏ | 544/669 [13:38<03:09,  1.51s/it]

NM_012225.3 1_2_3


 81%|████████▏ | 545/669 [13:40<03:05,  1.49s/it]

NM_012402.3 4_3_2


 82%|████████▏ | 546/669 [13:41<03:02,  1.49s/it]

NM_012423.3 2_3_4_5


 82%|████████▏ | 547/669 [13:43<03:06,  1.53s/it]

NM_012423.3 3_4_5_6


 82%|████████▏ | 548/669 [13:44<03:05,  1.53s/it]

NM_013237.3 2_3_4


 82%|████████▏ | 549/669 [13:46<02:59,  1.50s/it]

NM_013328.3 5_4_3_2


 82%|████████▏ | 550/669 [13:47<03:00,  1.52s/it]

NM_013328.3 6_5_4_3


 82%|████████▏ | 551/669 [13:49<03:00,  1.53s/it]

NM_013379.2 10_9_8_7


 83%|████████▎ | 552/669 [13:51<03:00,  1.54s/it]

NM_013379.2 5_4_3


 83%|████████▎ | 553/669 [13:52<03:00,  1.56s/it]

NM_013379.2 8_7_5_4


 83%|████████▎ | 554/669 [13:54<02:57,  1.54s/it]

NM_013379.2 9_8_7_5


 83%|████████▎ | 555/669 [13:55<02:54,  1.53s/it]

NM_014366.4 10_11_12


 83%|████████▎ | 556/669 [13:57<02:51,  1.52s/it]

NM_014366.4 9_10_11


 83%|████████▎ | 557/669 [13:58<02:48,  1.51s/it]

NM_014596.5 1_2_3


 83%|████████▎ | 558/669 [14:00<02:46,  1.50s/it]

NM_015201.4 9_8_7


 84%|████████▎ | 559/669 [14:01<02:44,  1.50s/it]

NM_015917.2 1_2_3_4


 84%|████████▎ | 560/669 [14:03<02:44,  1.51s/it]

NM_015917.2 2_3_4_6


 84%|████████▍ | 561/669 [14:04<02:44,  1.53s/it]

NM_015917.2 3_4_6_7


 84%|████████▍ | 562/669 [14:06<02:46,  1.55s/it]

NM_015918.3 3_2_1


 84%|████████▍ | 563/669 [14:07<02:40,  1.51s/it]

NM_015918.3 4_3_2


 84%|████████▍ | 564/669 [14:09<02:36,  1.50s/it]

NM_015956.2 5_7_8


 84%|████████▍ | 565/669 [14:10<02:36,  1.51s/it]

NM_015959.3 3_4_5_6


 85%|████████▍ | 566/669 [14:12<02:39,  1.55s/it]

NM_016286.3 4_3_2_1


 85%|████████▍ | 567/669 [14:13<02:36,  1.53s/it]

NM_016368.4 5_4_3


 85%|████████▍ | 568/669 [14:15<02:33,  1.52s/it]

NM_016404.2 3_2_1


 85%|████████▌ | 569/669 [14:16<02:31,  1.52s/it]

NM_016538.2 8_7_6_5


 85%|████████▌ | 570/669 [14:18<02:27,  1.49s/it]

NM_017706.4 2_3_5


 85%|████████▌ | 571/669 [14:19<02:24,  1.48s/it]

NM_017900.2 3_2_1


 86%|████████▌ | 572/669 [14:21<02:22,  1.46s/it]

NM_017916.2 5_4_3


 86%|████████▌ | 573/669 [14:22<02:19,  1.46s/it]

NM_017952.5 16_17_18


 86%|████████▌ | 574/669 [14:24<02:17,  1.45s/it]

NM_018049.2 5_4_3


 86%|████████▌ | 575/669 [14:25<02:16,  1.45s/it]

NM_018321.3 5_6_7


 86%|████████▌ | 576/669 [14:27<02:14,  1.45s/it]

NM_018321.3 6_7_8


 86%|████████▌ | 577/669 [14:28<02:12,  1.44s/it]

NM_018645.5 3_2_1


 86%|████████▋ | 578/669 [14:29<02:10,  1.43s/it]

NM_018694.3 1_2_3


 87%|████████▋ | 579/669 [14:31<02:06,  1.40s/it]

NM_019852.4 8_7_6_5


 87%|████████▋ | 580/669 [14:32<02:04,  1.40s/it]

NM_020195.2 3_2_1


 87%|████████▋ | 581/669 [14:33<02:03,  1.40s/it]

NM_020195.2 4_3_2


 87%|████████▋ | 582/669 [14:35<02:01,  1.40s/it]

NM_020944.2 13_12_6


 87%|████████▋ | 583/669 [14:36<01:59,  1.39s/it]

NM_020944.2 16_14_13_12


 87%|████████▋ | 584/669 [14:38<01:58,  1.39s/it]

NM_021019.4 2_3_5


 87%|████████▋ | 585/669 [14:39<02:00,  1.44s/it]

NM_021170.3 3_2_1


 88%|████████▊ | 586/669 [14:41<02:00,  1.45s/it]

NM_021727.4 8_7_6


 88%|████████▊ | 587/669 [14:42<01:58,  1.45s/it]

NM_021933.3 5_6_7


 88%|████████▊ | 588/669 [14:44<01:57,  1.45s/it]

NM_022156.4 12_11_10


 88%|████████▊ | 589/669 [14:45<01:56,  1.46s/it]

NM_022551.2 1_2_3_4


 88%|████████▊ | 590/669 [14:47<02:00,  1.53s/it]

NM_022727.5 11_10_9_8


 88%|████████▊ | 591/669 [14:48<01:58,  1.52s/it]

NM_022734.2 9_11_12_13


 88%|████████▊ | 592/669 [14:50<01:57,  1.53s/it]

NM_022821.3 3_2_1


 89%|████████▊ | 593/669 [14:51<01:55,  1.52s/it]

NM_024299.2 1_2_3


 89%|████████▉ | 594/669 [14:53<01:51,  1.49s/it]

NM_024308.3 4_5_6


 89%|████████▉ | 595/669 [14:54<01:49,  1.47s/it]

NM_024319.3 5_4_3


 89%|████████▉ | 596/669 [14:56<01:48,  1.48s/it]

NM_024319.3 6_5_4


 89%|████████▉ | 597/669 [14:57<01:45,  1.46s/it]

NM_024339.3 10_11_12


 89%|████████▉ | 598/669 [14:58<01:41,  1.43s/it]

NM_024555.5 5_4_3_2


 90%|████████▉ | 599/669 [15:00<01:41,  1.45s/it]

NM_024555.5 6_5_4_3


 90%|████████▉ | 600/669 [15:01<01:39,  1.44s/it]

NM_024571.3 1_2_3


 90%|████████▉ | 601/669 [15:03<01:38,  1.45s/it]

NM_024860.3 2_3_4


 90%|████████▉ | 602/669 [15:04<01:34,  1.42s/it]

NM_025128.4 12_13_15


 90%|█████████ | 603/669 [15:06<01:33,  1.42s/it]

NM_025193.3 3_4_5


 90%|█████████ | 604/669 [15:07<01:33,  1.44s/it]

NM_025232.3 6_5_4


 90%|█████████ | 605/669 [15:09<01:50,  1.73s/it]

NM_030652.3 2_5_7


 91%|█████████ | 606/669 [15:12<02:05,  2.00s/it]

NM_030900.3 4_3_2


 91%|█████████ | 607/669 [15:14<01:54,  1.85s/it]

NM_030900.3 6_5_4_3


 91%|█████████ | 608/669 [15:15<01:43,  1.69s/it]

NM_031157.3 4_5_6


 91%|█████████ | 609/669 [15:16<01:39,  1.65s/it]

NM_031157.3 5_6_7


 91%|█████████ | 610/669 [15:18<01:33,  1.59s/it]

NM_031157.3 7_8_9_10


 91%|█████████▏| 611/669 [15:19<01:28,  1.52s/it]

NM_031299.5 5_4_3_2


 91%|█████████▏| 612/669 [15:21<01:25,  1.50s/it]

NM_031372.3 7_6_5


 92%|█████████▏| 613/669 [15:22<01:26,  1.55s/it]

NM_031471.5 9_11_12


 92%|█████████▏| 614/669 [15:24<01:23,  1.52s/it]

NM_032366.4 5_4_3_2


 92%|█████████▏| 615/669 [15:25<01:22,  1.52s/it]

NM_032478.3 5_4_1


 92%|█████████▏| 616/669 [15:27<01:19,  1.50s/it]

NM_032478.3 6_5_4


 92%|█████████▏| 617/669 [15:28<01:17,  1.50s/it]

NM_032478.3 8_7_6


 92%|█████████▏| 618/669 [15:30<01:16,  1.51s/it]

NM_032902.5 4_5_6


 93%|█████████▎| 619/669 [15:31<01:12,  1.45s/it]

NM_053046.3 2_3_4


 93%|█████████▎| 620/669 [15:32<01:09,  1.43s/it]

NM_079423.3 2_3_5


 93%|█████████▎| 621/669 [15:34<01:08,  1.43s/it]

NM_080598.5 7_6_1


 93%|█████████▎| 622/669 [15:35<01:08,  1.45s/it]

NM_080648.2 1_2_3


 93%|█████████▎| 623/669 [15:37<01:05,  1.43s/it]

NM_080649.2 1_2_3


 93%|█████████▎| 624/669 [15:38<01:04,  1.43s/it]

NM_130781.3 4_3_2_1


 93%|█████████▎| 625/669 [15:40<01:02,  1.42s/it]

NM_138353.2 10_11_12


 94%|█████████▎| 626/669 [15:41<01:00,  1.42s/it]

NM_138355.3 7_6_4


 94%|█████████▎| 627/669 [15:42<00:58,  1.39s/it]

NM_138689.2 3_2_1


 94%|█████████▍| 628/669 [15:44<00:57,  1.39s/it]

NM_138761.3 2_3_5


 94%|█████████▍| 629/669 [15:45<00:56,  1.41s/it]

NM_138769.2 15_16_17


 94%|█████████▍| 630/669 [15:47<00:57,  1.46s/it]

NM_139207.3 15_14_13


 94%|█████████▍| 631/669 [15:48<00:54,  1.43s/it]

NM_144589.2 6_5_4_3


 94%|█████████▍| 632/669 [15:50<00:53,  1.43s/it]

NM_144956.2 3_4_5


 95%|█████████▍| 633/669 [15:51<00:51,  1.44s/it]

NM_144957.2 3_4_5


 95%|█████████▍| 634/669 [15:52<00:50,  1.43s/it]

NM_144982.4 34_32_30


 95%|█████████▍| 635/669 [15:54<00:47,  1.40s/it]

NM_144998.3 3_2_1


 95%|█████████▌| 636/669 [15:55<00:46,  1.40s/it]

NM_145030.2 3_2_1


 95%|█████████▌| 637/669 [15:57<00:46,  1.44s/it]

NM_148175.2 17_18_19_20


 95%|█████████▌| 638/669 [15:58<00:43,  1.41s/it]

NM_148176.2 17_18_19_20


 96%|█████████▌| 639/669 [15:59<00:41,  1.39s/it]

NM_153824.2 7_6_5_4


 96%|█████████▌| 640/669 [16:01<00:40,  1.41s/it]

NM_170607.2 2_3_5


 96%|█████████▌| 641/669 [16:02<00:40,  1.45s/it]

NM_170607.2 3_5_6


 96%|█████████▌| 642/669 [16:04<00:38,  1.43s/it]

NM_170746.3 1_2_3


 96%|█████████▌| 643/669 [16:05<00:37,  1.46s/it]

NM_176096.2 1_3_5


 96%|█████████▋| 644/669 [16:07<00:36,  1.45s/it]

NM_177983.2 8_7_6


 96%|█████████▋| 645/669 [16:08<00:34,  1.45s/it]

NM_178443.2 9_11_12


 97%|█████████▋| 646/669 [16:10<00:33,  1.46s/it]

NM_181454.2 4_3_2


 97%|█████████▋| 647/669 [16:11<00:32,  1.46s/it]

NM_181455.2 4_3_2


 97%|█████████▋| 648/669 [16:13<00:30,  1.46s/it]

NM_181463.2 3_2_1


 97%|█████████▋| 649/669 [16:14<00:28,  1.44s/it]

NM_181575.4 10_9_8_7


 97%|█████████▋| 650/669 [16:16<00:29,  1.56s/it]

NM_181575.4 11_10_9_8


 97%|█████████▋| 651/669 [16:18<00:29,  1.66s/it]

NM_181575.4 4_3_2_1


 97%|█████████▋| 652/669 [16:19<00:28,  1.69s/it]

NM_181575.4 5_4_3_2


 98%|█████████▊| 653/669 [16:21<00:27,  1.70s/it]

NM_181575.4 7_6_5_4


 98%|█████████▊| 654/669 [16:23<00:26,  1.74s/it]

NM_181575.4 8_7_6_5


 98%|█████████▊| 655/669 [16:25<00:25,  1.80s/it]

NM_181575.4 9_8_7_6


 98%|█████████▊| 656/669 [16:27<00:24,  1.85s/it]

NM_182687.2 6_5_4


 98%|█████████▊| 657/669 [16:28<00:20,  1.73s/it]

NM_182687.2 8_7_6


 98%|█████████▊| 658/669 [16:30<00:17,  1.60s/it]

NM_183057.2 8_7_6


 99%|█████████▊| 659/669 [16:31<00:15,  1.54s/it]

NM_183241.1 1_2_3_4


 99%|█████████▊| 660/669 [16:33<00:14,  1.59s/it]

NM_183241.1 2_3_4_5


 99%|█████████▉| 661/669 [16:34<00:12,  1.60s/it]

NM_198317.2 2_3_4


 99%|█████████▉| 662/669 [16:36<00:10,  1.52s/it]

NM_198317.2 8_9_10


 99%|█████████▉| 663/669 [16:37<00:08,  1.47s/it]

NM_198976.2 11_12_13


 99%|█████████▉| 664/669 [16:39<00:07,  1.46s/it]

NM_198976.2 5_6_7


 99%|█████████▉| 665/669 [16:40<00:05,  1.46s/it]

NM_198976.2 6_7_8


100%|█████████▉| 666/669 [16:41<00:04,  1.44s/it]

NM_198976.2 7_8_9


100%|█████████▉| 667/669 [16:43<00:02,  1.44s/it]

NM_199337.2 1_2_3


100%|█████████▉| 668/669 [16:44<00:01,  1.44s/it]

NM_213720.2 3_2_1


100%|██████████| 669/669 [16:46<00:00,  1.50s/it]


In [11]:
# Write mat_all to a tab-separated text file (header row kept, row index omitted)
mat_all.to_csv("/path/to/K562_splicing_status_per_read_intron_groups.txt", sep="\t", header=True, index=False)


## Analysis of EXOSC10 KD

In [2]:
# Function to get the number of reads mapping to each intermediate isoform

def get_pattern_counts(multi_introns_df, total_introns, introns_of_interest, n_introns_gene, strand):
    """Count reads per splicing pattern over one group of introns.

    Parameters
    ----------
    multi_introns_df : pandas.DataFrame
        Isoform counts for a single gene/sample. The columns read here are
        'count', 'splice_status' (underscore-separated status per intron) and
        'intron_numbers' (underscore-separated intron indices, one per status).
    total_introns : int
        Number of introns in the group. Kept for interface compatibility:
        patterns are only counted when it is positive.
    introns_of_interest : str
        Underscore-separated intron numbers of the group (e.g. "4_3_2"), in
        the same numbering/order as the strand-converted read introns.
    n_introns_gene : int
        Total number of introns in the transcript; used to renumber introns
        of "-" strand genes as ``n_introns_gene - index``.
    strand : str
        "+" or "-". On "+", read intron indices are shifted by +1.
        # NOTE(review): this presumably converts 0-based genomic-order indices
        # to 1-based transcript-order numbers -- confirm against the upstream table.

    Returns
    -------
    pandas.DataFrame or None
        Columns 'pattern' and 'count', one row per observed pattern. ``None``
        is returned unless at least two distinct patterns were observed
        (this threshold is inherited from the original implementation).

    Raises
    ------
    ValueError
        If a row has to be processed and ``strand`` is neither "+" nor "-".
    """
    wanted = introns_of_interest.split("_")
    n_wanted = len(wanted)

    # Keep isoforms that were observed at least once and that contain no undetermined ("UND") intron
    keep = (multi_introns_df['count'] > 0) & (~multi_introns_df['splice_status'].str.contains("UND"))
    isoforms = multi_introns_df[keep]

    # Equivalent to the original inner guard `skipped_count < total_introns` with skipped_count == 0
    allow_counting = 0 < total_introns

    pattern_dict = {}

    for status_str, numbers_str, n_reads in zip(isoforms['splice_status'],
                                                isoforms['intron_numbers'],
                                                isoforms['count']):
        raw_numbers = numbers_str.split("_")
        if strand == "-":
            intron_tokens = [str(n_introns_gene - int(i)) for i in raw_numbers]
        elif strand == "+":
            intron_tokens = [str(int(i) + 1) for i in raw_numbers]
        else:
            # Previously this left the intron list undefined (or stale from the previous row)
            raise ValueError("strand must be '+' or '-', got {!r}".format(strand))

        # Look for the group as a contiguous run of whole intron numbers.
        # A plain substring test on the joined string would also accept e.g.
        # "1_2_3" inside "11_2_3" and then extract a truncated pattern.
        start = next(
            (s for s in range(len(intron_tokens) - n_wanted + 1)
             if intron_tokens[s:s + n_wanted] == wanted),
            None,
        )
        if start is None:
            continue

        status_tokens = status_str.split("_")
        # Index explicitly so that a status list shorter than the intron list still raises
        statuses = [status_tokens[pos] for pos in range(start, start + n_wanted)]

        # Isoforms with any skipped intron in the group are not counted
        if any("SKP" in status for status in statuses):
            continue
        if not allow_counting:
            continue

        pattern = "_".join(statuses)
        pattern_dict[pattern] = pattern_dict.get(pattern, 0) + n_reads

    pattern_df = pd.DataFrame.from_dict(pattern_dict, orient='index').reset_index()

    if len(pattern_df) > 1:
        pattern_df.columns = ['pattern','count']
        return pattern_df

    return None
    
    
# Apply the function above to all genes of interest in a given sample
def get_read_count_path_per_gene(multi_introns_df, cand_gene_df):
    """Collect splicing-pattern read counts for every candidate intron group and sample.

    Relies on two notebook-level globals that must be defined before calling:
    ``sample_list`` (iterable of sample names) and ``hg38_intron_df``
    (annotation with 'gene', 'intron_total' and 'strand' columns).

    Parameters
    ----------
    multi_introns_df : pandas.DataFrame
        Isoform counts for all samples; filtered here on 'gene' and
        'sample_name' and then passed to ``get_pattern_counts``.
    cand_gene_df : pandas.DataFrame
        One row per intron group, with columns 'gene', 'gene_name' and
        'analyzed_introns' (underscore-separated intron numbers).

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-group/per-sample pattern tables, annotated
        with 'gene_name', 'gene', 'analyzed_introns', 'n_analyzed_introns'
        and 'sample_name'. Empty (but with these columns) when no group
        produced a pattern table.

    Raises
    ------
    ValueError
        If a candidate gene does not have exactly one 'intron_total' value
        in ``hg38_intron_df``.
    """
    results_list = []

    # Iterate over columns directly so that cand_gene_df does not need a 0..n-1 index
    for gene, gene_name, analyzed_introns in zip(cand_gene_df['gene'],
                                                 cand_gene_df['gene_name'],
                                                 cand_gene_df['analyzed_introns']):
        n_analyzed_introns = len(analyzed_introns.split("_"))

        # Gene-level annotation does not depend on the sample: look it up once
        gene_annot = hg38_intron_df.loc[hg38_intron_df['gene']==gene]
        intron_totals = gene_annot['intron_total'].drop_duplicates()
        if len(intron_totals) != 1:
            raise ValueError(
                "expected exactly one intron_total for gene {!r}, found {}".format(gene, len(intron_totals))
            )
        # int(<Series>) is deprecated in recent pandas; take the scalar explicitly
        n_introns_gene = int(intron_totals.iloc[0])
        strand = "".join(gene_annot['strand'].drop_duplicates().tolist())

        gene_reads = multi_introns_df[multi_introns_df['gene']==gene]

        for sample_name in sample_list:

            df = gene_reads[gene_reads['sample_name']==sample_name].reset_index(drop=True)

            results_df = get_pattern_counts(df, n_analyzed_introns, analyzed_introns, n_introns_gene, strand)

            # get_pattern_counts returns None when it has nothing to report
            if results_df is None:
                continue

            results_df['gene_name'] = gene_name
            results_df['gene'] = gene
            results_df['analyzed_introns'] = analyzed_introns
            results_df['n_analyzed_introns'] = n_analyzed_introns
            results_df['sample_name'] = sample_name

            results_list.append(results_df)

    if not results_list:
        # pd.concat refuses an empty list; hand back an empty table with the usual columns
        return pd.DataFrame(columns=['pattern','count','gene_name','gene','analyzed_introns','n_analyzed_introns','sample_name'])

    final_df = pd.concat(results_list)

    return final_df
    
    

In [4]:
# Load the intron annotation and the table of gene names
hg38_intron_df = pd.read_table("/path/to/annotation_files/hg38_all_intron_features.txt")
gene_names_df = pd.read_table("/path/to/annotation_files/hg38_UCSC_refGene_names.txt")
gene_names_df.columns = ['gene_name','gene_id']

# gene_id is the part of the 'gene' identifier before the first "." (used as the merge key)
hg38_intron_df['gene_id'] = hg38_intron_df['gene'].str.split("\\.").str.get(0)

# Attach gene names to the annotation and keep one row per (gene_name, gene, intron_total)
gene_names_df = (
    gene_names_df
    .merge(hg38_intron_df, on='gene_id')
    .loc[:, ['gene_name','gene','intron_total']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Intron coordinates only, taken from a copy of the annotation
hg38_intron_coord = hg38_intron_df.copy()[['chrom','start','end','gene','intron_pos']]




In [7]:
# Load results from cluster from identifying splicing order paths in both WT K562 replicates
path_df_tmp = pd.read_table('/path/to/K562_chromatin_splicing_paths_non_consecutive_introns.2mergedReps.RefSeq.max4introns.txt')
path_df = path_df_tmp[['sample_name','gene','gene_name','analyzed_introns','n_analyzed_introns','full_path','full_path_score','rank']].drop_duplicates().sort_values(by=['gene_name','analyzed_introns','rank']).reset_index(drop=True)

# Remove duplicates intron groups because they belong to different transcripts
intron_groups = path_df[['gene','gene_name','analyzed_introns','n_analyzed_introns']].drop_duplicates().reset_index(drop=True)

intron_groups_3 = intron_groups[intron_groups['n_analyzed_introns']==3].reset_index(drop=True)
intron_groups_3['int1'] = intron_groups_3['analyzed_introns'].str.split("_").str[0].astype(int)
intron_groups_3['int2'] = intron_groups_3['analyzed_introns'].str.split("_").str[1].astype(int)
intron_groups_3['int3'] = intron_groups_3['analyzed_introns'].str.split("_").str[2].astype(int)

intron_groups_3 = intron_groups_3.merge(hg38_intron_coord, left_on=['gene','int1'], right_on=['gene','intron_pos']).rename(columns={'chrom':'chrom_1', 'start':'start_1', 'end':'end_1'})
intron_groups_3 = intron_groups_3.merge(hg38_intron_coord, left_on=['gene','int2'], right_on=['gene','intron_pos']).rename(columns={'chrom':'chrom_2', 'start':'start_2', 'end':'end_2'})
intron_groups_3 = intron_groups_3.merge(hg38_intron_coord, left_on=['gene','int3'], right_on=['gene','intron_pos']).rename(columns={'chrom':'chrom_3', 'start':'start_3', 'end':'end_3'})

intron_groups_3 = intron_groups_3.sort_values(by=['gene','int1','int2','int3']).drop_duplicates(subset=['chrom_1','start_1','end_1','chrom_2','start_2','end_2','chrom_3','start_3','end_3']).reset_index(drop=True)

# Intron groups made of four analyzed introns
intron_groups_4 = intron_groups[intron_groups['n_analyzed_introns']==4].reset_index(drop=True)

# 'analyzed_introns' holds the intron numbers joined by "_"; store each one as an integer column
intron_ids_4 = intron_groups_4['analyzed_introns'].str.split("_")
for intron_rank in range(1, 5):
    intron_groups_4['int%d' % intron_rank] = intron_ids_4.str[intron_rank - 1].astype(int)

# Attach the coordinates of each of the four introns.
# The coordinate columns are renamed BEFORE merging and the join uses a shared key name,
# so no un-renamed column (e.g. 'intron_pos') is carried from one merge to the next.
# Merging the raw table four times in a row makes pandas generate a duplicate
# 'intron_pos_x' column on the fourth merge (FutureWarning in pandas 1.x, MergeError in pandas >= 2.0).
# NOTE: only the coordinate columns of hg38_intron_coord are attached here; any other
# column of that table is intentionally not carried over into intron_groups_4.
for intron_rank in range(1, 5):
    coord_for_rank = hg38_intron_coord[['gene', 'intron_pos', 'chrom', 'start', 'end']].rename(
        columns={'intron_pos': 'int%d' % intron_rank,
                 'chrom': 'chrom_%d' % intron_rank,
                 'start': 'start_%d' % intron_rank,
                 'end': 'end_%d' % intron_rank})
    intron_groups_4 = intron_groups_4.merge(coord_for_rank, on=['gene', 'int%d' % intron_rank])

# Keep a single row per set of intron coordinates (the first one after sorting)
coord_columns_4 = ['chrom_1', 'start_1', 'end_1',
                   'chrom_2', 'start_2', 'end_2',
                   'chrom_3', 'start_3', 'end_3',
                   'chrom_4', 'start_4', 'end_4']
intron_groups_4 = intron_groups_4.sort_values(by=['gene','int1','int2','int3','int4']).drop_duplicates(subset=coord_columns_4).reset_index(drop=True)

# Columns that identify an intron group
fields = ['gene','gene_name','analyzed_introns','n_analyzed_introns']

# Stack the de-duplicated 3-intron and 4-intron groups into a single table
intron_groups_nodup = pd.concat([intron_groups_3[fields], intron_groups_4[fields]])
intron_groups_nodup = intron_groups_nodup.sort_values(by=['gene_name','analyzed_introns']).reset_index(drop=True)

# Merge back with paths: only paths belonging to a retained intron group survive the inner join
path_df_tmp_nodup = path_df_tmp.merge(intron_groups_nodup, on=fields)

path_columns = ['sample_name','gene','gene_name','analyzed_introns','n_analyzed_introns',
                'full_path','full_path_score','rank']
path_df_nodup = path_df_tmp_nodup[path_columns].drop_duplicates()
path_df_nodup = path_df_nodup.sort_values(by=['gene_name','analyzed_introns','rank']).reset_index(drop=True)

# Split the paths by replicate so that they can be placed side by side
from_rep1 = path_df_nodup['sample_name'] == 'chr_rep1'
from_rep2 = path_df_nodup['sample_name'] == 'chr_rep2'
path_df_rep1 = path_df_nodup.loc[from_rep1].reset_index(drop=True)
path_df_rep2 = path_df_nodup.loc[from_rep2].reset_index(drop=True)

# One row per path found in both replicates; the remaining columns get the
# default pandas suffixes (_x for rep1, _y for rep2)
path_df_piv = pd.merge(path_df_rep1, path_df_rep2,
                       on=['gene_name','gene','analyzed_introns','n_analyzed_introns','full_path'])

# Reproducible paths are those holding the same rank in both samples
same_rank_in_both = path_df_piv['rank_x'] == path_df_piv['rank_y']
path_df_reprod = path_df_piv.loc[same_rank_in_both].reset_index(drop=True)


In [8]:
# Intermediate isoform count tables from the scrambled and EXOSC10 knockdowns,
# keyed by the sample name that gets attached to every row of the table
kd_count_files = {
    "scrambled_KD_rep1": "/path/to/K562_scrambled_KD_nucRNA_hg38_multi_introns_isoforms_counts.withSKP.RefSeq.txt",
    "EXOSC10_KD_rep1": "/path/to/K562_EXOSC10_KD_nucRNA_hg38_multi_introns_isoforms_counts.withSKP.RefSeq.txt",
    "scrambled_KD_rep2": "/path/to/K562_scramble_KD_nucRNA_rep2_hg38_multi_introns_isoforms_counts.withSKP.RefSeq.txt",
    "EXOSC10_KD_rep2": "/path/to/K562_EXOSC10_KD_nucRNA_rep2_hg38_multi_introns_isoforms_counts.withSKP.RefSeq.txt",
}

# Load each table, label it with its sample name and attach the gene names
kd_counts_by_sample = {}
for kd_sample, kd_path in kd_count_files.items():
    kd_counts = pd.read_table(kd_path)
    kd_counts['sample_name'] = kd_sample
    kd_counts_by_sample[kd_sample] = kd_counts.merge(gene_names_df, on='gene')

# Per-sample tables under their individual names
scr1_multi_intron_counts = kd_counts_by_sample["scrambled_KD_rep1"]
exo1_multi_intron_counts = kd_counts_by_sample["EXOSC10_KD_rep1"]
scr2_multi_intron_counts = kd_counts_by_sample["scrambled_KD_rep2"]
exo2_multi_intron_counts = kd_counts_by_sample["EXOSC10_KD_rep2"]

# Concatenate all samples (scr1, exo1, scr2, exo2)
multi_intron_counts = pd.concat(list(kd_counts_by_sample.values())).reset_index(drop=True)

# Sample names in order of first appearance
sample_list = multi_intron_counts['sample_name'].drop_duplicates().tolist()

In [9]:
# Intron groups to analyze: those that have at least one reproducible path
cand_gene_df = path_df_reprod.loc[:, ['gene','gene_name','analyzed_introns']].drop_duplicates().reset_index(drop=True)

# Read counts for these intron groups in the knockdown samples
test_df = get_read_count_path_per_gene(multi_intron_counts, cand_gene_df)

# Total count per sample and gene region, used as the normalization denominator
region_keys = ['sample_name','gene_name','gene','analyzed_introns','n_analyzed_introns']
counts_per_region = test_df.groupby(region_keys)['count'].sum().rename('total_count').reset_index()

test_df2 = test_df.merge(counts_per_region, on=region_keys).assign(
    # fraction of the region's reads carried by this row
    norm_count=lambda df: df['count'] / df['total_count'],
    # splicing level = number of 'YES' occurrences in the pattern
    level=lambda df: df['pattern'].str.count(r'YES'),
    # log10 of the normalized count
    log10_count=lambda df: np.log10(df['norm_count']),
)

# Write to file for plotting in R
test_df2.to_csv('/path/to/EXOSC10_KD_read_counts_for_intron_groups_in_paths_with_Reps.txt',
                sep="\t", header=True, index=False)


In [10]:
# Wide table: one row per (gene, intron group, pattern, level), one 'count' column per sample.
# pivot_table aggregates with its default function (mean); combinations that are
# absent from a sample are set to 0.
pivot_index = ['gene_name','gene','analyzed_introns','pattern','level']
counts_by_sample = test_df2.pivot_table(index=pivot_index, columns='sample_name', values='count')
test_df_piv = counts_by_sample.fillna(0).reset_index()

In [11]:
# Function to compare the levels of intermediate isoforms per splicing level

def compare_isoforms_by_level(gene, analyzed_introns, pattern, scr, exo, counts_df=None):
    """Fisher's exact test of one isoform pattern against all other patterns of its intron group.

    The 2x2 contingency table passed to ``scipy.stats.fisher_exact`` is::

        [[count of `pattern` in `exo`, summed count of the other patterns in `exo`],
         [count of `pattern` in `scr`, summed count of the other patterns in `scr`]]

    Parameters
    ----------
    gene, analyzed_introns : values identifying the intron group (matched against the
        'gene' and 'analyzed_introns' columns).
    pattern : value of the 'pattern' column to test.
    scr, exo : names of the count columns to compare (second and first row of the table).
    counts_df : DataFrame, optional
        Table holding the 'gene', 'analyzed_introns', 'pattern' columns and the two count
        columns. Defaults to the notebook-level ``test_df_piv_sub``.

    Returns
    -------
    (OR, pvalue) : the two values returned by ``scipy.stats.fisher_exact``.

    Raises
    ------
    ValueError
        If the (gene, analyzed_introns, pattern) combination does not match exactly one row.

    NOTE(review): the "other patterns" are every other row of the same intron group present
    in the table; they are not restricted to the splicing level of `pattern`. Confirm that
    this is the intended background.
    """
    if counts_df is None:
        counts_df = test_df_piv_sub

    # Build the row selections once instead of once per extracted count
    in_group = (counts_df['gene']==gene) & (counts_df['analyzed_introns']==analyzed_introns)
    is_pattern = counts_df['pattern']==pattern

    pattern_rows = counts_df[in_group & is_pattern]
    if len(pattern_rows) != 1:
        raise ValueError("Expected exactly one row for gene=%r, analyzed_introns=%r, pattern=%r but found %d"
                         % (gene, analyzed_introns, pattern, len(pattern_rows)))
    other_rows = counts_df[in_group & ~is_pattern]

    # Extract the scalar explicitly: int() on a one-element Series is deprecated in recent pandas
    pattern_count_scr = int(pattern_rows[scr].iloc[0])
    pattern_count_exo = int(pattern_rows[exo].iloc[0])

    # Counts of all the other patterns of that intron group
    total_count_scr = int(other_rows[scr].sum())
    total_count_exo = int(other_rows[exo].sum())

    OR, pvalue = scipy.stats.fisher_exact([[pattern_count_exo,total_count_exo],[pattern_count_scr,total_count_scr]])

    return(OR, pvalue)
    

In [12]:
# Filter for a minimum coverage (> 10 in both replicates of at least one condition),
# and drop the patterns without any 'YES' (level 0)
covered_in_scr = (test_df_piv['scrambled_KD_rep1']>10) & (test_df_piv['scrambled_KD_rep2']>10)
covered_in_exo = (test_df_piv['EXOSC10_KD_rep1']>10) & (test_df_piv['EXOSC10_KD_rep2']>10)
test_df_piv_sub = test_df_piv[(covered_in_scr | covered_in_exo) & (test_df_piv['level']>0)].reset_index(drop=True)

# Compare conditions using function defined above.
# The test is run once per row and replicate, and the (OR, pvalue) pair is unpacked
# afterwards, rather than being recomputed separately for the odds ratio and the p-value.
fisher_rep1 = [compare_isoforms_by_level(gene, analyzed_introns, pattern, "scrambled_KD_rep1", "EXOSC10_KD_rep1")
               for gene, analyzed_introns, pattern in zip(test_df_piv_sub['gene'], test_df_piv_sub['analyzed_introns'], test_df_piv_sub['pattern'])]
fisher_rep2 = [compare_isoforms_by_level(gene, analyzed_introns, pattern, "scrambled_KD_rep2", "EXOSC10_KD_rep2")
               for gene, analyzed_introns, pattern in zip(test_df_piv_sub['gene'], test_df_piv_sub['analyzed_introns'], test_df_piv_sub['pattern'])]

# Columns are added in the same order as before so that the exported table keeps its layout
test_df_piv_sub['OR_rep1'] = pd.Series([res[0] for res in fisher_rep1], index=test_df_piv_sub.index, dtype=float)
test_df_piv_sub['OR_rep2'] = pd.Series([res[0] for res in fisher_rep2], index=test_df_piv_sub.index, dtype=float)
test_df_piv_sub['pvalue_rep1'] = pd.Series([res[1] for res in fisher_rep1], index=test_df_piv_sub.index, dtype=float)
test_df_piv_sub['pvalue_rep2'] = pd.Series([res[1] for res in fisher_rep2], index=test_df_piv_sub.index, dtype=float)

# Correct for multiple testing (Benjamini-Hochberg), separately for each replicate
test_df_piv_sub['FDR_rep1'] = multipletests(test_df_piv_sub['pvalue_rep1'], alpha=0.05, method='fdr_bh')[1]
test_df_piv_sub['FDR_rep2'] = multipletests(test_df_piv_sub['pvalue_rep2'], alpha=0.05, method='fdr_bh')[1]

# Add a category for statistical significance: FDR < 0.1 and odds ratio > 1 in both
# replicates, for patterns that still contain at least one "NO"
test_df_piv_sub['sig_category'] = 'non-significant'
test_df_piv_sub.loc[(test_df_piv_sub['FDR_rep1']<0.1) & (test_df_piv_sub['FDR_rep2']<0.1) &
                    ((test_df_piv_sub['OR_rep1']>1) & (test_df_piv_sub['OR_rep2']>1)) &
                    (test_df_piv_sub['pattern'].str.contains("NO")), 'sig_category'] = 'significant'

# Retrieve intron groups that show significant differences
# (the pattern and odds-ratio conditions repeat criteria already applied above; kept as explicit guards)
test_df_piv_sig = test_df_piv_sub[(test_df_piv_sub['sig_category']=='significant') &
                                  (test_df_piv_sub['pattern'].str.contains("YES")) &
                                  (test_df_piv_sub['pattern'].str.contains("NO")) &
                                  (test_df_piv_sub['OR_rep1']>1)].reset_index(drop=True)

In [13]:
# Write the per-pattern comparison table (all tested patterns) to file for the Supplemental Table
table_s4_path = '/path/to/TableS4_EXOSC10_KD_pattern_comparison_fishers_exact_test_with_reps.txt'
test_df_piv_sub.to_csv(table_s4_path, sep="\t", header=True, index=False)
