Author: Dan Shea  
Date: 2019.08.28  
#### Examining k-mers from the chr04 recombination hotspot region
We've already used `MEME` to examine potential motifs by performing a discriminant analysis using the sequences derived from the consensus calling of the SNP data for all 20 founders, and the IRGSP-v1.0 reference sequence. Partitioning the sequences into two sets, one containing the founders where recombination occurred, and the other containing the reference and the founders where no recombination events were recorded.

Next, I want to examine k-mer frequencies for k-mers 10 bp - 20 bp in length. The idea is simple, construct lists of all the kmers of length $l$ in each sequence. Aggregate the totals for each k-mer and compute the mean number of observations of a k-mer in the Recombinant Set (RS) and the Non-Recombinant Set (NRS). Perform a t-test on the means for k-mer to see if there is a statistically significant difference between those means. Then, perform False Discovery Rate (FDR) adjustments on the p-values to obtain q-values and test the q-values for significance (_i.e._ - $q < \alpha$ where $\alpha = 0.05$).

In [1]:
from Bio import SeqIO
from collections import OrderedDict
from statsmodels.stats.multitest import fdrcorrection
import numpy as np
import pandas as pd
import plotly.express as px
import scipy.stats as stats

In [2]:
# Read in the fasta sequences for both the RS and NRS
rs_file  = 'chr04_hotspot_locus_RS.fasta'
nrs_file = 'chr04_hotspot_locus_NRS.fasta'
RS  = OrderedDict()
NRS = OrderedDict()
for ifile in (rs_file, nrs_file):
    seqio = SeqIO.parse(ifile, format='fasta')
    for seq in seqio:
        if ifile is rs_file:
            RS[seq.id.split(':')[0]] = seq
        else:
            NRS[seq.id.split(':')[0]] = seq        

In [3]:
# Generate a list of kmers of length l for each sequence in an OrderedDict()
def get_kmers(d, l=10):
    results = OrderedDict()
    for key in d:
        results[key] = OrderedDict()
        seq = d[key]
        slen = len(seq)
        start = 0
        stop = l
        terminate = (slen - l) + 1
        while stop <= terminate:
            kmer = str(seq[start:stop].seq).upper()
            if kmer in results[key].keys():
                results[key][kmer] += 1
            else:
                results[key][kmer] = 1
            start = stop
            stop += l
    return results

In [4]:
mink, maxk = (5, 31)
RS_kmers = OrderedDict()
NRS_kmers = OrderedDict()
for m in range(mink, maxk):
    RS_kmers[m] = get_kmers(RS, m)
    NRS_kmers[m] = get_kmers(NRS, m)

In [5]:
kmer_obs = OrderedDict()
for m in range(mink, maxk):
    kmer_obs[m] = OrderedDict({'RS': OrderedDict(), 'NRS': OrderedDict()})
    RS_founders = {key: val for val, key in enumerate(RS_kmers[m])}
    NRS_founders= {key: val for val, key in enumerate(NRS_kmers[m])}
    
    for founder in RS_kmers[m]:
        for k in RS_kmers[m][founder]:
            if k not in kmer_obs[m]['RS'].keys():
                kmer_obs[m]['RS'][k] = np.zeros(len(RS_founders))
            if k not in kmer_obs[m]['NRS'].keys():
                kmer_obs[m]['NRS'][k] = np.zeros(len(NRS_founders))
            kmer_obs[m]['RS'][k][RS_founders[founder]] = RS_kmers[m][founder][k]    
    
    for founder in NRS_kmers[m]:
        for k in NRS_kmers[m][founder]:
            if k not in kmer_obs[m]['RS'].keys():
                kmer_obs[m]['RS'][k] = np.zeros(len(RS_founders))
            if k not in kmer_obs[m]['NRS'].keys():
                kmer_obs[m]['NRS'][k] = np.zeros(len(NRS_founders))
            kmer_obs[m]['NRS'][k][NRS_founders[founder]] = NRS_kmers[m][founder][k]

In [6]:
kmer_dfs = OrderedDict()
for m in kmer_obs:
    kmer_dfs[m] = pd.DataFrame(kmer_obs[m])

In [7]:
kmer_dfs[5][0:10]

Unnamed: 0,RS,NRS
AAAAA,"[34.0, 35.0, 34.0, 34.0, 33.0, 35.0, 34.0]","[33.0, 34.0, 34.0, 34.0, 33.0, 33.0, 33.0, 33...."
AAAAC,"[15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0]","[15.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15...."
AAAAG,"[19.0, 19.0, 19.0, 19.0, 20.0, 19.0, 19.0]","[20.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20...."
AAAAT,"[19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0]","[19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19...."
AAACA,"[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0]","[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14...."
AAACC,"[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0]","[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14...."
AAACG,"[8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0]","[8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, ..."
AAACT,"[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0]","[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, ..."
AAAGA,"[11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0]","[11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11...."
AAAGC,"[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0]","[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, ..."


In [8]:
for i in kmer_dfs:
    kmer_dfs[i]['pvalue'] = kmer_dfs[i].apply(lambda x: stats.ttest_ind(x['RS'], x['NRS']).pvalue, axis=1)

In [9]:
# Note: we have NaNs because the variance is 0 in these comparisons and dividing by 0 yields NaN
kmer_dfs[5][0:10]

Unnamed: 0,RS,NRS,pvalue
AAAAA,"[34.0, 35.0, 34.0, 34.0, 33.0, 35.0, 34.0]","[33.0, 34.0, 34.0, 34.0, 33.0, 33.0, 33.0, 33....",0.017981
AAAAC,"[15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0]","[15.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15....",0.317134
AAAAG,"[19.0, 19.0, 19.0, 19.0, 20.0, 19.0, 19.0]","[20.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20....",0.003169
AAAAT,"[19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0]","[19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19....",0.493643
AAACA,"[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0]","[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14....",
AAACC,"[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0]","[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14....",
AAACG,"[8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0]","[8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, ...",
AAACT,"[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0]","[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, ...",
AAAGA,"[11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0]","[11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11....",
AAAGC,"[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0]","[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, ...",


In [10]:
# Replace instances of NaN with 1.0 since this means no difference in the means
# i.e. - fail to reject H0
for i in kmer_dfs:
    kmer_dfs[i]['pvalue'].fillna(1.0, inplace=True)

In [11]:
# Looks much better!
kmer_dfs[5][0:10]

Unnamed: 0,RS,NRS,pvalue
AAAAA,"[34.0, 35.0, 34.0, 34.0, 33.0, 35.0, 34.0]","[33.0, 34.0, 34.0, 34.0, 33.0, 33.0, 33.0, 33....",0.017981
AAAAC,"[15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0]","[15.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15....",0.317134
AAAAG,"[19.0, 19.0, 19.0, 19.0, 20.0, 19.0, 19.0]","[20.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20....",0.003169
AAAAT,"[19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0]","[19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19....",0.493643
AAACA,"[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0]","[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14....",1.0
AAACC,"[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0]","[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14....",1.0
AAACG,"[8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0]","[8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, ...",1.0
AAACT,"[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0]","[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, ...",1.0
AAAGA,"[11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0]","[11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11....",1.0
AAAGC,"[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0]","[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, ...",1.0


In [12]:
# Needed to pretty print the Pandas DataFrames
from IPython.display import display
# Apply FDR correction to the pvalues
for i in kmer_dfs:
    kmer_dfs[i]['qvalue'] = fdrcorrection(kmer_dfs[i].pvalue)[1]
    print('k-mer length: {}'.format(i))
    display(kmer_dfs[i].loc[kmer_dfs[i].qvalue < 0.05, :])

k-mer length: 5


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 6


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 7


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 8


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 9


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 10


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 11


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 12


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 13


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 14


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 15


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 16


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 17


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 18


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 19


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 20


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 21


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 22


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 23


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 24


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 25


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 26


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 27


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 28


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 29


Unnamed: 0,RS,NRS,pvalue,qvalue


k-mer length: 30


Unnamed: 0,RS,NRS,pvalue,qvalue


In [26]:
# Calculate the total number of observations for a k-mer in each partition (RS vs. NRS)
for i in kmer_dfs:
    kmer_dfs[i]['RS_total'] = kmer_dfs[i].apply(lambda x: sum(x['RS']), axis=1)
    kmer_dfs[i]['NRS_total'] = kmer_dfs[i].apply(lambda x: sum(x['NRS']), axis=1)

In [29]:
# Examine k-mers that do not appear in the NRS partition
for i in kmer_dfs:
    print('k-mer length: {}'.format(i))
    display(kmer_dfs[i].loc[kmer_dfs[i].NRS_total == 0.0, :])

k-mer length: 5


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total


k-mer length: 6


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 7


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GGAAAGC,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 8


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
ATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATTGGTCA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 9


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GCTGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
GGCATTGGT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
TCTTCTTAT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 10


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGGCATTG,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAA,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TCTTATTACA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 11


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GAAAGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
GCATTGGTCAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
TATTACATAAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 12


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
ATTGGTCAACTC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TCTTATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 13


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGGCATTGGTCAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATGGGCTGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TTCTTATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 14


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGAGGCATTGGT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATTACATAAACACA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACGAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 15


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGGCATTGGTCAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACGAGC,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TCTTATTACATAAAC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 16


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGAGGCATTGGTCA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGCTGGAAAGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TTCTTCTTATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 17


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AAGAGAGAGGCATTGGT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
CGATGGGCTGGAAAGCC,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
CTTATTACATAAACACA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 18


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AAAGAGAGAGGCATTGGT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATCATGCATTCTTCTTAT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GTGCGATGGGCTGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 19


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGAGGCATTGGTCAACT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
CTTCTTATTACATAAACAC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GCGATGGGCTGGAAAGCCA,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 20


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GGAAAGCCAACGAGCGCCAA,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
GGGGAAAGAGAGAGGCATTG,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
TCTTATTACATAAACACAAT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 21


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GGAAAGCCAACGAGCGCCAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
GGGAAAGAGAGAGGCATTGGT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
TCTTCTTATTACATAAACACA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 22


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GCATTGGTCAACTCCGTTCATC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GCGATGGGCTGGAAAGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TGCATTCTTCTTATTACATAAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 23


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
ATCATGCATTCTTCTTATTACAT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATGGGCTGGAAAGCCAACGAGCG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
CCAAGGGGAAAGAGAGAGGCATT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 24


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
ATCATGCATTCTTCTTATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATTGGTCAACTCCGTTCATCGACC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GTGCGATGGGCTGGAAAGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 25


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AAGAGAGAGGCATTGGTCAACTCCG,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATTCTTCTTATTACATAAACACAAT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACGAGCGCCAAGGGGA,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 26


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AAATCATGCATTCTTCTTATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
AAGGAGAGGTGCGATGGGCTGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
AGGCATTGGTCAACTCCGTTCATCGA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 27


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
ATCATGCATTCTTCTTATTACATAAAC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
CAAGGAGAGGTGCGATGGGCTGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
GGCATTGGTCAACTCCGTTCATCGACC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 28


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGAGGCATTGGTCAACTCCGTTCATC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATTACATAAACACAATTCTCTGCAAATC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACGAGCGCCAAGGGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,0.977431,3.0,0.0


k-mer length: 29


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
CAAGGGGAAAGAGAGAGGCATTGGTCAAC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
CCTCTGCAAATCATGCATTCTTCTTATTA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GTGCGATGGGCTGGAAAGCCAACGAGCGC,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,0.945044,3.0,0.0


k-mer length: 30


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGGCATTGGTCAACTCCGTTCATCGACC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACGAGCGCCAAGGGGAAAGAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,0.9193,3.0,0.0
TCTTATTACATAAACACAATTCTCTGCAAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


In [33]:
# Examine k-mers that do appear in greater number in RS when compared to the NRS partition
for i in kmer_dfs:
    print('k-mer length: {}'.format(i))
    display(kmer_dfs[i].loc[kmer_dfs[i].RS_total > 2.5 * kmer_dfs[i].NRS_total, :])

k-mer length: 5


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total


k-mer length: 6


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 7


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GGAAAGC,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 8


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
ATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATTGGTCA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 9


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GCTGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
GGCATTGGT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
TCTTCTTAT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 10


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGGCATTG,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAA,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TCTTATTACA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 11


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GAAAGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
GCATTGGTCAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
TATTACATAAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 12


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
ATTGGTCAACTC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TCTTATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 13


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGGCATTGGTCAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATGGGCTGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TTCTTATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 14


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGAGGCATTGGT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATTACATAAACACA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACGAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 15


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGGCATTGGTCAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACGAGC,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TCTTATTACATAAAC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 16


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGAGGCATTGGTCA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGCTGGAAAGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TTCTTCTTATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 17


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AAGAGAGAGGCATTGGT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
CGATGGGCTGGAAAGCC,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
CTTATTACATAAACACA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 18


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AAAGAGAGAGGCATTGGT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATCATGCATTCTTCTTAT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GTGCGATGGGCTGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 19


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGAGGCATTGGTCAACT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
CTTCTTATTACATAAACAC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GCGATGGGCTGGAAAGCCA,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 20


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GGAAAGCCAACGAGCGCCAA,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
GGGGAAAGAGAGAGGCATTG,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
TCTTATTACATAAACACAAT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 21


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GGAAAGCCAACGAGCGCCAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
GGGAAAGAGAGAGGCATTGGT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
TCTTCTTATTACATAAACACA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 22


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
GCATTGGTCAACTCCGTTCATC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GCGATGGGCTGGAAAGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
TGCATTCTTCTTATTACATAAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 23


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
ATCATGCATTCTTCTTATTACAT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATGGGCTGGAAAGCCAACGAGCG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
CCAAGGGGAAAGAGAGAGGCATT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 24


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
ATCATGCATTCTTCTTATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATTGGTCAACTCCGTTCATCGACC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GTGCGATGGGCTGGAAAGCCAACG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 25


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AAGAGAGAGGCATTGGTCAACTCCG,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATTCTTCTTATTACATAAACACAAT,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACGAGCGCCAAGGGGA,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0


k-mer length: 26


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AAATCATGCATTCTTCTTATTACATA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
AAGGAGAGGTGCGATGGGCTGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
AGGCATTGGTCAACTCCGTTCATCGA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 27


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
ATCATGCATTCTTCTTATTACATAAAC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
CAAGGAGAGGTGCGATGGGCTGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,1.0,3.0,0.0
GGCATTGGTCAACTCCGTTCATCGACC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0


k-mer length: 28


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGAGGCATTGGTCAACTCCGTTCATC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
ATTACATAAACACAATTCTCTGCAAATC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACGAGCGCCAAGGGGAAAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,0.977431,3.0,0.0


k-mer length: 29


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
CAAGGGGAAAGAGAGAGGCATTGGTCAAC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
CCTCTGCAAATCATGCATTCTTCTTATTA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GTGCGATGGGCTGGAAAGCCAACGAGCGC,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,0.945044,3.0,0.0


k-mer length: 30


Unnamed: 0,RS,NRS,pvalue,qvalue,RS_total,NRS_total
AGAGGCATTGGTCAACTCCGTTCATCGACC,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
GGAAAGCCAACGAGCGCCAAGGGGAAAGAG,"[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.006133,0.9193,3.0,0.0
TCTTATTACATAAACACAATTCTCTGCAAA,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.16255,1.0,1.0,0.0
