##### install kaos package

In [1]:
# One-time setup: uncomment the line below to install kaos from the local wheel file.
# Inside a notebook, prefer the `%pip` magic so the install targets the running kernel's environment.
# pip install kaos-0.15-py3-none-any.whl --force-reinstall

##### import library

In [2]:
import kaos

##### Reads a FASTA file at the given `file_path` and concatenates the sequences into a single string.


Parameters:

    file_path (str): The path to the FASTA file.

Returns:

    str: Concatenated DNA sequence from the FASTA file.

In [3]:
# Print the docstring of kaos.read_fasta.
help(kaos.read_fasta)

Help on function read_fasta in module kaos:

read_fasta(file_path: str)
    Reads a FASTA file at the given `file_path` and concatenates the sequences into a single string.
    
    Parameters:
        file_path (str): The path to the FASTA file.
    
    Returns:
        str: Concatenated DNA sequence from the FASTA file.



In [4]:
# Path to the FASTA file, relative to the notebook's working directory.
file_path = "GCF_000005845.2_ASM584v2_genomic.fna"

# read_fasta returns every sequence in the file concatenated into one string.
fasta_seq = kaos.read_fasta(file_path)

# The file name suggests a whole-genome assembly, so printing the full string
# would flood the notebook output; show only a bounded prefix as a sanity check.
PREVIEW_LENGTH = 1000
print(fasta_seq[:PREVIEW_LENGTH])

AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCTGAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGATGGCAGGTTTCACCGCCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATT

##### Define dummy fasta sequence

In [5]:
# Short toy sequence used by the examples below. It deliberately contains the
# non-ACGT symbols "N" and "R"; k-mers that include them do not appear in the
# 3-mer counts shown later in this notebook.
fasta_seq_dummy = "ATTGCNATRATTT"

##### Generates the key matrix for Chaos Game Representation (CGR) for the given k-mer length.

Parameters:

    kmer_length (int): The length of the k-mer.

Returns:

    np.ndarray: A 2D numpy array representing the key matrix for CGR.

In [6]:
# Print the docstring of kaos.chaos_game_representation_key.
help(kaos.chaos_game_representation_key)

Help on function chaos_game_representation_key in module kaos:

chaos_game_representation_key(kmer_length: int)
    Generates the key matrix for Chaos Game Representation (CGR) for the given k-mer length.
    
    Parameters:
        kmer_length (int): The length of the k-mer.
    
    Returns:
        np.ndarray: A 2D numpy array representing the key matrix for CGR.



In [7]:
# Build the CGR key matrix for 3-mers. The displayed result is an 8x8 array of
# 3-letter strings over A/C/G/T; it is reused later when building the
# frequency dictionary.
chaos_game_kmer_array = kaos.chaos_game_representation_key(kmer_length=3)
chaos_game_kmer_array

array([['CCC', 'CCG', 'CGC', 'CGG', 'GCC', 'GCG', 'GGC', 'GGG'],
       ['CCA', 'CCT', 'CGA', 'CGT', 'GCA', 'GCT', 'GGA', 'GGT'],
       ['CAC', 'CAG', 'CTC', 'CTG', 'GAC', 'GAG', 'GTC', 'GTG'],
       ['CAA', 'CAT', 'CTA', 'CTT', 'GAA', 'GAT', 'GTA', 'GTT'],
       ['ACC', 'ACG', 'AGC', 'AGG', 'TCC', 'TCG', 'TGC', 'TGG'],
       ['ACA', 'ACT', 'AGA', 'AGT', 'TCA', 'TCT', 'TGA', 'TGT'],
       ['AAC', 'AAG', 'ATC', 'ATG', 'TAC', 'TAG', 'TTC', 'TTG'],
       ['AAA', 'AAT', 'ATA', 'ATT', 'TAA', 'TAT', 'TTA', 'TTT']],
      dtype='<U3')

##### Returns the index of a specific k-mer in the Chaos Game Representation (CGR) key matrix.

Parameters:
        
        kmer (str): The k-mer for which the index is to be found.
        kmer_length (int): The length of the k-mer.

Returns:

        tuple: The row and column indices of the k-mer in the CGR key matrix.

In [8]:
# Print the docstring of kaos.return_kmer_index.
help(kaos.return_kmer_index)

Help on function return_kmer_index in module kaos:

return_kmer_index(kmer: str, kmer_length: int)
    Returns the index of a specific k-mer in the Chaos Game Representation (CGR) key matrix.
    
    Parameters:
        kmer (str): The k-mer for which the index is to be found.
        kmer_length (int): The length of the k-mer.
    
    Returns:
        tuple: The row and column indices of the k-mer in the CGR key matrix.



In [9]:
# Look up the (row, column) position of the 3-mer "AAA" in the CGR key matrix.
kaos.return_kmer_index(kmer="AAA", kmer_length=3)

(7, 0)

##### Returns the k-mer at the specified index in the Chaos Game Representation (CGR) key matrix.

Parameters:

    kmer_length (int): The length of the k-mer used to generate the Chaos Game Representation matrix.
    tuple_index (tuple): The index (row, column) of the k-mer in the matrix.

Returns:

    str: The k-mer at the specified index.

In [10]:
# Print the docstring of kaos.return_kmer_at_index.
help(kaos.return_kmer_at_index)

Help on function return_kmer_at_index in module kaos:

return_kmer_at_index(kmer_length: int, tuple_index: tuple)
    Returns the k-mer at the specified index in the Chaos Game Representation (CGR) key matrix.
    
    Parameters:
        kmer_length (int): The length of the k-mer used to generate the Chaos Game Representation matrix.
        tuple_index (tuple): The index (row, column) of the k-mer in the matrix.
    
    Returns:
        str: The k-mer at the specified index.



In [11]:
# Reverse lookup: fetch the 3-mer stored at row 7, column 0 of the key matrix.
# As the output shows, the call prints a summary message and also returns the k-mer.
kaos.return_kmer_at_index(kmer_length=3, tuple_index=(7, 0))

The kmer at index (7, 0) with kmer size 3 is AAA.


'AAA'

##### Generates the chaos frequency matrix for the given DNA sequence.

This function calculates the Chaos Frequency Matrix (CFM) for a given DNA sequence and k-mer length,
using the Chaos Game Representation (CGR) key matrix.

Parameters:

    fasta_string (str): The DNA sequence as a plain string of nucleotides (e.g. the string returned by kaos.read_fasta).
    kmer_length (int): The length of the k-mers to consider.
    chaos_game_kmer_array (np.array, optional): The Chaos Game Representation (CGR) key matrix. Defaults to None.
    pseudo_count (bool, optional): Whether to apply pseudo-counts of 1 to the matrix. Defaults to True.

Returns:

    tuple: A tuple containing:
        - np.array: The chaos frequency matrix representing k-mer frequencies.
        - np.array: The Chaos Game Representation (CGR) key matrix used.

In [12]:
# Print the docstring of kaos.chaos_frequency_matrix.
help(kaos.chaos_frequency_matrix)

Help on function chaos_frequency_matrix in module kaos:

chaos_frequency_matrix(fasta_string: str, kmer_length: int, chaos_game_kmer_array: <built-in function array> = None, pseudo_count=True)
    Generates the chaos frequency matrix for the given DNA sequence.
    
    This function calculates the Chaos Frequency Matrix (CFM) for a given DNA sequence and k-mer length,
    using the Chaos Game Representation (CGR) key matrix.
    
    Parameters:
        fasta_string (str): The DNA sequence in FASTA format.
        kmer_length (int): The length of the k-mers to consider.
        chaos_game_kmer_array (np.array, optional): The Chaos Game Representation (CGR) key matrix. Defaults to None.
        pseudo_count (bool, optional): Whether to apply pseudo-counts to the matrix. Defaults to True.
    
    Returns:
        tuple: A tuple containing:
            - np.array: The chaos frequency matrix representing k-mer frequencies.
            - np.array: The Chaos Game Representation (CGR) key matrix used.

In [13]:
# Count 3-mers in the dummy sequence. No precomputed key matrix is supplied
# (chaos_game_kmer_array=None); the key matrix that was used comes back as the
# second element of the returned tuple. pseudo_count=False keeps the raw counts.
kaos_freq_matrix, kaos_key_array = kaos.chaos_frequency_matrix(
    fasta_string=fasta_seq_dummy,
    kmer_length=3,
    chaos_game_kmer_array=None,
    pseudo_count=False,
)
kaos_freq_matrix

array([[0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 2., 0., 0., 0., 1.]])

##### Calculate the frequency dictionary of k-mers

Parameters:

    - fasta_string (str): The input DNA sequence as a plain string of nucleotides (e.g. the string returned by kaos.read_fasta).
    - kmer_length (int): The length of k-mers.
    - chaos_game_kmer_array (np.array): Chaos game k-mer array if pre-calculated, otherwise None.
    - pseudo_count (bool): Whether to apply pseudo-count (Laplace smoothing) or not. Default is True.

Returns:

    - frequency_dictionary (dict): A dictionary containing k-mers as keys and their frequencies as values.

In [14]:
# Print the docstring of kaos.chaos_frequency_dictionary.
help(kaos.chaos_frequency_dictionary)

Help on function chaos_frequency_dictionary in module kaos:

chaos_frequency_dictionary(fasta_string: str, kmer_length: int, chaos_game_kmer_array: <built-in function array> = None, pseudo_count=True)
    Calculate the frequency dictionary of k-mers in a chaos game representation matrix.
    
    Parameters:
        - fasta_string (str): The input DNA sequence in FASTA format.
        - kmer_length (int): The length of k-mers.
        - chaos_game_kmer_array (np.array): Chaos game k-mer array if pre-calculated, otherwise None.
        - pseudo_count (bool): Whether to apply pseudo-count (Laplace smoothing) or not. Default is True.
    
    Returns:
        - frequency_dictionary (dict): A dictionary containing k-mers as keys and their frequencies as values.



In [15]:
# Same 3-mer counts as the frequency matrix, returned as a {k-mer: count} dict.
# The key matrix built earlier is passed in rather than left as None, and
# pseudo_count=False keeps the raw counts.
kaos.chaos_frequency_dictionary(
    fasta_string=fasta_seq_dummy,
    kmer_length=3,
    chaos_game_kmer_array=chaos_game_kmer_array,
    pseudo_count=False,
)

{'CCC': 0.0,
 'CCG': 0.0,
 'CGC': 0.0,
 'CGG': 0.0,
 'GCC': 0.0,
 'GCG': 0.0,
 'GGC': 0.0,
 'GGG': 0.0,
 'CCA': 0.0,
 'CCT': 0.0,
 'CGA': 0.0,
 'CGT': 0.0,
 'GCA': 0.0,
 'GCT': 0.0,
 'GGA': 0.0,
 'GGT': 0.0,
 'CAC': 0.0,
 'CAG': 0.0,
 'CTC': 0.0,
 'CTG': 0.0,
 'GAC': 0.0,
 'GAG': 0.0,
 'GTC': 0.0,
 'GTG': 0.0,
 'CAA': 0.0,
 'CAT': 0.0,
 'CTA': 0.0,
 'CTT': 0.0,
 'GAA': 0.0,
 'GAT': 0.0,
 'GTA': 0.0,
 'GTT': 0.0,
 'ACC': 0.0,
 'ACG': 0.0,
 'AGC': 0.0,
 'AGG': 0.0,
 'TCC': 0.0,
 'TCG': 0.0,
 'TGC': 1.0,
 'TGG': 0.0,
 'ACA': 0.0,
 'ACT': 0.0,
 'AGA': 0.0,
 'AGT': 0.0,
 'TCA': 0.0,
 'TCT': 0.0,
 'TGA': 0.0,
 'TGT': 0.0,
 'AAC': 0.0,
 'AAG': 0.0,
 'ATC': 0.0,
 'ATG': 0.0,
 'TAC': 0.0,
 'TAG': 0.0,
 'TTC': 0.0,
 'TTG': 1.0,
 'AAA': 0.0,
 'AAT': 0.0,
 'ATA': 0.0,
 'ATT': 2.0,
 'TAA': 0.0,
 'TAT': 0.0,
 'TTA': 0.0,
 'TTT': 1.0}

##### Calculate the count of a specific k-mer in a given DNA sequence.

Parameters:

    - key_name (str): The k-mer sequence for which the count is to be calculated.
    - fasta_content (str): The input DNA sequence in which the k-mer count is to be calculated.

Returns:

    - count (int): The count of the specified k-mer in the DNA sequence.


In [16]:
# Print the docstring of kaos.return_kmer_count_individual.
help(kaos.return_kmer_count_individual)

Help on function return_kmer_count_individual in module kaos:

return_kmer_count_individual(key_name: str, fasta_content: str)
    Calculate the count of a specific k-mer in a given DNA sequence.
    
    Parameters:
        - key_name (str): The k-mer sequence for which the count is to be calculated.
        - fasta_content (str): The input DNA sequence in which the k-mer count is to be calculated.
    
    Returns:
        - count (int): The count of the specified k-mer in the DNA sequence.



In [17]:
# Count occurrences of the single k-mer "ATT" in the dummy sequence.
kaos.return_kmer_count_individual(
    key_name="ATT",
    fasta_content=fasta_seq_dummy,
)

2