In [1]:
import glob
import math
import os
import numpy as np
import pandas as pd
from scipy.stats import pearsonr

# Function to calculate mononucleotide perplexity
def calculate_mononucleotide_perplexity(seq):
    """Return the perplexity of the A/C/G/T base composition of ``seq``.

    Perplexity is ``2 ** H``, where ``H`` is the Shannon entropy in bits of
    the observed base frequencies. Only the uppercase characters A, C, G and
    T are counted; every other character is ignored.

    Args:
        seq: Sequence to analyse (callers pass a ``str``).

    Returns:
        The perplexity as a float, or ``float('inf')`` when ``seq`` contains
        none of A/C/G/T (including the empty sequence).
    """
    base_counts = [seq.count(base) for base in "ACGT"]
    n_counted = sum(base_counts)

    # Nothing countable: there is no distribution, signalled with infinity.
    if n_counted == 0:
        return float('inf')

    # Bases that never occur are left out so log2(0) is never evaluated.
    frequencies = [c / n_counted for c in base_counts if c > 0]
    entropy_bits = -sum(f * math.log2(f) for f in frequencies)
    return 2 ** entropy_bits

# Function to calculate dinucleotide perplexity
def calculate_dinucleotide_perplexity(seq):
    """Return the perplexity of the overlapping dinucleotide distribution of ``seq``.

    Every overlapping 2-mer made only of uppercase A/C/G/T is tallied; 2-mers
    containing any other character are ignored. Perplexity is ``2 ** H``,
    where ``H`` is the Shannon entropy in bits of the tallied frequencies.

    Args:
        seq: Sequence to analyse, as a ``str``.

    Returns:
        The perplexity as a float, or ``float('inf')`` when no valid
        dinucleotide is present (e.g. ``len(seq) < 2``).
    """
    # Pre-seed the table with the 16 valid dinucleotides in canonical order.
    # This lets a single pass over the sequence do the counting (instead of
    # one full list scan per candidate) and keeps the summation order stable.
    counts = {a + b: 0 for a in "ACGT" for b in "ACGT"}
    for i in range(len(seq) - 1):
        dinucleotide = seq[i:i + 2]
        if dinucleotide in counts:
            counts[dinucleotide] += 1

    total = sum(counts.values())
    if total == 0:
        return float('inf')

    # Unobserved dinucleotides are skipped so log2(0) is never evaluated.
    probabilities = [count / total for count in counts.values() if count > 0]
    entropy = -sum(p * math.log2(p) for p in probabilities)
    return 2 ** entropy

# Function to calculate trinucleotide perplexity
def calculate_trinucleotide_perplexity(seq):
    """Return the perplexity of the overlapping trinucleotide distribution of ``seq``.

    Every overlapping 3-mer made only of uppercase A/C/G/T is tallied; 3-mers
    containing any other character are ignored. Perplexity is ``2 ** H``,
    where ``H`` is the Shannon entropy in bits of the tallied frequencies.

    Args:
        seq: Sequence to analyse, as a ``str``.

    Returns:
        The perplexity as a float, or ``float('inf')`` when no valid
        trinucleotide is present (e.g. ``len(seq) < 3``).
    """
    # Pre-seed the table with the 64 valid trinucleotides in canonical order.
    # One pass over the sequence then replaces 64 separate list scans, while
    # the summation order (and therefore the result) stays the same.
    counts = {a + b + c: 0 for a in "ACGT" for b in "ACGT" for c in "ACGT"}
    for i in range(len(seq) - 2):
        trinucleotide = seq[i:i + 3]
        if trinucleotide in counts:
            counts[trinucleotide] += 1

    total = sum(counts.values())
    if total == 0:
        return float('inf')

    # Unobserved trinucleotides are skipped so log2(0) is never evaluated.
    probabilities = [count / total for count in counts.values() if count > 0]
    entropy = -sum(p * math.log2(p) for p in probabilities)
    return 2 ** entropy

# Function to process each file and calculate perplexities
def process_files(pattern):
    """Correlate mono-, di- and trinucleotide perplexity with affinity, file by file.

    Every file matching ``pattern`` is read as whitespace-separated text in
    which the first field of a line is a sequence and the second is a numeric
    affinity; further fields are ignored. For each file the three perplexities
    are computed per sequence and their Pearson correlation with the
    affinities is printed and collected.

    Args:
        pattern: Glob pattern selecting the input files (e.g. ``"*.txt"``).

    Returns:
        None. One row per processed file is written to
        ``perplexity_correlations.xlsx`` in the current working directory.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not a valid float.
    """
    # glob gives no ordering guarantee; sort so the printed log and the
    # spreadsheet rows come out in the same order on every run.
    files = sorted(glob.glob(pattern))
    results = []

    for file in files:
        file_name = os.path.basename(file)
        print(f"Processing file: {file_name}")

        sequences = []
        affinities = []

        # Read the .txt file and extract sequences and affinity scores
        with open(file, 'r') as f:
            for line in f:
                data = line.split()
                # A blank line (typically at the end of the file) holds no
                # record; indexing into it would raise IndexError.
                if not data:
                    continue
                sequences.append(data[0])
                affinities.append(float(data[1]))

        # A correlation cannot be computed from fewer than two observations.
        if len(sequences) < 2:
            print(f"Skipping {file} due to insufficient data (less than 2 sequences).")
            continue

        # Calculate perplexities for each sequence
        mono_perplexities = [calculate_mononucleotide_perplexity(s) for s in sequences]
        di_perplexities = [calculate_dinucleotide_perplexity(s) for s in sequences]
        tri_perplexities = [calculate_trinucleotide_perplexity(s) for s in sequences]

        # Calculate Pearson correlation coefficients (p-values are not used here)
        mono_corr, _ = pearsonr(mono_perplexities, affinities)
        di_corr, _ = pearsonr(di_perplexities, affinities)
        tri_corr, _ = pearsonr(tri_perplexities, affinities)

        # Print results to the screen
        print(f"File: {file_name}")
        print(f"  Mononucleotide Correlation: {mono_corr:.4f}")
        print(f"  Dinucleotide Correlation: {di_corr:.4f}")
        print(f"  Trinucleotide Correlation: {tri_corr:.4f}")
        print()

        # Append results to the list
        results.append({
            'File': file_name,
            'Mononucleotide Correlation': mono_corr,
            'Dinucleotide Correlation': di_corr,
            'Trinucleotide Correlation': tri_corr
        })

    # Convert results to a DataFrame and save to Excel
    df = pd.DataFrame(results)
    output_file = "perplexity_correlations.xlsx"
    df.to_excel(output_file, index=False)
    print(f"Results saved to {output_file}")

# Process all .txt files in the current folder.
# The relative pattern is resolved against the working directory, and every match is
# parsed as a "sequence affinity" table; the summary goes to perplexity_correlations.xlsx.
process_files("*.txt")


Processing file: bHLH_Bhlhb2_TCAAGG20NGAA_CACGTG_14_4.txt
File: bHLH_Bhlhb2_TCAAGG20NGAA_CACGTG_14_4.txt
  Mononucleotide Correlation: 0.0437
  Dinucleotide Correlation: 0.0540
  Trinucleotide Correlation: 0.0647

Processing file: bHLH_MAX_TGACCT20NGA_CACGTG_12_3.txt
File: bHLH_MAX_TGACCT20NGA_CACGTG_12_3.txt
  Mononucleotide Correlation: 0.2820
  Dinucleotide Correlation: 0.2148
  Trinucleotide Correlation: 0.1974

Processing file: bHLH_TCF3_TACCCG20NCCC_CACCTG_12_3.txt
File: bHLH_TCF3_TACCCG20NCCC_CACCTG_12_3.txt
  Mononucleotide Correlation: 0.0100
  Dinucleotide Correlation: -0.0820
  Trinucleotide Correlation: -0.0606

Processing file: bHLH_TCF4_TGCGAA20NGA_CACCTG_14_3.txt
File: bHLH_TCF4_TGCGAA20NGA_CACCTG_14_3.txt
  Mononucleotide Correlation: 0.1306
  Dinucleotide Correlation: 0.1016
  Trinucleotide Correlation: 0.0027

Processing file: bHLH_TFAP4_TCACGT20NGCA_CAGCTG_12_4.txt
File: bHLH_TFAP4_TCACGT20NGCA_CAGCTG_12_4.txt
  Mononucleotide Correlation: 0.2129
  Dinucleotide Corre

In [2]:
import glob
import math
import os
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# Function to calculate mononucleotide perplexity
def calculate_mononucleotide_perplexity(seq):
    """Compute the perplexity (``2 ** entropy``) of the base usage in ``seq``.

    Only uppercase A, C, G and T contribute; other characters are ignored.

    Args:
        seq: Sequence to analyse (callers pass a ``str``).

    Returns:
        A float perplexity, or ``float('inf')`` if none of A/C/G/T occurs.
    """
    tallies = {base: seq.count(base) for base in "ACGT"}
    n_counted = sum(tallies.values())

    if not n_counted:
        # No countable base at all: report an infinite perplexity.
        return float('inf')

    # One p * log2(p) term per base that actually occurs (zero counts skipped).
    terms = [
        (tally / n_counted) * math.log2(tally / n_counted)
        for tally in tallies.values()
        if tally
    ]
    return 2 ** (-sum(terms))

# Function to calculate dinucleotide perplexity
def calculate_dinucleotide_perplexity(seq):
    """Return the perplexity of the overlapping dinucleotide distribution of ``seq``.

    Every overlapping 2-mer made only of uppercase A/C/G/T is tallied; 2-mers
    containing any other character are ignored. Perplexity is ``2 ** H``,
    where ``H`` is the Shannon entropy in bits of the tallied frequencies.

    Args:
        seq: Sequence to analyse, as a ``str``.

    Returns:
        The perplexity as a float, or ``float('inf')`` when no valid
        dinucleotide is present (e.g. ``len(seq) < 2``).
    """
    # Pre-seed the table with the 16 valid dinucleotides in canonical order.
    # This lets a single pass over the sequence do the counting (instead of
    # one full list scan per candidate) and keeps the summation order stable.
    counts = {a + b: 0 for a in "ACGT" for b in "ACGT"}
    for i in range(len(seq) - 1):
        dinucleotide = seq[i:i + 2]
        if dinucleotide in counts:
            counts[dinucleotide] += 1

    total = sum(counts.values())
    if total == 0:
        return float('inf')

    # Unobserved dinucleotides are skipped so log2(0) is never evaluated.
    probabilities = [count / total for count in counts.values() if count > 0]
    entropy = -sum(p * math.log2(p) for p in probabilities)
    return 2 ** entropy

# Function to calculate trinucleotide perplexity
def calculate_trinucleotide_perplexity(seq):
    """Return the perplexity of the overlapping trinucleotide distribution of ``seq``.

    Every overlapping 3-mer made only of uppercase A/C/G/T is tallied; 3-mers
    containing any other character are ignored. Perplexity is ``2 ** H``,
    where ``H`` is the Shannon entropy in bits of the tallied frequencies.

    Args:
        seq: Sequence to analyse, as a ``str``.

    Returns:
        The perplexity as a float, or ``float('inf')`` when no valid
        trinucleotide is present (e.g. ``len(seq) < 3``).
    """
    # Pre-seed the table with the 64 valid trinucleotides in canonical order.
    # One pass over the sequence then replaces 64 separate list scans, while
    # the summation order (and therefore the result) stays the same.
    counts = {a + b + c: 0 for a in "ACGT" for b in "ACGT" for c in "ACGT"}
    for i in range(len(seq) - 2):
        trinucleotide = seq[i:i + 3]
        if trinucleotide in counts:
            counts[trinucleotide] += 1

    total = sum(counts.values())
    if total == 0:
        return float('inf')

    # Unobserved trinucleotides are skipped so log2(0) is never evaluated.
    probabilities = [count / total for count in counts.values() if count > 0]
    entropy = -sum(p * math.log2(p) for p in probabilities)
    return 2 ** entropy

# Function to process each file and calculate perplexities and correlations
def process_files(pattern, alpha=0.001):
    """Report Pearson and Spearman correlations between perplexity and affinity.

    Every file matching ``pattern`` is read as whitespace-separated text in
    which the first field of a line is a sequence and the second is a numeric
    affinity; further fields are ignored. Mono-, di- and trinucleotide
    perplexities are correlated with the affinities. A coefficient is reported
    only when its p-value is below ``alpha``; otherwise the text
    ``"not significant"`` is reported in its place.

    Args:
        pattern: Glob pattern selecting the input files (e.g. ``"*.txt"``).
        alpha: Significance threshold applied to each p-value (default 0.001).

    Returns:
        None. One row per processed file is written to
        ``perplexity_correlations_with_spearman.xlsx`` in the current working
        directory.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not a valid float.
    """
    def significant_or_label(corr, p_value):
        # Coefficient itself when significant, otherwise the placeholder text.
        return corr if p_value < alpha else "not significant"

    # Defined once here rather than re-created on every loop iteration.
    def print_corr(name, pearson_corr, pearson_p, spearman_corr, spearman_p):
        pearson_result = f"{pearson_corr:.4f}" if pearson_p < alpha else "not significant"
        spearman_result = f"{spearman_corr:.4f}" if spearman_p < alpha else "not significant"
        print(f"  {name} Pearson Correlation: {pearson_result}, Spearman Correlation: {spearman_result}")

    # glob gives no ordering guarantee; sort so the printed log and the
    # spreadsheet rows come out in the same order on every run.
    files = sorted(glob.glob(pattern))
    results = []

    for file in files:
        file_name = os.path.basename(file)
        print(f"Processing file: {file_name}")

        sequences = []
        affinities = []

        # Read the .txt file and extract sequences and affinity scores
        with open(file, 'r') as f:
            for line in f:
                data = line.split()
                # A blank line (typically at the end of the file) holds no
                # record; indexing into it would raise IndexError.
                if not data:
                    continue
                sequences.append(data[0])
                affinities.append(float(data[1]))

        # A correlation cannot be computed from fewer than two observations.
        if len(sequences) < 2:
            print(f"Skipping {file} due to insufficient data (less than 2 sequences).")
            continue

        # Calculate perplexities for each sequence
        mono_perplexities = [calculate_mononucleotide_perplexity(s) for s in sequences]
        di_perplexities = [calculate_dinucleotide_perplexity(s) for s in sequences]
        tri_perplexities = [calculate_trinucleotide_perplexity(s) for s in sequences]

        # Calculate Pearson correlation coefficients and their p-values
        mono_pearson_corr, mono_pearson_p = pearsonr(mono_perplexities, affinities)
        di_pearson_corr, di_pearson_p = pearsonr(di_perplexities, affinities)
        tri_pearson_corr, tri_pearson_p = pearsonr(tri_perplexities, affinities)

        # Calculate Spearman correlation coefficients and their p-values
        mono_spearman_corr, mono_spearman_p = spearmanr(mono_perplexities, affinities)
        di_spearman_corr, di_spearman_p = spearmanr(di_perplexities, affinities)
        tri_spearman_corr, tri_spearman_p = spearmanr(tri_perplexities, affinities)

        print(f"File: {file_name}")
        print_corr("Mononucleotide", mono_pearson_corr, mono_pearson_p, mono_spearman_corr, mono_spearman_p)
        print_corr("Dinucleotide", di_pearson_corr, di_pearson_p, di_spearman_corr, di_spearman_p)
        print_corr("Trinucleotide", tri_pearson_corr, tri_pearson_p, tri_spearman_corr, tri_spearman_p)
        print()

        # Append results to the list for saving
        results.append({
            'File': file_name,
            'Mono Pearson Correlation': significant_or_label(mono_pearson_corr, mono_pearson_p),
            'Mono Spearman Correlation': significant_or_label(mono_spearman_corr, mono_spearman_p),
            'Di Pearson Correlation': significant_or_label(di_pearson_corr, di_pearson_p),
            'Di Spearman Correlation': significant_or_label(di_spearman_corr, di_spearman_p),
            'Tri Pearson Correlation': significant_or_label(tri_pearson_corr, tri_pearson_p),
            'Tri Spearman Correlation': significant_or_label(tri_spearman_corr, tri_spearman_p)
        })

    # Convert results to a DataFrame and save to Excel
    df = pd.DataFrame(results)
    output_file = "perplexity_correlations_with_spearman.xlsx"
    df.to_excel(output_file, index=False)
    print(f"Results saved to {output_file}")

# Process all .txt files in the current folder.
# Coefficients whose p-value is not below 0.001 are reported as "not significant";
# the summary goes to perplexity_correlations_with_spearman.xlsx.
process_files("*.txt")


Processing file: bHLH_Bhlhb2_TCAAGG20NGAA_CACGTG_14_4.txt
File: bHLH_Bhlhb2_TCAAGG20NGAA_CACGTG_14_4.txt
  Mononucleotide Pearson Correlation: 0.0437, Spearman Correlation: 0.0482
  Dinucleotide Pearson Correlation: 0.0540, Spearman Correlation: 0.0506
  Trinucleotide Pearson Correlation: 0.0647, Spearman Correlation: 0.0534

Processing file: bHLH_MAX_TGACCT20NGA_CACGTG_12_3.txt
File: bHLH_MAX_TGACCT20NGA_CACGTG_12_3.txt
  Mononucleotide Pearson Correlation: 0.2820, Spearman Correlation: 0.2555
  Dinucleotide Pearson Correlation: 0.2148, Spearman Correlation: 0.2476
  Trinucleotide Pearson Correlation: 0.1974, Spearman Correlation: 0.2117

Processing file: bHLH_TCF3_TACCCG20NCCC_CACCTG_12_3.txt
File: bHLH_TCF3_TACCCG20NCCC_CACCTG_12_3.txt
  Mononucleotide Pearson Correlation: not significant, Spearman Correlation: not significant
  Dinucleotide Pearson Correlation: -0.0820, Spearman Correlation: -0.1085
  Trinucleotide Pearson Correlation: -0.0606, Spearman Correlation: -0.0750

Proces

In [4]:
import glob
import math
import os
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from collections import Counter

# Function to calculate perplexity for any k-mer (mononucleotide, dinucleotide, etc.)
def calculate_kmer_perplexity(seq, k):
    """Return the perplexity of the overlapping k-mer distribution of ``seq``.

    All overlapping substrings of length ``k`` are counted exactly as they
    appear (no alphabet filtering). Perplexity is ``2 ** H``, where ``H`` is
    the Shannon entropy in bits of the observed k-mer frequencies.

    Args:
        seq: Sequence to analyse, as a ``str``.
        k: k-mer length; 1 for mononucleotides, 2 for dinucleotides, etc.

    Returns:
        The perplexity as a float, or ``float('inf')`` when ``seq`` is
        shorter than ``k`` and therefore contains no k-mer.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    # k <= 0 would slice empty or wrapped-around substrings and return a
    # meaningless value instead of failing, so reject it up front.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    counts = Counter(seq[i:i + k] for i in range(len(seq) - k + 1))
    total = sum(counts.values())

    if total == 0:
        return float('inf')

    entropy = -sum((count / total) * math.log2(count / total) for count in counts.values())
    return 2 ** entropy

# Function to process each file and calculate perplexities and correlations
def process_files(pattern, alpha=0.001):
    """Correlate k-mer perplexity (k = 1, 2, 3) with affinity for each input file.

    Every file matching ``pattern`` is read as whitespace-separated text in
    which the first field of a line is a sequence and the second is a numeric
    affinity; further fields are ignored. For each perplexity type the Pearson
    and Spearman coefficients are reported when their p-value is below
    ``alpha``; otherwise ``"not significant"`` is reported in their place.

    Args:
        pattern: Glob pattern selecting the input files (e.g. ``"*.txt"``).
        alpha: Significance threshold applied to each p-value (default 0.001).

    Returns:
        None. One row per (file, perplexity type) is written to
        ``perplexity_correlations_optimized.xlsx`` in the current working
        directory.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not a valid float.
    """
    # Label -> k-mer length used for that perplexity type.
    perplexity_types = {'Mono': 1, 'Di': 2, 'Tri': 3}

    # glob gives no ordering guarantee; sort so the printed log and the
    # spreadsheet rows come out in the same order on every run.
    files = sorted(glob.glob(pattern))
    results = []

    for file in files:
        file_name = os.path.basename(file)
        print(f"Processing file: {file_name}")

        sequences, affinities = [], []

        # Read the .txt file and extract sequences and affinity scores
        with open(file, 'r') as f:
            for line in f:
                data = line.split()
                # A blank line (typically at the end of the file) holds no
                # record; indexing into it would raise IndexError.
                if not data:
                    continue
                sequences.append(data[0])
                affinities.append(float(data[1]))

        # A correlation cannot be computed from fewer than two observations.
        if len(sequences) < 2:
            print(f"Skipping {file} due to insufficient data (less than 2 sequences).")
            continue

        # Build the spreadsheet rows directly; all correlations for the file
        # are computed before anything is stored or printed.
        file_rows = []
        for name, k in perplexity_types.items():
            perplexities = [calculate_kmer_perplexity(seq, k) for seq in sequences]
            pearson_corr, pearson_p = pearsonr(perplexities, affinities)
            spearman_corr, spearman_p = spearmanr(perplexities, affinities)

            file_rows.append({
                'File': file_name,
                'Perplexity Type': name,
                'Pearson Correlation': pearson_corr if pearson_p < alpha else "not significant",
                'Spearman Correlation': spearman_corr if spearman_p < alpha else "not significant"
            })

        results.extend(file_rows)

        # Print correlation results for each perplexity type
        for corr in file_rows:
            print(f"{corr['Perplexity Type']} - Pearson: {corr['Pearson Correlation']}, Spearman: {corr['Spearman Correlation']}")

    # Save results to an Excel file
    df = pd.DataFrame(results)
    output_file = "perplexity_correlations_optimized.xlsx"
    df.to_excel(output_file, index=False)
    print(f"Results saved to {output_file}")

# Process all .txt files in the current folder.
# Produces one row per file and perplexity type (Mono, Di, Tri) in
# perplexity_correlations_optimized.xlsx.
process_files("*.txt")


Processing file: bHLH_Bhlhb2_TCAAGG20NGAA_CACGTG_14_4.txt
Mono - Pearson: 0.043721225617365696, Spearman: 0.048162681107630584
Di - Pearson: 0.054013352936682454, Spearman: 0.05063623465761682
Tri - Pearson: 0.06465039824143351, Spearman: 0.05339885677585925
Processing file: bHLH_MAX_TGACCT20NGA_CACGTG_12_3.txt
Mono - Pearson: 0.28195012954267945, Spearman: 0.2554617458255881
Di - Pearson: 0.2147686423259007, Spearman: 0.247643272142844
Tri - Pearson: 0.19736997746322418, Spearman: 0.2116766979328942
Processing file: bHLH_TCF3_TACCCG20NCCC_CACCTG_12_3.txt
Mono - Pearson: not significant, Spearman: not significant
Di - Pearson: -0.08197896667518795, Spearman: -0.10851905738816231
Tri - Pearson: -0.06062849169546964, Spearman: -0.07500660681249083
Processing file: bHLH_TCF4_TGCGAA20NGA_CACCTG_14_3.txt
Mono - Pearson: 0.1306339173667541, Spearman: 0.12696052055432733
Di - Pearson: 0.10159499975159672, Spearman: 0.11156895341520498
Tri - Pearson: not significant, Spearman: not significant


In [3]:
import glob
import math
import os
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr

# Function to calculate dinucleotide perplexity
def calculate_dinucleotide_perplexity(seq):
    """Return the perplexity of the overlapping dinucleotide distribution of ``seq``.

    Every overlapping 2-mer made only of uppercase A/C/G/T is tallied; 2-mers
    containing any other character are ignored. Perplexity is ``2 ** H``,
    where ``H`` is the Shannon entropy in bits of the tallied frequencies.

    Args:
        seq: Sequence to analyse, as a ``str``.

    Returns:
        The perplexity as a float, or ``float('inf')`` when no valid
        dinucleotide is present (e.g. ``len(seq) < 2``).
    """
    # Pre-seed the table with the 16 valid dinucleotides in canonical order.
    # This lets a single pass over the sequence do the counting (instead of
    # one full list scan per candidate) and keeps the summation order stable.
    counts = {a + b: 0 for a in "ACGT" for b in "ACGT"}
    for i in range(len(seq) - 1):
        dinucleotide = seq[i:i + 2]
        if dinucleotide in counts:
            counts[dinucleotide] += 1

    total = sum(counts.values())
    if total == 0:
        return float('inf')

    # Unobserved dinucleotides are skipped so log2(0) is never evaluated.
    probabilities = [count / total for count in counts.values() if count > 0]
    entropy = -sum(p * math.log2(p) for p in probabilities)
    return 2 ** entropy

# Function to calculate GC percentage
def calculate_gc_content(seq):
    """Return the percentage of positions in ``seq`` that are G or C.

    Matching is case-sensitive (uppercase ``G``/``C`` only) and the
    denominator is the full length of ``seq``, whatever characters it holds.

    Args:
        seq: Sequence to analyse.

    Returns:
        The GC percentage as a float between 0 and 100, or the integer ``0``
        for an empty sequence.
    """
    n_gc = sum(base in "GC" for base in seq)
    length = len(seq)
    # Empty input: report 0 rather than dividing by zero.
    return 0 if length == 0 else (n_gc / length) * 100

# Function to process each file and calculate dinucleotide perplexities and correlations
def process_files(pattern):
    """Correlate dinucleotide perplexity and GC content with affinity, file by file.

    Every file matching ``pattern`` is read as whitespace-separated text in
    which the first field of a line is a sequence and the second is a numeric
    affinity; further fields are ignored. Pearson and Spearman correlations
    with the affinities are computed for both features, printed, and collected.

    Args:
        pattern: Glob pattern selecting the input files (e.g. ``"*.txt"``).

    Returns:
        None. One row per processed file is written to
        ``dinucleotide_perplexity_gc_correlations.xlsx`` in the current
        working directory.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not a valid float.
    """
    # glob gives no ordering guarantee; sort so the printed log and the
    # spreadsheet rows come out in the same order on every run.
    files = sorted(glob.glob(pattern))
    results = []

    for file in files:
        file_name = os.path.basename(file)
        print(f"Processing file: {file_name}")

        sequences = []
        affinities = []

        # Read the .txt file and extract sequences and affinity scores
        with open(file, 'r') as f:
            for line in f:
                data = line.split()
                # A blank line (typically at the end of the file) holds no
                # record; indexing into it would raise IndexError.
                if not data:
                    continue
                sequences.append(data[0])
                affinities.append(float(data[1]))

        # A correlation cannot be computed from fewer than two observations.
        if len(sequences) < 2:
            print(f"Skipping {file} due to insufficient data (less than 2 sequences).")
            continue

        # Calculate perplexities and GC content for each sequence
        di_perplexities = [calculate_dinucleotide_perplexity(s) for s in sequences]
        gc_contents = [calculate_gc_content(s) for s in sequences]

        # Calculate Pearson and Spearman correlation coefficients (p-values unused)
        pearson_corr, _ = pearsonr(di_perplexities, affinities)
        spearman_corr, _ = spearmanr(di_perplexities, affinities)

        # Calculate Pearson and Spearman correlations for GC content
        gc_pearson_corr, _ = pearsonr(gc_contents, affinities)
        gc_spearman_corr, _ = spearmanr(gc_contents, affinities)

        # Print results to the screen
        print(f"File: {file_name}")
        print(f"  Dinucleotide Perplexity Pearson Correlation: {pearson_corr:.4f}, Spearman Correlation: {spearman_corr:.4f}")
        print(f"  GC Content Pearson Correlation: {gc_pearson_corr:.4f}, Spearman Correlation: {gc_spearman_corr:.4f}")
        print()

        # Append results to the list
        results.append({
            'File': file_name,
            'Dinucleotide Perplexity Pearson Correlation': pearson_corr,
            'Dinucleotide Perplexity Spearman Correlation': spearman_corr,
            'GC Content Pearson Correlation': gc_pearson_corr,
            'GC Content Spearman Correlation': gc_spearman_corr
        })

    # Convert results to a DataFrame and save to Excel
    df = pd.DataFrame(results)
    output_file = "dinucleotide_perplexity_gc_correlations.xlsx"
    df.to_excel(output_file, index=False)
    print(f"Results saved to {output_file}")

# Process all .txt files in the current folder.
# Compares dinucleotide perplexity and GC content against affinity; the summary
# goes to dinucleotide_perplexity_gc_correlations.xlsx.
process_files("*.txt")


Processing file: bHLH_Bhlhb2_TCAAGG20NGAA_CACGTG_14_4.txt
File: bHLH_Bhlhb2_TCAAGG20NGAA_CACGTG_14_4.txt
  Dinucleotide Perplexity Pearson Correlation: 0.0540, Spearman Correlation: 0.0506
  GC Content Pearson Correlation: 0.1399, Spearman Correlation: 0.1582

Processing file: bHLH_MAX_TGACCT20NGA_CACGTG_12_3.txt
File: bHLH_MAX_TGACCT20NGA_CACGTG_12_3.txt
  Dinucleotide Perplexity Pearson Correlation: 0.2148, Spearman Correlation: 0.2476
  GC Content Pearson Correlation: -0.0329, Spearman Correlation: -0.0600

Processing file: bHLH_TCF3_TACCCG20NCCC_CACCTG_12_3.txt
File: bHLH_TCF3_TACCCG20NCCC_CACCTG_12_3.txt
  Dinucleotide Perplexity Pearson Correlation: -0.0820, Spearman Correlation: -0.1085
  GC Content Pearson Correlation: 0.0877, Spearman Correlation: 0.1487

Processing file: bHLH_TCF4_TGCGAA20NGA_CACCTG_14_3.txt
File: bHLH_TCF4_TGCGAA20NGA_CACCTG_14_3.txt
  Dinucleotide Perplexity Pearson Correlation: 0.1016, Spearman Correlation: 0.1116
  GC Content Pearson Correlation: -0.0018,

KeyboardInterrupt: 

In [5]:
import glob
import math
import os
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from collections import Counter

# Function to calculate perplexity for any k-mer (dinucleotide in this case, k=2)
def calculate_kmer_perplexity(seq, k):
    """Return the perplexity of the overlapping k-mer distribution of ``seq``.

    All overlapping substrings of length ``k`` are counted exactly as they
    appear (no alphabet filtering). Perplexity is ``2 ** H``, where ``H`` is
    the Shannon entropy in bits of the observed k-mer frequencies.

    Args:
        seq: Sequence to analyse, as a ``str``.
        k: k-mer length (2 for dinucleotides).

    Returns:
        The perplexity as a float, or ``float('inf')`` when ``seq`` is
        shorter than ``k`` and therefore contains no k-mer.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    # k <= 0 would slice empty or wrapped-around substrings and return a
    # meaningless value instead of failing, so reject it up front.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    counts = Counter(seq[i:i + k] for i in range(len(seq) - k + 1))
    total = sum(counts.values())

    if total == 0:
        return float('inf')

    entropy = -sum((count / total) * math.log2(count / total) for count in counts.values())
    return 2 ** entropy

# Function to calculate GC content
def calculate_gc_content(seq):
    """Compute the share of G and C in ``seq`` as a percentage.

    Only uppercase ``G`` and ``C`` are matched; the percentage is taken over
    the full length of ``seq``.

    Args:
        seq: Sequence to analyse.

    Returns:
        A float percentage in the range 0-100, or the integer ``0`` when
        ``seq`` is empty.
    """
    gc_positions = [base for base in seq if base in "GC"]
    n_positions = len(seq)

    # Guard against division by zero on empty input.
    if not n_positions:
        return 0

    return (len(gc_positions) / n_positions) * 100

# Function to process each file and calculate perplexities, GC content, and correlations
def process_files(pattern, alpha=0.001):
    """Correlate dinucleotide perplexity and GC content with affinity, file by file.

    Every file matching ``pattern`` is read as whitespace-separated text in
    which the first field of a line is a sequence and the second is a numeric
    affinity; further fields are ignored. Files with fewer than two records,
    or for which a correlation raises ``ValueError``, are skipped with a
    message. In the spreadsheet a coefficient is stored only when its p-value
    is below ``alpha`` (otherwise ``"not significant"``); the printed log
    always shows the raw coefficients.

    Args:
        pattern: Glob pattern selecting the input files (e.g. ``"*.txt"``).
        alpha: Significance threshold applied to each p-value (default 0.001).

    Returns:
        None. Two rows per processed file (perplexity and GC content) are
        written to ``perplexity_gc_correlations_fixed.xlsx`` in the current
        working directory.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not a valid float.
    """
    # glob gives no ordering guarantee; sort so the printed log and the
    # spreadsheet rows come out in the same order on every run.
    files = sorted(glob.glob(pattern))
    results = []

    for file in files:
        file_name = os.path.basename(file)
        print(f"Processing file: {file_name}")

        sequences, affinities = [], []

        # Read the .txt file and extract sequences and affinity scores
        with open(file, 'r') as f:
            for line in f:
                data = line.split()
                # A blank line (typically at the end of the file) holds no
                # record; indexing into it would raise IndexError.
                if not data:
                    continue
                sequences.append(data[0])
                affinities.append(float(data[1]))

        # Both lists grow in lockstep, so checking one length is enough.
        if len(sequences) < 2:
            print(f"Skipping {file} due to insufficient data (less than 2 sequences).")
            continue

        # Calculate dinucleotide perplexity (k=2) and GC content for each sequence
        dinucleotide_perplexities = [calculate_kmer_perplexity(seq, 2) for seq in sequences]
        gc_contents = [calculate_gc_content(seq) for seq in sequences]

        # Calculate Pearson and Spearman correlations for dinucleotide perplexity
        try:
            di_pearson_corr, di_pearson_p = pearsonr(dinucleotide_perplexities, affinities)
            di_spearman_corr, di_spearman_p = spearmanr(dinucleotide_perplexities, affinities)
        except ValueError:
            print(f"Skipping correlation calculation for {file} due to invalid data.")
            continue

        # Calculate Pearson and Spearman correlations for GC content
        try:
            gc_pearson_corr, gc_pearson_p = pearsonr(gc_contents, affinities)
            gc_spearman_corr, gc_spearman_p = spearmanr(gc_contents, affinities)
        except ValueError:
            print(f"Skipping GC correlation calculation for {file} due to invalid data.")
            continue

        # Store results in a DataFrame-friendly format
        results.append({
            'File': file_name,
            'Correlation Type': 'Dinucleotide Perplexity',
            'Pearson Correlation': di_pearson_corr if di_pearson_p < alpha else "not significant",
            'Spearman Correlation': di_spearman_corr if di_spearman_p < alpha else "not significant"
        })

        results.append({
            'File': file_name,
            'Correlation Type': 'GC Content',
            'Pearson Correlation': gc_pearson_corr if gc_pearson_p < alpha else "not significant",
            'Spearman Correlation': gc_spearman_corr if gc_spearman_p < alpha else "not significant"
        })

        # Print correlation results (raw values, regardless of significance)
        print(f"Dinucleotide Perplexity - Pearson: {di_pearson_corr:.4f}, Spearman: {di_spearman_corr:.4f}")
        print(f"GC Content - Pearson: {gc_pearson_corr:.4f}, Spearman: {gc_spearman_corr:.4f}")

    # Save results to an Excel file
    df = pd.DataFrame(results)
    output_file = "perplexity_gc_correlations_fixed.xlsx"
    df.to_excel(output_file, index=False)
    print(f"Results saved to {output_file}")

# Process all .txt files in the current folder.
# Files with fewer than two records are skipped; the summary goes to
# perplexity_gc_correlations_fixed.xlsx.
process_files("*.txt")


Processing file: bHLH_Bhlhb2_TCAAGG20NGAA_CACGTG_14_4.txt
Dinucleotide Perplexity - Pearson: 0.0540, Spearman: 0.0506
GC Content - Pearson: 0.1399, Spearman: 0.1582
Processing file: bHLH_MAX_TGACCT20NGA_CACGTG_12_3.txt
Dinucleotide Perplexity - Pearson: 0.2148, Spearman: 0.2476
GC Content - Pearson: -0.0329, Spearman: -0.0600
Processing file: bHLH_TCF3_TACCCG20NCCC_CACCTG_12_3.txt
Dinucleotide Perplexity - Pearson: -0.0820, Spearman: -0.1085
GC Content - Pearson: 0.0877, Spearman: 0.1487
Processing file: bHLH_TCF4_TGCGAA20NGA_CACCTG_14_3.txt
Dinucleotide Perplexity - Pearson: 0.1016, Spearman: 0.1116
GC Content - Pearson: -0.0018, Spearman: -0.0094
Processing file: bHLH_TFAP4_TCACGT20NGCA_CAGCTG_12_4.txt
Dinucleotide Perplexity - Pearson: 0.2463, Spearman: 0.2525
GC Content - Pearson: -0.0335, Spearman: 0.0565
Processing file: bHLH_TFE3_TACCGT20NTA_CACGTG_12_3.txt
Dinucleotide Perplexity - Pearson: 0.4072, Spearman: 0.4770
GC Content - Pearson: -0.2838, Spearman: -0.3234
Processing fil