In [23]:
# file to count generated motifs
import pandas as pd
import os

# function setup
def count_cif_files(directory):
    """Return how many files ending in '.cif' exist anywhere under `directory`.

    The tree is traversed with os.walk, so files in nested subdirectories
    are included in the total.

    Args:
        directory: Path of the directory tree to scan.

    Returns:
        int: Number of file names that end with '.cif'.
    """
    return sum(
        1
        for _root, _subdirs, filenames in os.walk(directory)
        for filename in filenames
        if filename.endswith('.cif')
    )


In [24]:
# Tally the .cif files for every motif type
data_dir = "../data/"

# One directory per motif type, all located under data_dir
hairpins_dir, helices_dir, nways_dir, twoways_dir, sstrand_dir = (
    os.path.join(data_dir, motif_subdir)
    for motif_subdir in (
        "motifs/HAIRPIN/",
        "motifs/HELIX/",
        "motifs/NWAY/",
        "motifs/TWOWAY/",
        "motifs/SSTRAND/",
    )
)

# Count the files in each directory, in the same order as above
hairpin_count, helix_count, nway_jct_count, twoway_jct_count, sstrand_count = map(
    count_cif_files,
    (hairpins_dir, helices_dir, nways_dir, twoways_dir, sstrand_dir),
)

# One line of output per motif type
print(
    f"Number of hairpins: {hairpin_count}",
    f"Number of helices: {helix_count}",
    f"Number of n-way jcts: {nway_jct_count}",
    f"Number of 2-way jcts: {twoway_jct_count}",
    f"Number of single strands: {sstrand_count}",
    sep="\n",
)


Number of hairpins: 19126
Number of helices: 46782
Number of n-way jcts: 7696
Number of 2-way jcts: 26634
Number of single strands: 8427


In [25]:
# Count numbers of n-way junctions

def count_nway_junctions(directory):
    """Count '.cif' files under `directory`, grouped by a key read from the path.

    For every directory visited by os.walk, the second-to-last component of
    that directory's path is split on '-' and the number of resulting pieces
    is used as the key. Each '.cif' file in the visited directory adds one
    to that key's count.

    NOTE(review): this presumes a <directory>/<a-b-c>/<subdir>/<file>.cif
    layout; '.cif' files at any other depth are keyed by whatever the
    second-to-last component happens to be -- confirm against the data tree.

    Args:
        directory: Root of the directory tree to walk.

    Returns:
        dict: Maps the piece count (int) to the number of '.cif' files found,
        with keys in ascending order. Keys with no files are absent.
    """
    junction_counts = {}

    for root, _dirs, files in os.walk(directory):
        # os.walk builds sub-paths with os.sep, which is not "/" on every
        # platform; normalise first so the split finds the right component.
        path_parts = root.replace(os.sep, "/").split("/")
        if len(path_parts) < 2:
            continue  # no second-to-last component to read

        n_junctions = len(path_parts[-2].split("-"))
        n_cif = sum(1 for name in files if name.endswith('.cif'))
        if n_cif:
            junction_counts[n_junctions] = junction_counts.get(n_junctions, 0) + n_cif

    # Sort so the result does not depend on filesystem traversal order.
    return dict(sorted(junction_counts.items()))

# Show the .cif file count for each n-way junction key
nway_junction_counts = count_nway_junctions(nways_dir)
print(nway_junction_counts)
    



{3: 4522, 4: 1724, 5: 979, 6: 208, 7: 260, 8: 3}
