In [145]:
import scipy

class PSTNode:
    """A single node of a probabilistic suffix tree (PST).

    Attributes:
        label: Identifier of the node; the tree uses it as the lookup key.
        children: List of ``(child_label, p)`` tuples, where ``child_label``
            identifies the child node and ``p`` is the weight of the edge
            leading to it.
    """

    def __init__(self, label, children=None):
        """Create a node.

        Args:
            label: Identifier of the node.
            children: Optional iterable of ``(child_label, p)`` tuples.
                Omitted or ``None`` means the node starts without children.
        """
        self.label = label

        # Copy the incoming sequence so that two nodes built from the same
        # list object never share (and silently mutate) each other's edges.
        self.children = list(children) if children is not None else []


class PSTTree:
    """Label-indexed collection of tree nodes."""

    def __init__(self):
        # Maps each node label to the node object registered under it.
        self.node_dict = dict()

    def add_node(self, label, node):
        """Register ``node`` under ``label``, replacing any previous entry."""
        self.node_dict.update({label: node})

    def add_children(self, parent, child, p):
        """Append the edge ``(child, p)`` to the node stored under ``parent``.

        Raises:
            KeyError: if no node has been registered under ``parent``.
        """
        edge = (child, p)
        parent_node = self.node_dict[parent]
        parent_node.children.append(edge)

# `import scipy` alone does not guarantee that the `io` submodule is loaded
# (older SciPy releases raise AttributeError on `scipy.io`), so import it
# explicitly before use.
import scipy.io

f = "/home/george-vengrovski/Downloads/pst_data.mat"
mat_data = scipy.io.loadmat(f, struct_as_record=False, squeeze_me=True)
tree_data = mat_data['TREE']

pst_tree = PSTTree()

# Walk the entries of TREE in order. The first entry only stands for the root;
# every later entry contributes one node per (parent, label, p) triple.
# NOTE(review): presumably each entry is one depth level of the PST and
# `label` is a string of states -- confirm against the .mat layout.
for depth, level in enumerate(tree_data):
    if depth == 0:
        pst_tree.add_node(label="root", node=PSTNode(label="root", children=[]))
        continue

    # Entries without any parent information hold no nodes; skip them.
    if len(level.parent) == 0:
        continue

    # `level.parent[0]` stays in the zip so the iteration length is unchanged;
    # its values are not needed because the parent is derived from the label.
    for _parent, label, p in zip(level.parent[0], level.label, level.p):
        pst_tree.add_node(label=label, node=PSTNode(label=label, children=[]))

        # Single-symbol labels hang directly off the root; longer labels hang
        # off the node whose label is this one without its first symbol.
        parent_label = "root" if len(label) == 1 else label[1:]
        pst_tree.add_children(parent=parent_label, child=label, p=p)

In [148]:
from graphviz import Digraph

def visualize_pst_tree(pst_tree):
    """Build a graphviz ``Digraph`` mirroring the contents of ``pst_tree``.

    Args:
        pst_tree: Object exposing ``node_dict``, a mapping from node label to
            a node whose ``children`` is an iterable of ``(child_label, p)``.

    Returns:
        The populated ``Digraph``; nothing is rendered here.
    """
    graph = Digraph(comment='Probabilistic Suffix Tree')
    nodes = pst_tree.node_dict

    # First pass: declare every node, using its label as both id and caption.
    for name in nodes:
        graph.node(name, name)

    # Second pass: one edge per recorded child, captioned with its weight.
    for parent_name, parent_node in nodes.items():
        for child_name, weight in parent_node.children:
            # Children that were never registered as nodes get no edge.
            if child_name not in nodes:
                continue
            graph.edge(parent_name, child_name, label=f'p={weight}')

    return graph

# Build the graph for the tree assembled above and render it as a PNG.
# The output name is relative to the working directory; change it as needed.
# The call is left as the last expression so the cell displays its return value.
dot = visualize_pst_tree(pst_tree)
dot.render(filename='pst_tree_visualization', format='png', view=True)


'pst_tree_visualization.png'

In [None]:
import re

def integer_to_letter(match):
    """Map the integer text of a regex match to an uppercase letter.

    The mapping is 1 -> 'A', 2 -> 'B', ..., 26 -> 'Z' and wraps around every
    26 values, so 27 maps back to 'A' and 0 maps to 'Z'.

    Args:
        match: Regex match object whose full matched text parses as an int.

    Returns:
        A single uppercase ASCII letter.
    """
    value = int(match.group(0))
    # Shift to a 0-based position and wrap it into the 26-letter alphabet.
    alphabet_offset = (value - 1) % 26
    return chr(ord('A') + alphabet_offset)

def replace_integers_with_letters(file_path):
    """Rewrite ``file_path`` in place with every standalone integer lettered.

    Each run of digits delimited by word boundaries is passed through
    ``integer_to_letter``. A leading minus sign or a decimal point is not part
    of the match, so ``-1`` becomes ``-A`` and ``1.5`` becomes ``A.E``.

    Args:
        file_path: Path of the text file to read and then overwrite.
    """
    integer_pattern = re.compile(r'\b\d+\b')

    with open(file_path, 'r') as source:
        original_text = source.read()

    # Substitute every standalone digit run with its letter.
    converted_text = integer_pattern.sub(integer_to_letter, original_text)

    # The original contents are overwritten; no backup is kept.
    with open(file_path, 'w') as target:
        target.write(converted_text)

# Label file to convert. Point this at your own file; another file in the same
# project folder is
# '/home/george-vengrovski/Documents/projects/tweety_bert_paper/gtruth_pst_data.txt'.
# Running this cell overwrites the file in place.
file_path = '/home/george-vengrovski/Documents/projects/tweety_bert_paper/hdbscan_labels.txt'
replace_integers_with_letters(file_path=file_path)


In [107]:
import numpy as np

# Open one song archive; `f` ends up bound to the loaded archive object,
# which stays open after this cell.
f = np.load(
    "/home/george-vengrovski/Downloads/5177_songs_npz_files/USA5177_45268.29211546_12_8_8_6_51.npz"
)

# Names of the arrays stored in the archive.
print(f.files)

# Distinct values that occur in the "song" array.
print(np.unique(f["song"]))

['s', 'labels', 'song']
[0 1]
