# Base Folder

In [10]:
import os

# Name of the run folder inside 'output' that this notebook loads
# (the name looks like a timestamp -- TODO confirm how it is produced).
BASE_FOLDER = '2024-07-15 22-29-45'

# Path to that run folder; 'output' is resolved relative to the current working directory.
base_folder = os.path.join('output', BASE_FOLDER)

### Read Files

In [11]:
import json

class Word:
    """One word entry taken from a JSON log file.

    Instance attributes (all assigned in __init__):
      file_index  -- int parsed from the source file's base name, extension removed
      probability -- value stored under the entry's 'probability' key
      word        -- value stored under the entry's 'word' key
    """

    def __init__(self, word_json, filename):
        """Create a Word from a single JSON entry and the path it was read from.

        word_json: mapping that provides the 'probability' and 'word' keys.
        filename:  path of the source file; its base name without extension
                   must be accepted by int(), otherwise ValueError is raised.
        """
        # Only the file's own name matters: drop the directories, then the extension.
        stem = os.path.splitext(os.path.basename(filename))[0]
        self.file_index = int(stem)

        self.probability = word_json['probability']
        self.word = word_json['word']

def parse_json(filename):
    """Load one JSON log file and return its entries as Word objects.

    The file must hold a JSON object with a top-level 'words' array.  Every
    element of that array is wrapped in a Word together with `filename`
    (Word derives its file_index from the file's base name).

    filename: path of the JSON file to read.
    Returns:  list of Word, in the order the entries appear in the file.
    Raises:   OSError if the file cannot be opened, json.JSONDecodeError if
              its content is not valid JSON, KeyError if 'words' is missing.
    """
    # Decode explicitly as UTF-8.  Without `encoding`, open() falls back to
    # the platform's locale encoding, which mangles or rejects non-ASCII
    # words on systems whose default is not UTF-8.
    with open(filename, 'r', encoding='utf-8') as f:
        log = json.load(f)

    return [Word(entry, filename) for entry in log['words']]

In [12]:
# Maps (span, flag) -> list of Word objects ordered by file_index.
folder_dict = {}

for span_bool in os.listdir(base_folder):
    # The entry name is split on whitespace: first token is an int, second a boolean word.
    sb_split = span_bool.split()
    # Compare against the literal 'True' rather than calling bool(): bool() of
    # any non-empty string (including 'False') is True.
    key = (int(sb_split[0]), sb_split[1] == 'True')

    foldername = os.path.join(base_folder, span_bool)
    words = []
    for filename in os.listdir(foldername):
        if not filename.endswith('.json'):
            continue    # only the JSON logs are parsed
        words.extend(parse_json(os.path.join(foldername, filename)))

    # Store the words ordered by the integer taken from their source file name.
    folder_dict[key] = sorted(words, key=lambda word: word.file_index)

### Color Printing

In [13]:
from IPython.display import display, HTML

def print_colored(text, fg):
    """Display `text` in the notebook output, drawn in the colour `fg`.

    text: value to show.  It is converted with str() and HTML-escaped, so
          characters such as '<', '>' and '&' appear literally instead of
          being parsed as markup.
    fg:   CSS colour in the format '#RRGGBB' (alpha doesn't seem to be
          supported).  Escaped as well, so it cannot break out of the
          style attribute.
    """
    from html import escape  # local import: keeps this cell self-contained

    output = "<span style=\"color:%s\">%s</span>" % (escape(str(fg)), escape(str(text)))
    display(HTML(output))

def print_colored_tuples(items):
    """Display several coloured strings, one per line, as a single HTML output.

    items: iterable of (text, fg) pairs, i.e. [(text, fg), (text, fg) ...].
           Each text is followed by a <br>.  Both text and fg are converted
           with str() and HTML-escaped, so characters such as '<' and '&'
           appear literally instead of being parsed as markup.
    """
    from html import escape  # local import: keeps this cell self-contained

    output = "".join(
        "<span style=\"color:%s\">%s</span><br>" % (escape(str(fg)), escape(str(text)))
        for text, fg in items
    )
    display(HTML(output))

def hex_to_rgb(hex):
    """Convert a hex colour string to an (r, g, b) tuple of ints.

    hex: six hexadecimal digits 'RRGGBB'.  A single leading '#' is also
         accepted, so the '#rrggbb' strings produced by rgb_to_hex can be
         passed straight back in.  (The parameter name shadows the built-in
         hex() inside this function; it is kept for existing callers.)
    Returns: tuple of three ints, each parsed from one two-digit pair.
    Raises:  ValueError if any pair is not valid hexadecimal.
    """
    digits = hex[1:] if hex.startswith('#') else hex
    return tuple(int(digits[i:i+2], 16) for i in (0, 2, 4))

def rgb_to_hex(rgb):
    """Convert an rgb colour back to a hex string.

    rgb: sequence whose first three items are integer channel values.
    Returns: '#' followed by each channel as two-digit lowercase hex.
    """
    hex_template = '#{:02x}{:02x}{:02x}'
    return hex_template.format(*rgb)

def lerp(c1, c2, pct):
    """Linearly interpolate between two colours.

    c1, c2: colour strings; the first character of each (the '#') is dropped
            before the rest is handed to hex_to_rgb.
    pct:    blend factor; 0 yields c1's channels and 1 yields c2's.
    Returns: the blended colour as a hex string built by rgb_to_hex.
    """
    start = hex_to_rgb(c1[1:])
    end = hex_to_rgb(c2[1:])

    # Blend each channel independently; int() truncates the fractional part.
    blended = tuple(int((hi - lo) * pct + lo) for lo, hi in zip(start, end))

    return rgb_to_hex(blended)

# Quick visual check of the helpers above: a fixed red string, then a string
# in the colour lerp() returns for pct=0.35 between red and blue.
print_colored('there', '#FF0000')
print_colored('calculated color', lerp('#FF0000', '#0000FF', 0.35))

In [14]:
def zip_longest_custom(*lists):
    """Iterate any number of lists of (text, fg) tuples in a single pass.

    Yields one tuple per row index, holding the i-th element of every list.
    Lists shorter than the longest one contribute the placeholder
    ('', 'black') for their missing rows.  When called with no lists at all,
    nothing is yielded (the earlier max()-based implementation raised
    ValueError in that case).
    """
    from itertools import zip_longest  # local import: keeps this cell self-contained

    yield from zip_longest(*lists, fillvalue=('', 'black'))

def print_colored_columns(*cols):
    """Display several lists of coloured strings side by side as columns.

    cols: each column is a list of tuples, col=[(text, fg), (text, fg) ...].
          Columns may have different lengths; zip_longest_custom pads the
          shorter ones so every row has one cell per column.
          Both text and fg are converted with str() and HTML-escaped, so
          characters such as '<' and '&' appear literally instead of being
          parsed as markup.
    """
    from html import escape  # local import: keeps this cell self-contained

    rows = []
    for row in zip_longest_custom(*cols):
        cells = "".join(
            '<span style="width: 100%; color:' + escape(str(cell[1])) + '">'
            + escape(str(cell[0])) + '</span>'
            for cell in row
        )
        # Each row is its own flex container; no <br> is needed after it.
        rows.append('<div style="display:flex;">' + cells + '</div>')

    # Join once at the end instead of growing a string inside the loop.
    display(HTML("".join(rows)))

# Quick visual check with two columns of different lengths (2 and 3 entries).
col1 = [("Hello", "red"), ("There", "green")]
col2 = [("Python", "blue"), ("Development", "orange"), ("Fun!", "purple")]
print_colored_columns(col1, col2)

In [15]:
def get_words_to_colortuple(words):
    """Turn Word objects into (text, fg) tuples for the colour-printing helpers.

    words: iterable of objects exposing `.word` and `.probability`.
    Returns: list of (word, colour) tuples, where the colour is
             lerp('#000000', '#FFFFFF', probability).
    """
    return [
        (entry.word, lerp('#000000', '#FFFFFF', entry.probability))
        for entry in words
    ]

# All Columns

In [18]:
# Distinct first components (spans) of every folder_dict key, in ascending order.
unique_ints = sorted({span_flag[0] for span_flag in folder_dict})

columns = []

for span in unique_ints:
    # Only the runs whose flag is False are shown here.
    # NOTE(review): a span that only has a True entry raises KeyError on this lookup.
    span_list = folder_dict[(span, False)]
    columns.append(get_words_to_colortuple(span_list))

# One column per span, rendered side by side.
print_colored_columns(*columns)