# Base Folder

In [115]:
import os

BASE_FOLDER = '2024-07-14 22-10-11'

base_folder = os.path.join('output', BASE_FOLDER)

### Read Files

In [116]:
import json

class Word:
    def __init__(self, word_json, filename):
        name, _ = os.path.splitext(os.path.basename(filename))

        self.file_index = int(name)
        self.probability = word_json['probability']
        self.word = word_json['word']

def parse_json(filename):
    retVal = []

    with open(filename, 'r') as f:
        log = json.load(f)

    for word in log['words']:
        retVal.append(Word(word, filename))
    
    return retVal

In [117]:
folder_dict = {}

for span_bool in os.listdir(base_folder):
    sb_split = span_bool.split()
    #key = (int(sb_split[0]), bool(sb_split[1]))        # boo() of any string other than empty string is True
    key = (int(sb_split[0]), sb_split[1] == 'True')
    #print(str(key))

    words = []

    foldername = os.path.join(base_folder, span_bool)
    for filename in os.listdir(foldername):
        if filename.endswith('.json'):
            words.extend(parse_json(os.path.join(foldername, filename)))
    
    folder_dict[key] = sorted(words, key=lambda o: o.file_index)

### Color Printing

In [118]:
from IPython.display import display, HTML

# fg is in the format: '#RRGGBB'.  alpha doesn't seem to be supported
def print_colored(text, fg):
    output = "<span style=\"color:%s\">%s</span>" % (fg, text)
    display(HTML(output))

# items=[(text, fg), (text, fg) ...]
def print_colored_tuples(items):
    output = "".join("<span style=\"color:%s\">%s</span><br>" % (fg, text) for text, fg in items)
    display(HTML(output))

# Converts a color from hex format to rgb
def hex_to_rgb(hex):
    return tuple(int(hex[i:i+2], 16) for i in (0, 2, 4))

# Converts a color from rgb format back to hex
def rgb_to_hex(rgb):   
    return '#{:02x}{:02x}{:02x}'.format(*rgb)

# Linearly interpolates between two colors
def lerp(c1, c2, pct):
    r1, g1, b1 = hex_to_rgb(c1[1:])   # Remove '#' and convert to RGB
    r2, g2, b2 = hex_to_rgb(c2[1:])
    
    r = int((r2 - r1) * pct + r1)  # Interpolate for each channel.
    g = int((g2 - g1) * pct + g1)
    b = int((b2 - b1) * pct + b1)
    
    return rgb_to_hex((r, g, b))  # Convert back to HEX and return.

print_colored('there', '#FF0000')
print_colored('calculated color', lerp('#FF0000', '#0000FF', 0.35))

In [119]:
# Takes two lists of tuples, iterates both lists in a single pass
# Replaces entries with None when the list lengths are uneven
def zip_longest_custom(col1, col2):
    len1 = len(col1)
    len2 = len(col2)
    maxlen = max(len1, len2)
    
    for i in range(maxlen):  # Loop through the length of the longest list.
        val1 = col1[i] if i < len1 else ('', 'black')
        val2 = col2[i] if i < len2 else ('', 'black')
        
        yield val1, val2

# Each column is a list of tuples:
# col=[(text, fg), (text, fg) ...]
def print_colored_columns(col1, col2):
    # Use zip_longest instead of chain for proper handling of uneven list lengths
    combined = zip_longest_custom(col1, col2)

    output = ""
    for item1, item2 in combined:
        html1 = '<span style="width: 100%; color:' + item1[1] + '">' + item1[0] + '</span>'
        html2 = '<span style="width: 100%; color:' + item2[1] + '">' + item2[0] + '</span>'

        # looks like <br> isn't needed at the end of this one
        combined_html = f"""
        <div style="display:flex;">
            {html1}
            {html2}
        </div>
        """

        output += combined_html

    display(HTML(output))

col1 = [("Hello", "red"), ("There", "green")]
col2 = [("Python", "blue"), ("Development", "orange"), ("Fun!", "purple")]
print_colored_columns(col1, col2)

In [120]:
def get_words_to_colortuple(words):
    items = []

    for word in words:
        items.append((word.word, lerp('#000000', '#FFFFFF', word.probability)))

    return items

# False vs True

In [121]:
# TODO: false and true seem identical.  Make a function that compares the two columns, print the diffs

unique_ints = sorted(set([key[0] for key in folder_dict.keys()]))

for span in unique_ints:
    print('-------------- ' + str(span) + '--------------')
    col_false = get_words_to_colortuple(folder_dict[(span, False)])
    col_true = get_words_to_colortuple(folder_dict[(span, True)])

    print_colored_columns(col_false, col_true)

-------------- 1--------------


-------------- 2--------------


-------------- 4--------------


-------------- 8--------------


-------------- 16--------------


-------------- 32--------------


-------------- 64--------------


### Compare Two Columns

They are identical.  I'm suspicious of how they were generated

In [122]:
import difflib

def words_to_tuples(words):
    return [(getattr(obj, 'word'), getattr(obj, 'probability')) for obj in words]
    
def compare_two_columns(col1, col2):
    tuples1 = words_to_tuples(col1)
    tuples2 = words_to_tuples(col2)
    diff_text = '\n'.join([a[1] for a in difflib.ndiff(tuples1, tuples2)])
    print(diff_text)

for span in unique_ints:
    print('-------------- ' + str(span) + '--------------')
    compare_two_columns(folder_dict[(span, False)], folder_dict[(span, True)])

-------------- 1--------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
-------------- 2--------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
-------------- 4--------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
-------------- 8--------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
-------------- 16--------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
-------------- 32--------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
-------------- 64--------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
