In [1]:
import json
from random import choices
import re

def clean_markdown_symbols(text):
    # Define patterns to clean
    patterns = [
        (r'\*\*(.*?)\*\*', r'\1'),  # Bold: **text** -> text
        (r'\*(.*?)\*', r'\1'),      # Italic: *text* -> text
        (r'_(.*?)_', r'\1'),        # Underscore italic: _text_ -> text
        (r'`(.*?)`', r'\1'),        # Code: `text` -> text
        (r'~~(.*?)~~', r'\1'),      # Strikethrough: ~~text~~ -> text
        (r'#{1,6}\s', ''),          # Headers: # Header -> Header
        (r'\[(.*?)\]\(.*?\)', r'\1'), # Links: [text](url) -> text
    ]
    
    cleaned_text = text
    for pattern, replacement in patterns:
        cleaned_text = re.sub(pattern, replacement, cleaned_text)
    
    return cleaned_text


In [2]:
with open('../data/byron_processed_poems.json') as f:
    byron_poems = json.load(f)

with open('../data/byron_generated_poems.json') as f:
    generated_poems = json.load(f)

In [3]:
sampled_byron = choices(byron_poems, k=6)
sampled_generated = choices(generated_poems, k=3)

In [4]:
example_poems = sampled_byron[:3]
evaluation_poems = sampled_byron[3:] + sampled_generated

In [5]:
example_json = []

for poem in example_poems:
    clean_lines = ''.join([clean_markdown_symbols(line) for line in poem["lines"]])
    poem_object = {
        "title": "",
        "lines":[clean_lines],
        "isGenerated": False
    }
    example_json.append(poem_object)

In [6]:
evaluation_json = []
for poem in evaluation_poems:
    # print(poem["author"])
    clean_lines = ''.join([clean_markdown_symbols(line) for line in poem["lines"]])
    poem_object = {
        "title": "",
        "lines":[clean_lines],
        "isGenerated": poem["author"] == "Generated"
    }
    evaluation_json.append(poem_object)

In [7]:
import random
random.shuffle(evaluation_json)

In [10]:
with open('example-eval.json', 'w') as f:
    json.dump(example_json, f, indent=2)

In [11]:
with open('test-eval.json', 'w') as f:
    json.dump(evaluation_json, f, indent=2)

In [12]:
example_json

[{'title': '',
  'lines': ['Absent or present, still to thee,\nMy friend, what magic spells belong!\nAs all can tell, who share, like me,\nIn turn thy converse, and thy song.\n\nBut when the dreaded hour shall come\nBy Friendship ever deemed too nigh,\nAnd "Memory" o\'er her Druid\'s tomb\nShall weep that aught of thee can die,\n\nHow fondly will she then repay\nThy homage offered at her shrine,\nAnd blend, while ages roll away,\n_Her_ name immortally with _thine_!'],
  'isGenerated': False},
 {'title': '',
  'lines': ["Anne's Eye is liken'd to the _Sun_,\nFrom it such Beams of Beauty fall;\nAnd _this_ can be denied by none,\nFor like the _Sun_, it shines on _All_.\n\nThen do not admiration smother,\nOr say these glances don't become her;\nTo _you_, or _I_, or _any other_\nHer _Sun_, displays perpetual Summer."],
  'isGenerated': False},
 {'title': '',
  'lines': ["When Man, expell'd from Eden's bowers,\nA moment linger'd near the gate,\nEach scene recall'd the vanish'd hours,\nAnd bad