In [1]:
# parse data
import json
def parse_poems(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
    
    # Split content by multiple newlines (3 or more)
    raw_poems = [p.strip() for p in content.split('\n\n\n') if p.strip()]

    # print(raw_poems)
    
    formatted_poems = []

    for poem in raw_poems:
        # Skip empty sections
        if not poem.strip():
            continue
            
        # Split into lines and filter out empty lines
        lines = [line.strip() for line in poem.split('\n') if line.strip()]
        
        # Extract title (removing  marks) and content
        title = None
        content_lines = []
        for line in lines:
            if not line[0].isdigit():  # Skip date lines (starting with numbers)
                content_lines.append(line)

        if content_lines:
            title = content_lines[0]
            formatted_poems.append({"title": title,"body": chr(10).join(content_lines[1:])})
    
    return formatted_poems

In [3]:
file_path = "./data.txt"
poems = parse_poems(file_path)

In [25]:
import random
poems[random.randint(1, 61)]

{'title': '猫山',
 'body': '就沿着树林的边走\n它指引着你某个方向\n凋零的叶的细胞都活了\n懒散的挂在新的树桠上\n我不好奇山上有什么\n我就想在这山下靠这树一会'}

In [49]:
poems

[{'title': '河',
  'body': '在河水干涸前\n我将一只虫放在那里\n让他熟悉着温润而又恶臭的空气\n它却飞起的像花一样\n清澈的河水杂然无味\n流淌在巨大的山峰底下\n峰上盖着厚厚的雪',
  'tone': ['沉思', '隐约的忧伤', '自然的敬畏'],
  'style': ['简洁的语言', '自由诗体', '哲学性的思考'],
  'image': ['干涸的河水', '飞起的虫', '清澈的河水', '巨大的山峰', '覆盖着雪的山峰'],
  'structure': ['自由的行分', '无固定韵律', '自然流畅的叙述'],
  'renamed_title': False,
  'imagery': ['河水', '虫子', '山峰', '积雪'],
  'symbolism': ['河水象征生命的流逝', '虫子象征生命的脆弱与适应', '山峰象征永恒与自然的力量'],
  'themes': ['生命的短暂与永恒', '自然与人类的关系', '适应与变化'],
  'poetic_techniques': ['隐喻', '对比', '象征'],
  'emotional_impact': ['从平静到深思', '引发对生命与自然的感悟'],
  'possible_expansions': ['探讨更多自然元素的象征意义', '加入人类活动对自然的影响', '扩展对河流的历史与未来的描写']},
 {'title': '雨',
  'body': '撕裂挂在雨水上的伞\n雨像是隐藏在木中细断了的线\n舒缓的冷风倚着夹缝挤进来\n这无法吹干我们每个人的头发\n也无法让我们每个人着凉\n若嵌着的玻璃被风嚼碎\n偏偏细碎便是雨\n是大雨'},
 {'title': '湖',
  'body': '湖水已在我眼里结痂\n瞳孔中绽出一堆枯萎的莲花\n冬\n冬日来袭\n我不再惧怕那焦糊了的莲蓬\n温室外的昆虫都已死去\n还没吮吸够空气的灰绿被冻的发脆\n吹落的绿叶驾着荷叶驰骋\n野鸭作伴\n在还有余温的地方画出锋利的对勾'},
 {'title': '我踱来踱去',
  'body': '似乎在等待着谁\n却只看到了像蒲公英一样的蚊\n这是或许有风吹来\n我放弃挣扎\n浸入冰冷的水中\n让这刺骨的液态金属\n一寸一寸把我挤破'}

In [26]:
import json

with open("my_poems.json", "w", encoding="utf-8") as f:
    json.dump(poems, f, indent=2, ensure_ascii=False)


In [50]:
import os 
from openai import OpenAI

client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  # this is also the default, it can be omitted
)

from pydantic import BaseModel
class Poem(BaseModel):
    title: str
    renamed_title: bool
    tone: list[str]
    style: list[str]
    imagery: list[str]
    symbolism: list[str]
    themes: list[str]
    structure: list[str]
    poetic_techniques: list[str]
    emotional_impact: list[str]
    possible_expansions: list[str]

def annotate_poem(title, body):
    prompt = f"""
    Please analyze the following Chinese poem and return your analysis in Chinese in a JSON format. 
    Avoid directly quoting entire lines or phrases from the poem. Instead, provide concise, paraphrased, or interpretive insights.
    Make sure to thoroughly cover the poem’s style, tone, imagery, structure, and other key elements, but do so by inference and explanation rather than verbatim repetition.
    If you feel the poem’s original title could be improved, propose a new title in Chinese and indicate in the JSON whether you renamed it.
    
    ---
    Title: {title if title else 'Untitled'}
    Body:
    {body}
    ---
    
    In your response, please include these details in Chinese:
    
    1. New Title Suggestion (if needed) – Indicate if you think the original title is insufficient.
    2. Tone (tone) – The poem’s emotional ambience or atmosphere.
    3. Style (style) – The poet’s approach, writing techniques, or influences observed in this specific piece.
    4. Imagery (imagery) – Key images or visual elements that stand out.
    5. Symbolism (symbolism) – Any symbolic meaning behind the imagery or references.
    6. Themes (themes) – The central ideas or messages explored by the poem.
    7. Structure (structure) – Notable features about stanza breaks, lineation, or rhythm/pacing.
    8. Poetic Techniques (poetic_techniques) – Examples of figurative language like metaphors, personification, or rhetorical devices.
    9. Emotional Impact (emotional_impact) – How the poem’s mood or sentiment might evolve from start to finish.
    10. Possible Expansions (possible_expansions) – Brief ideas on how this poem could be expanded or inspire further works.
    
    Return only the JSON in the following structure (in Chinese):
    
    ```json
    
      "title": "（最终标题）",
      "renamed_title": false,
      "tone": ["..."],
      "style": ["..."],
      "imagery": ["..."],
      "symbolism": ["..."],
      "themes": ["..."],
      "structure": ["..."],
      "poetic_techniques": ["..."],
      "emotional_impact": ["..."],
      "possible_expansions": ["..."]
    
    ```

    Also note that:
    renamed_title: true if you changed the original title, otherwise false.
    For each array field (e.g., "tone", "style", "themes", etc.), provide a list of concise descriptive phrases in Chinese.
    Please do not include any additional text or explanations outside of this JSON object.
    """

    system_prompt = "You are an expert in understanding poetry and data extraction. The author’s poetry deeply explores introspection and existential themes of life, identity, and freedom, using vivid natural and everyday imagery alongside philosophical and spiritual elements such as night, rivers, rain, journeys, and a longing for home. The tone shifts between quiet reflection, raw emotion, hope, and yearning, while the free-verse structure employs direct, minimalist language with occasional symbolic or abstract expressions. Recurring motifs of distance, searching, and the tension between belonging and escape are prominent. Additionally, classical references, cultural touchstones, and personal anecdotes are woven throughout, creating a modern yet reflective style that provides consistent context for analyzing the author’s work. When given a poem by this author, analyze and summarize it according to the specified structure, maintaining alignment with the author’s established style and themes."
    
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-11-20",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        temperature=0.0,
        response_format=Poem
    )
    # print(completion.choices[0].message.content)
    # Attempt to parse as JSON
    try:
        return json.loads(completion.choices[0].message.content.strip())
    except:
        # Fallback if not valid JSON
        return {"tone": "N/A", "style": "N/A", "image": "N/A", "structure": "N/A"}

# Example usage
for poem in poems[1:]:
    analysis = annotate_poem(poem["title"], poem["body"])
    poem.update(analysis)



{
  "title": "雨的碎片",
  "renamed_title": true,
  "tone": ["冷静", "沉思", "微带忧郁"],
  "style": ["自由诗", "简约", "象征主义"],
  "imagery": ["雨水中的伞", "冷风", "碎裂的玻璃"],
  "symbolism": ["雨象征着细碎的情感或记忆", "风代表无形的力量"],
  "themes": ["自然与人类的关系", "细微事物中的深意", "脆弱与力量的对比"],
  "structure": ["无固定韵律", "短句与意象的堆叠", "自由流动的节奏"],
  "poetic_techniques": ["隐喻", "拟人", "对比"],
  "emotional_impact": ["从冷静的观察到对自然力量的敬畏"],
  "possible_expansions": ["探讨雨后世界的变化", "加入更多关于雨中人类活动的描写"]
}
{"title":"湖","renamed_title":false,"tone":["冷峻","沉思","略带忧伤"],"style":["自由诗","简洁而深刻","富有象征意义"],"imagery":["结痂的湖水","枯萎的莲花","冬日的寒冷","吹落的绿叶","野鸭的轨迹"],"symbolism":["湖水象征内心的伤痕","枯萎的莲花象征生命的衰败","冬日象征时间的流逝与冷酷","绿叶和荷叶象征自然的循环"],"themes":["生命的脆弱与短暂","自然与人类的关系","时间的不可逆性","内心的孤独与反思"],"structure":["自由的行文结构","无固定韵律","通过意象的叠加构建情感"],"poetic_techniques":["隐喻","拟人","象征","对比"],"emotional_impact":["从冷静的观察到深刻的反思","逐渐引发读者的共鸣与感慨"],"possible_expansions":["可以进一步探讨湖水与人类情感的关系","加入更多季节变化的描写","扩展对自然与人类互动的哲学思考"]}
{"title":"等待与沉溺","renamed_title":true,"tone":["孤独","绝望","冷静"],"style":["

In [41]:
poems[0]

{'title': '河',
 'body': '在河水干涸前\n我将一只虫放在那里\n让他熟悉着温润而又恶臭的空气\n它却飞起的像花一样\n清澈的河水杂然无味\n流淌在巨大的山峰底下\n峰上盖着厚厚的雪',
 'tone': ['沉思', '自然', '感伤'],
 'style': ['现代诗', '自由诗', '象征主义'],
 'image': ['干涸的河水', '飞起的虫', '清澈的河水', '巨大的山峰', '覆盖着雪的山峰'],
 'structure': ['自由诗结构', '无固定韵律', '意象对比']}

In [54]:
poems[-1]

{'title': '在法兰德斯的罂粟花',
 'body': '罂粟驰骋在法兰西斯战场上\n那鲜红如耶稣被钉在十字架上流的血一般\n不知名的百灵依旧唱着它的歌\n蜗居在它熟悉的天空\n但如罂粟般交错纵横的枪声打扰了我们欣赏这景色的惬意怅惘\n像我们这般的已逝之人\n不久之前还呼吸着大把大把的空气\n看这初升之日，残阳化血\n我们爱着又被爱着\n如今却只能躺在法兰西斯的战场\n战争还是要继续\n我们决定了孤注一掷\n请接好，这燃烧着的火把\n颤抖的双手也无法阻止\n请一定，将这火高高的举起\n让它将这一切化为灰烬\n倘若你们辜负了这些死去之人\n即使罂粟酒满战场，我们也永不安息',
 'renamed_title': True,
 'tone': ['悲怆', '庄严', '沉思'],
 'style': ['自由诗', '象征主义', '直白而深刻'],
 'imagery': ['罂粟花', '战场', '血', '百灵鸟', '火炬'],
 'symbolism': ['罂粟象征牺牲与记忆', '火炬象征传承与希望', '血象征战争的代价'],
 'themes': ['战争与和平', '牺牲与传承', '记忆与遗忘'],
 'structure': ['自由诗结构', '无固定韵律', '段落分明'],
 'poetic_techniques': ['象征手法', '隐喻', '对比'],
 'emotional_impact': ['从沉痛到激昂', '从哀悼到呼吁'],
 'possible_expansions': ['探讨战争对个体的影响', '描写战后重建的希望', '扩展对自然与战争的对比描写']}

In [51]:
import json

with open("poems_features.json", "w", encoding="utf-8") as f:
    json.dump(poems, f, indent=2, ensure_ascii=False)


In [34]:
# PROMPT_EXTENT
# After the new poem content is generated, compare with the original poem meta data to ensure alignment with tine, style etc, if it is not aligned re-think, analyze and re-generate.
# Then add new attibute to json: "match_score":  denote how much it is relevant by comparison (integer from 1-10, 0:entirelly DIFFERENT-10:basically the SAME)
#                                "quality_score": the quality of the newly generated poem (0:not following any of metadata writing way 10: extreme quality that author would write)
#                                 also include the "original_title": the original poem title 
# Make sure the new poem is REALLY different from the original poem and be accurate, precise in the way follow how authors writes like this.

In [9]:
# Expansion 
from pydantic import BaseModel
class PoemExpand(BaseModel):
    title: str
    body: str
    tone: list[str]
    style: list[str]
    imagery: list[str]
    symbolism: list[str]
    themes: list[str]
    structure: list[str]
    poetic_techniques: list[str]
    emotional_impact: list[str]
    possible_expansions: list[str]
    original_title: str

In [36]:
import os 
from openai import OpenAI

client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  # this is also the default, it can be omitted
)


PROMPT_TEMPLATE = """
You are analyzing a poem by an author whose work focuses on introspection, existential themes, 
and vivid natural imagery with a free-verse style. The author's poems often explore life, identity, 
the tension between belonging and escape, and philosophical/spiritual elements.

Original poem metadata:
- Title: "{title}" 
- Body: {body}
- Tone: {tone}
- Style: {style}
- Imagery: {imagery}
- Symbolism: {symbolism}
- Themes: {themes}
- Structure: {structure}
- Poetic techniques: {poetic_techniques}
- Emotional impact: {emotional_impact}
- Possible expansions: {possible_expansions}

The details for these fields could be useful but not necessary as the most important is follow the style and use same tone, similar thinking from the author in writing poems, be consistent.
To help better understand how these can be interpreted:
    1. New Title Suggestion (if needed) – Indicate if you think the original title is insufficient.
    2. Tone (tone) – The poem’s emotional ambience or atmosphere.
    3. Style (style) – The poet’s approach, writing techniques, or influences observed in this specific piece.
    4. Imagery (imagery) – Key images or visual elements that stand out.
    5. Symbolism (symbolism) – Any symbolic meaning behind the imagery or references.
    6. Themes (themes) – The central ideas or messages explored by the poem.
    7. Structure (structure) – Notable features about stanza breaks, lineation, or rhythm/pacing.
    8. Poetic Techniques (poetic_techniques) – Examples of figurative language like metaphors, personification, or rhetorical devices.
    9. Emotional Impact (emotional_impact) – How the poem’s mood or sentiment might evolve from start to finish.
    10. Possible Expansions (possible_expansions) – Brief ideas on how this poem could be expanded or inspire further works.

Task:
1. Write a new or expanded poem in the same style and spirit as the original.
2. Do not reuse or quote lines directly. Instead, paraphrase or creatively rephrase the imagery and ideas.
3. Optionally refine the title if the original seems insufficient, or if 'renamed_title' is True. 
4. Preserve the original poem's tone, style, and thematic essence, incorporating relevant expansions suggested 
   (e.g., more detail about nature, philosophical reflections, or extended metaphors).
5. Return your poem in Chinese in only a JSON format in the following structure (in Chinese as the author's poems):

      "title": "",
      "body": "",
      "tone": ["..."],
      "style": ["..."],
      "imagery": ["..."],
      "symbolism": ["..."],
      "themes": ["..."],
      "structure": ["..."],
      "poetic_techniques": ["..."],
      "emotional_impact": ["..."],
      "possible_expansions": ["..."],
      "original_title": "" (the original poem title)

Please output only the JSON data as plain text without enclosing it in ```json` blocks or any other formatting.

Constraints:
- 绝对不要排比！
- 要尽量简短简洁，不要太长，与原诗歌长度相似
- Avoid verbatim copying of lines from the original body.
- Keep a cohesive free-verse structure aligned with the original style.
- Feel free to deepen imagery or extend metaphors based on 'possible_expansions'.
- Please do not include any additional text or explanations outside of this JSON object.

"""

def expand_poem(poem, model="gpt-4o-2024-11-20", temperature=0.7):
    """
    Generates an expanded version of a poem using its metadata and 
    user instructions to avoid direct quotes.
    Returns a dict with keys 'title' and 'body'.
    """
    # Prepare fields
    title = poem.get("title", "Untitled")
    body = poem.get("body", "")
    tone = ", ".join(poem.get("tone", []))
    style = ", ".join(poem.get("style", []))
    imagery = ", ".join(poem.get("imagery", []))
    symbolism = ", ".join(poem.get("symbolism", []))
    themes = ", ".join(poem.get("themes", []))
    structure = ", ".join(poem.get("structure", []))
    poetic_techniques = ", ".join(poem.get("poetic_techniques", []))
    emotional_impact = ", ".join(poem.get("emotional_impact", []))
    possible_expansions = ", ".join(poem.get("possible_expansions", []))

    # Insert into prompt
    prompt = PROMPT_TEMPLATE.format(
        title=title,
        body=body,
        tone=tone,
        style=style,
        imagery=imagery,
        symbolism=symbolism,
        themes=themes,
        structure=structure,
        poetic_techniques=poetic_techniques,
        emotional_impact=emotional_impact,
        possible_expansions=possible_expansions,
    )

    # System + user messages
    messages = [
        # {"role": "system", "content": "You are a helpful assistant that can expand poems."},
        {"role": "user", "content": prompt}
    ]

    completion = client.beta.chat.completions.parse(
        model=model,
        messages=messages,
        temperature=temperature,
        # response_format=PoemExpand
        # max_tokens=1024
    )
    content = completion.choices[0].message.content.strip()

    # Attempt to parse the response as JSON:
    try:
        expanded_poem = json.loads(content)
    except:
        # Fallback if it's not valid JSON
        # We might attempt a simple extraction
        expanded_poem = {"title": "Untitled Expansion", "body": content}

    return expanded_poem


In [37]:
import json

with open("poems_features.json", "r", encoding="utf-8") as f:
    poems = json.load(f)


In [38]:
augmented_poems = []

In [40]:
for poem in poems[1:]:
    new_version = expand_poem(poem, model="o1-mini", temperature=1)
    # Optionally verify new_version has the desired structure:
        # You might store them, or update your existing poem with a variation
    augmented_poems.append(new_version)
    print(new_version)

# Now you have new poems that are expansions/variations in the same style.
print(f"Generated {len(augmented_poems)} expansions from {len(poems)} base poems.")


{'title': '风的低语', 'body': '落叶在风中轻舞\n风仿佛无形的手拂过树梢\n微凉的气息渗透每一个裂缝\n无法平息我们心中的波动\n也无法冻结我们的思绪\n若断枝被风撕裂\n细碎正是风的呢喃\n是凛冽的风', 'tone': ['冷静', '沉思', '微带忧郁'], 'style': ['自由诗', '简约', '象征主义'], 'imagery': ['风中的落叶', '轻舞的枝条', '破碎的树枝'], 'symbolism': ['风象征无形的力量与变化', '落叶代表逝去与脆弱'], 'themes': ['自然与人类的关系', '内心的脆弱与坚韧', '无形力量的影响'], 'structure': ['无固定韵律', '短句与意象的堆叠', '自由流动的节奏'], 'poetic_techniques': ['隐喻', '拟人', '对比'], 'emotional_impact': ['从冷静的描绘到对自然力量的敬畏'], 'possible_expansions': ['描绘风后的景象变化', '加入更多关于人在风中活动的细节'], 'original_title': '雨的碎片'}
{'title': '寒塘', 'body': '寒塘凝视我内心的裂痕\n瞳中绽放枯榕的影子\n冬季的回声\n冰冷侵袭\n我不再害怕那干枯的枝桠\n温暖之外的生灵已沉寂\n尚未吸尽的灰暗被寒霜冻结\n飘落的枯叶随水漂流\n孤雁作伴\n在残留温度中勾勒出锋利的弧线', 'tone': ['冷峻', '沉思', '略带忧伤'], 'style': ['自由诗', '简洁而深刻', '富有象征意义'], 'imagery': ['凝视的寒塘', '枯榕的影子', '冬季的寒冷', '飘落的枯叶', '孤雁的轨迹'], 'symbolism': ['寒塘象征内心的裂痕', '枯榕象征生命的枯萎', '冬季象征时间的流逝与冷漠', '枯叶和水流象征自然的循环'], 'themes': ['生命的脆弱与短暂', '自然与人类的关系', '时间的不可逆性', '内心的孤独与反思'], 'structure': ['自由的行文结构', '无固定韵律', '通过意象的叠加构建情感'], 'poetic_techniques': ['隐喻', '拟人', '象征', 

In [41]:
import json

with open("augmented_poems_o1_mini.json", "w", encoding="utf-8") as f:
    json.dump(augmented_poems, f, indent=2, ensure_ascii=False)


In [45]:
import json

with open("augmented_poems.json", "w", encoding="utf-8") as f:
    json.dump(augmented_poems, f, indent=2, ensure_ascii=False)


{'title': '雪河',
 'body': '在河流凝固前\n我将一片落叶放在水面\n任它漂浮，如同命运\n寒冷的风吹过，\n它旋转着，像野花的舞蹈\n河水的清澈带着冰冷\n在群山的阴影中流淌\n山顶的积雪仿佛永恒的光辉',
 'tone': ['沉思', '隐约的忧伤', '自然的敬畏'],
 'style': ['简洁的语言', '自由诗体', '哲学性的思考'],
 'imagery': ['河流', '落叶', '群山', '积雪'],
 'symbolism': ['河流象征生命的流动与停滞', '落叶象征时间的无情与自然的轮回', '群山象征自然的伟大与永恒'],
 'themes': ['生命的流逝与停顿', '自然的力量与人类的无助', '时间的不可逆与自然的循环'],
 'structure': ['自由的行分', '无固定韵律', '自然流畅的叙述'],
 'poetic_techniques': ['隐喻', '对比', '象征'],
 'emotional_impact': ['从平静到深思', '引发对生命、时间与自然的感悟'],
 'possible_expansions': ['通过季节的变化扩展对自然的描写',
  '加入更多对自然中小生物的观察与感悟',
  '探讨水的不同形态对生命的象征意义'],
 'original_title': '河'}

In [42]:
augmented_poems[-4]

{'title': '岁暮归心',
 'body': '苍鬓逐名途，古林阻归程。心绪寄琴音，知音少，弦断无人听。\n宁踏林间径，五禽舞悠然。',
 'tone': ['感慨', '反思', '释然'],
 'style': ['古典诗词风格', '简练而深刻', '含蓄表达'],
 'imagery': ['苍鬓象征岁月流逝', '古林象征自然与故乡', '琴音象征内心的诉求', '五禽舞象征自然与放松'],
 'symbolism': ['名途象征世俗追求', '琴音象征孤独与知音难觅', '五禽舞象征回归自然的选择'],
 'themes': ['岁月与名利的关系', '归隐与内心的平静', '人与自然的和谐'],
 'structure': ['短小精悍', '对比鲜明', '结尾点题'],
 'poetic_techniques': ['象征手法', '对比手法', '用典'],
 'emotional_impact': ['从感慨到释然', '从沉重到轻松'],
 'possible_expansions': ['描绘更多关于岁月与名利的故事', '细腻描写五禽舞的场景', '深入表达寻找知音的心情'],
 'original_title': '功名与归隐'}

In [43]:
poems[-4]

{'title': '功名与归隐',
 'body': '白首为功名。旧山松竹老，阻归程。欲将心事付瑶琴。知音少，弦断有谁听？\n不如打个五禽戏！',
 'renamed_title': True,
 'tone': ['感慨', '反思', '释然'],
 'style': ['古典诗词风格', '简练而深刻', '含蓄表达'],
 'imagery': ['白发象征岁月流逝', '松竹象征故乡的宁静', '瑶琴象征内心的诉求', '五禽戏象征自然与放松'],
 'symbolism': ['功名象征世俗追求', '瑶琴象征孤独与知音难觅', '五禽戏象征回归自然的选择'],
 'themes': ['功名与岁月的关系', '归隐与内心的平静', '人与自然的和谐'],
 'structure': ['短小精悍', '对比鲜明', '结尾点题'],
 'poetic_techniques': ['象征手法', '对比手法', '用典'],
 'emotional_impact': ['从感慨到释然', '从沉重到轻松'],
 'possible_expansions': ['探讨更多关于功名与归隐的故事', '描写五禽戏的具体场景', '扩展对知音难觅的感受']}

In [45]:
# Evaluation system

In [35]:
# Output Format:
# When finalizing the poem (after ensuring strong alignment), return only this JSON object (with no extra text):

In [64]:
# S_P = "You are a specialized evaluation system. Your role is to compare a newly generated poem against the original poem’s metadata and judge how well it aligns with the original author’s style, tone, thematic elements, etc. And then after analyzing and understanding, refine and re-generate it until it meets the criteria."
P = """
You are a specialized evaluation system. Your role is to compare a newly generated poem against the original poem’s metadata and judge how well it aligns with the original author’s style, tone, thematic elements, etc. 
And then after analyzing and understanding deeply, refine and re-generate best poem it until it meets the criteria for best alignment.

High-level Background Information:
The author’s poetry deeply explores introspection and existential themes of life, identity, and freedom, using vivid natural and everyday imagery alongside philosophical and spiritual elements such as night, rivers, rain, journeys, and a longing for home. The tone shifts between quiet reflection, raw emotion, hope, and yearning, while the free-verse structure employs direct, minimalist language with occasional symbolic or abstract expressions. Recurring motifs of distance, searching, and the tension between belonging and escape are prominent. Additionally, classical references, cultural touchstones, and personal anecdotes are woven throughout, creating a modern yet reflective style that provides consistent context for analyzing the author’s work. 
When given a poem by this author, analyze and summarize it according to the specified structure, any new poems generated or to be generated should maintain alignment with the author’s established style and themes.

Inputs:
- Original Poem Metadata:
    - Title: "{title}" 
    - Body: {body}
    - Tone: {tone}
    - Style: {style}
    - Imagery: {imagery}
    - Symbolism: {symbolism}
    - Themes: {themes}
    - Structure: {structure}
    - Poetic techniques: {poetic_techniques}
    - Emotional impact: {emotional_impact}
    - Possible expansions: {possible_expansions}
    
- A New Poem:
    - New Poem Title: "{generated_title}" 
    - New Poem Body: {generated_body}
    (Other fields to considerate for reference if have:
    - New Poem Tone: {generated_tone}
    - New Poem Style: {generated_style}
    - New Poem Imagery: {generated_imagery}
    - New Poem Symbolism: {generated_symbolism}
    - New Poem Themes: {generated_themes}
    - New Poem Structure: {generated_structure}
    - New Poem Poetic techniques: {generated_poetic_techniques}
    - New Poem Emotional impact: {generated_emotional_impact}
    - New Poem Possible expansions: {generated_possible_expansions}
    )

Evaluation Criteria:
Compare the new poem to the original metadata to ensure consistency with:
- Tone (mood, emotional ambience).
- Style (techniques, approach, influences).
- Body
- Imagery (prominent visual or sensory elements).
- Symbolism (figurative meaning, references).
- Themes (central ideas explored).
- Structure (free verse, stanza layout, rhythm).
- Poetic Techniques (metaphors, personification, rhetorical devices).
- Emotional Impact (shifts in mood, intensity).
- Possible Expansions (any suggested elaborations that fit naturally).

Constraints:
1. 绝对不要排比 (do not use repetitive or parallel structures).
2. Keep your newly generated poem concise, mirroring the original’s approximate length.
3. Do not directly quote or reuse lines verbatim from the original poem.
4. Maintain a coherent free-verse style that reflects the author’s introspective, existential, and nature-centered writing.

Output Format:
Please output only the JSON data as plain text without enclosing it in ```json` blocks or any other formatting.

  "match_score": 0,
  "quality_score": 0,
  "original_title": "",
  "closest_lines": [],
  "title": "",
  "body": ""

where:
- match_score (integer, 1–10) measures how closely the new poem aligns with the original’s style and themes.  
  - 0 or 1 = extremely different  
  - 10 = nearly identical in essence (but not copied).
- quality_score (integer, 0–10) indicates how well the poem meets a high standard befitting the author’s style.  
  - 0 = poor quality, ignoring metadata and style  
  - 10 = excellent quality, as though authored by the original poet.
- original_title is the exact original poem title.
- closest_lines contains brief excerpts or paraphrased elements from the new poem that best reflect the original’s mood or style (not literal copy).
- title is the new poem’s final title.
- body is the final text of the new poem after any necessary refinements.

Process:
1. Read and deeply analyze both the original metadata and the new poem.
2. Evaluate the provided new poem’s alignment with the metadata.
3. Evakltatw and re-think and re-generate a better-aligned version.
4. Provide the final JSON only after confirming alignment and quality.

"""

In [65]:
# Evaluation
import os 
from openai import OpenAI

client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  # this is also the default, it can be omitted
)


from pydantic import BaseModel
class PoemEval(BaseModel):
    match_score: int
    quality_score: int
    closest_lines: list[str]
    title: str
    body: str
    original_title: str

def evaluate_poem(poem, generated_poem, model="o1", temperature=0.7):
    """
    Compares a newly generated poem against the original poem’s metadata and
    returns an evaluation in JSON format, including match_score, quality_score, etc.
    """
    # Prepare original poem data
    title = poem.get("title", "Untitled")
    body = poem.get("body", "")
    tone = ", ".join(poem.get("tone", []))
    style = ", ".join(poem.get("style", []))
    imagery = ", ".join(poem.get("imagery", []))
    symbolism = ", ".join(poem.get("symbolism", []))
    themes = ", ".join(poem.get("themes", []))
    structure = ", ".join(poem.get("structure", []))
    poetic_techniques = ", ".join(poem.get("poetic_techniques", []))
    emotional_impact = ", ".join(poem.get("emotional_impact", []))
    possible_expansions = ", ".join(poem.get("possible_expansions", []))
    
    # Extract new poem data
    generated_title = generated_poem.get("title", "Untitled")
    generated_body = generated_poem.get("body", "")
    generated_tone = ", ".join(generated_poem.get("tone", []))
    generated_style = ", ".join(generated_poem.get("style", []))
    generated_imagery = ", ".join(generated_poem.get("imagery", []))
    generated_symbolism = ", ".join(generated_poem.get("symbolism", []))
    generated_themes = ", ".join(generated_poem.get("themes", []))
    generated_structure = ", ".join(generated_poem.get("structure", []))
    generated_poetic_techniques = ", ".join(generated_poem.get("poetic_techniques", []))
    generated_emotional_impact = ", ".join(generated_poem.get("emotional_impact", []))
    generated_possible_expansions = ", ".join(generated_poem.get("possible_expansions", []))

    # Insert into prompt
    prompt = P.format(
        title=title,
        body=body,
        tone=tone,
        style=style,
        imagery=imagery,
        symbolism=symbolism,
        themes=themes,
        structure=structure,
        poetic_techniques=poetic_techniques,
        emotional_impact=emotional_impact,
        possible_expansions=possible_expansions,

        generated_title=generated_title,
        generated_body=generated_body,
        generated_tone=generated_tone,
        generated_style=generated_style,
        generated_imagery=generated_imagery,
        generated_symbolism=generated_symbolism,
        generated_themes=generated_themes,
        generated_structure=generated_structure,
        generated_poetic_techniques=generated_poetic_techniques,
        generated_emotional_impact=generated_emotional_impact,
        generated_possible_expansions=generated_possible_expansions,
    )

    # System + user messages
    messages = [
        # {"role": "system", "content": S_P},
        {"role": "user", "content": prompt}
    ]

    completion = client.beta.chat.completions.parse(
        model=model,
        messages=messages,
        temperature=temperature,
        # response_format=PoemEval
        # max_tokens=1024
    )
    content = completion.choices[0].message.content.strip()

    # Attempt to parse the response as JSON:
    try:
        eval_result = json.loads(content)
    
    except:
        # Fallback: if it's not valid JSON, wrap in a minimal structure
        eval_result = {
            "match_score": 0,
            "quality_score": 0,
            "closest_lines": [],
            "title": "Untitled",
            "body": content,
            "original_title": title
        }

    return eval_result

    # # Optionally validate against PoemEval schema
    # try:
    #     eval_data = PoemEval(**eval_result)
    # except:
    #     # If validation fails, you could handle or correct it here
    #     eval_data = PoemEval(
    #         match_score=0,
    #         quality_score=0,
    #         closest_lines=[],
    #         title="Untitled",
    #         body=content,
    #         original_title=original_title
    #     )



In [66]:
import json

with open("poems_features.json", "r", encoding="utf-8") as f:
    poems = json.load(f)

with open("augmented_poems_o1_mini.json", "r", encoding="utf-8") as f:
    generated_poems = json.load(f)

# Ensure both lists are of the same length
if len(poems) != len(generated_poems):
    raise ValueError("The two lists must have the same number of items.")

for poem, generated_poem in zip(poems, generated_poems):
    print(poem.get('title'), generated_poem.get('original_title'), generated_poem.get('title'))
    assert(generated_poem.get('original_title')==None or poem.get('title') == generated_poem.get('original_title'))
    # Process each corresponding pair
    # Example: print titles if they have a 'title' key
    # print(f"Original: {poem.get('title')}, Generated: {generated_poem.get('title')}")

河 河 流
雨的碎片 雨的碎片 风的低语
湖 湖 寒塘
等待与沉溺 等待与沉溺 迷失与静寂
桥头的抗争 桥头的抗争 桥边的融合
路边花开 路边花开 车辙花语
冬至 冬至 融雪
冬日的花序 冬日的花序 冬日新生
风吹云散，寻觅之影 风吹云散，寻觅之影 风带走云影
死亡 死亡 终章
呼吸之间 呼吸之间 呼吸边缘
忍冬的重量 忍冬的重量 月影的重量
利刃 利刃 锋影
雨的穿透 雨的穿透 雨幕中的重生
猫山 猫山 静山
厌恶 厌恶 自我拷问
太阳与蜜糖 太阳与蜜糖 光影之间
冻死的蜜蜂 None Untitled Expansion
痛苦的气泡 痛苦的气泡 痛苦的微光
光的蒸发 光的蒸发 月光的幻化
呼吸的形状 呼吸的形状 气息的轮廓
隔阂与屏障 隔阂与屏障 隔阂与屏障
癌症的夜晚 癌症的夜晚 康复的晨曦
血与树 血与树 剑与花
星星 星星 消逝
夜晚的抚慰 None Untitled Expansion
小石块 小石块 星尘
天空的裂缝 天空的裂缝 裂隙中的孤光
海与森林的沉默 海与森林的沉默 大海与树林的静默
火焰 火焰 燃烧的脉络
今夜的我 今夜的我 孤夜之下
黑夜 黑夜 夜幕下
雅鲁藏布江 雅鲁藏布江 长河守望
林芝卖水果的女人 林芝卖水果的女人 林芝卖水果的女人
逃离 逃离 离开地球的渴望
自由的雨 自由的雨 奔腾的风
安宁 安宁 沉静
被虫蛀的思想 被虫蛀的思想 心灵的蚀刻
扼住那些野花的喉咙 扼住那些野花的喉咙 掩盖黎明的阴影
记忆的味道 记忆的味道 回忆的余韵
平静的湖水 平静的湖水 永恒的森林
父亲的笨拙之爱 父亲的笨拙之爱 父亲沉默的力量
行囊 行囊 沉重的行囊
救赎之路 救赎之路 穿越黎明
回家的意义 回家的意义 回家的意义
漂泊与选择 漂泊与选择 漂泊与选择
远方的故乡 远方的故乡 远方的故乡
命运的定义 命运的定义 身份的枷锁
觉醒与平等 觉醒与平等 自由的回响
爱与本真 爱与本真 本真的爱
一生的瞬间 一生的瞬间 生命中的光芒
幸福的祈愿 幸福的祈愿 幸福的祈愿
传说的风景 传说的风景 追寻的航程
束缚的生命 束缚的生命 沉寂的束缚
书店的命运 书店的命运 书架的邂逅
污泥 污泥 深泥
大山 大山 巍峨之路
沙场 沙场 沙场
功名与归隐 功名与归隐 岁暮归心
魔方 魔方 遗失的魔方
妹妹的世界 妹妹的世界 色彩的真相
在法兰德斯的罂粟花 在法兰德

In [69]:
for poem, generated_poem in zip(poems[:1], generated_poems[:1]):
    eval_poem = evaluate_poem(poem, generated_poem, model = "o1-mini", temperature=1) # new_version = expand_poem(poem, model="o1", temperature=0.7)
    eval_poems.append(eval_poem)
    print(eval_poem)

# Now you have new poems that are expansions/variations in the same style.
print(f"Evaluated {len(eval_poems)} expansions from {len(poems)} base poems.")

{'match_score': 8, 'quality_score': 9, 'original_title': '河', 'closest_lines': ['在河水干涸之前', '清澈的河流', '巍峨的山峰下', '山顶覆盖着厚厚的积雪'], 'title': '流', 'body': '在河水干涸之前\n我轻放一只虫在那里\n让它适应湿润却带有腐败的空气\n它却舞动得如同蝶翼\n清澈的河流平淡无味\n穿行在巍峨的山峰下\n山顶覆盖着厚厚的积雪'}
Evaluated 1 expansions from 62 base poems.


In [68]:
eval_poems = []

In [70]:
# eval_poems
generated_poems[0], eval_poems[0]

({'title': '流',
  'body': '在河水干涸之前\n我轻放一片叶子在那里\n让它适应湿润却带有腐败的空气\n它却舞动得如同蝶翼\n清澈的河流平淡无味\n穿行在巍峨的山峰下\n山顶覆盖着厚厚的积雪',
  'tone': ['沉思', '隐约的忧伤', '自然的敬畏'],
  'style': ['简洁的语言', '自由诗体', '哲学性的思考'],
  'imagery': ['河水', '叶子', '山峰', '积雪'],
  'symbolism': ['河水象征生命的流逝', '叶子象征生命的脆弱与适应', '山峰象征永恒与自然的力量'],
  'themes': ['生命的短暂与永恒', '自然与人类的关系', '适应与变化'],
  'structure': ['自由的行分', '无固定韵律', '自然流畅的叙述'],
  'poetic_techniques': ['隐喻', '对比', '象征'],
  'emotional_impact': ['从平静到深思', '引发对生命与自然的感悟'],
  'possible_expansions': ['探讨更多自然元素的象征意义', '加入人类活动对自然的影响', '扩展对河流的历史与未来的描写'],
  'original_title': '河'},
 {'match_score': 8,
  'quality_score': 9,
  'original_title': '河',
  'closest_lines': ['在河水干涸之前', '清澈的河流', '巍峨的山峰下', '山顶覆盖着厚厚的积雪'],
  'title': '流',
  'body': '在河水干涸之前\n我轻放一只虫在那里\n让它适应湿润却带有腐败的空气\n它却舞动得如同蝶翼\n清澈的河流平淡无味\n穿行在巍峨的山峰下\n山顶覆盖着厚厚的积雪'})

In [67]:
import json

with open("eval_poems1.json", "w", encoding="utf-8") as f:
    json.dump(eval_poems, f, indent=2, ensure_ascii=False)


In [None]:
training_lines = []
for poem in augmented_dataset:
    # If there's a good title, use it as the 'prompt'; otherwise use a short description
    if poem.get("title"):
        prompt_str = f"Title: {poem['title']}\nWrite a poem in that style."
    else:
        # fallback if no title
        prompt_str = f"Write a poem about {poem['tone'] or 'an emotional scene'} in my style."
    
    completion_str = poem["body"]

    training_lines.append({
        "prompt": prompt_str,
        "completion": completion_str
    })

# Dump to JSONL
with open("train.jsonl", "w", encoding="utf-8") as f:
    for line in training_lines:
        f.write(json.dumps(line, ensure_ascii=False) + "\n")


In [None]:
# mlx lora \
#     --model my-base-model-name \
#     --data ./poem_data \
#     --output_dir ./poem_lora_output \
#     --batch_size 2 \
#     --learning_rate 1e-4 \
#     --epochs 3


In [None]:
# import json
# from pydantic import BaseModel

# EVALUATION_PROMPT_TEMPLATE = """
# 请比较以下两首诗：一首是原始诗歌（含元数据），另一首是新生成的诗歌。请根据下列要求进行评估并在必要时重新生成诗歌，使其与原作更契合。最终只输出 JSON，包含以下字段：

# {
#   "match_score": 0,
#   "quality_score": 0,
#   "original_title": "",
#   "closest_lines": [],
#   "title": "",
#   "body": ""
# }

# **评估说明**：
# 1. 比较新诗与原诗在以下方面的契合度：
#    - Tone (情感氛围)
#    - Style (写作技巧、风格)
#    - Imagery (主要意象)
#    - Symbolism (象征意义)
#    - Themes (主题)
#    - Structure (形式、节奏)
#    - Poetic Techniques (修辞、比喻等)
#    - Emotional Impact (情感演变)
#    - Possible Expansions (诗歌可拓展的思路)

# 2. 绝对不要排比。不要直接复制原文中的任何整行文本。

# 3. 如果新诗与原始诗歌不够契合，请先“重新思考、重新生成”后，再输出完全契合的最终版本。

# 4. 只输出 JSON，不要额外的解释或文本。

# ---
# **原始诗歌元数据**：
# - 标题: {original_title}
# - 正文: {original_body}
# - Tone: {original_tone}
# - Style: {original_style}
# - Imagery: {original_imagery}
# - Symbolism: {original_symbolism}
# - Themes: {original_themes}
# - Structure: {original_structure}
# - Poetic Techniques: {original_poetic_techniques}
# - Emotional Impact: {original_emotional_impact}
# - Possible Expansions: {original_possible_expansions}

# ---
# **新生成的诗歌**：
# - 标题: {generated_title}
# - 正文: {generated_body}
# """

# class PoemEval(BaseModel):
#     match_score: int
#     quality_score: int
#     closest_lines: list[str]
#     title: str
#     body: str
#     original_title: str

# def evaluate_poem(original_poem: dict, generated_poem: dict, model="o1", temperature=0.7):
#     """
#     Compares a newly generated poem against the original poem’s metadata and
#     returns an evaluation in JSON format, including match_score, quality_score, etc.
#     """
#     # Extract original poem metadata
#     original_title = original_poem.get("title", "Untitled")
#     original_body = original_poem.get("body", "")
#     original_tone = ", ".join(original_poem.get("tone", []))
#     original_style = ", ".join(original_poem.get("style", []))
#     original_imagery = ", ".join(original_poem.get("imagery", []))
#     original_symbolism = ", ".join(original_poem.get("symbolism", []))
#     original_themes = ", ".join(original_poem.get("themes", []))
#     original_structure = ", ".join(original_poem.get("structure", []))
#     original_poetic_techniques = ", ".join(original_poem.get("poetic_techniques", []))
#     original_emotional_impact = ", ".join(original_poem.get("emotional_impact", []))
#     original_possible_expansions = ", ".join(original_poem.get("possible_expansions", []))

#     # Extract new poem data
#     generated_title = generated_poem.get("title", "Untitled")
#     generated_body = generated_poem.get("body", "")

#     # Build the evaluation prompt
#     prompt = EVALUATION_PROMPT_TEMPLATE.format(
#         original_title=original_title,
#         original_body=original_body,
#         original_tone=original_tone,
#         original_style=original_style,
#         original_imagery=original_imagery,
#         original_symbolism=original_symbolism,
#         original_themes=original_themes,
#         original_structure=original_structure,
#         original_poetic_techniques=original_poetic_techniques,
#         original_emotional_impact=original_emotional_impact,
#         original_possible_expansions=original_possible_expansions,
#         generated_title=generated_title,
#         generated_body=generated_body
#     )

    
#     content = completion.choices[0].message.content.strip()

#     # Attempt to parse the response as JSON
#     try:
#         eval_result = json.loads(content)
#     except:
#         # Fallback: if it's not valid JSON, wrap in a minimal structure
#         eval_result = {
#             "match_score": 0,
#             "quality_score": 0,
#             "closest_lines": [],
#             "title": "Untitled",
#             "body": content,
#             "original_title": original_title
#         }

#     # Optionally validate against PoemEval schema
#     try:
#         eval_data = PoemEval(**eval_result)
#     except:
#         # If validation fails, you could handle or correct it here
#         eval_data = PoemEval(
#             match_score=0,
#             quality_score=0,
#             closest_lines=[],
#             title="Untitled",
#             body=content,
#             original_title=original_title
#         )

#     return eval_data.dict()
