In [1]:
# ==========================================================
# LSTM 舞蹈敘事生成器：JSON(frames) -> 詩意段落
# ==========================================================

import json, os, random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# ---------------- CONFIG ----------------
# Input JSON file; run_pipeline() hands this to load_frames().
JSON_PATH = "ballet_multi_part_clusters.json"
# Top-level key inside the JSON whose frame list is used.
TARGET_KEY = "Ballet_40.csv"     # can be swapped for any other key in the JSON
# Number of consecutive frames per sliding window.
WINDOW_LEN = 8
# Offset (in frames) between the starts of successive windows.
STEP = 4
# Optional cap on the number of dataset samples; a falsy value disables the cap.
MAX_SAMPLES = None
# Output size of both Embedding layers in build_seq2seq().
EMBEDDING_DIM = 64
# Number of units in the encoder and decoder LSTM layers.
LATENT_DIM = 256
# Passed to model.fit() as batch_size / epochs.
BATCH_SIZE = 32
EPOCHS = 20
# Directory name intended for the trained model artefacts.
MODEL_DIR = "ballet_lstm_model"

In [3]:
# ---------------- MOVEMENT DICTIONARIES ----------------
# Names of the six tracked body parts. The rule-based generator in this
# notebook indexes frame columns in this same order
# (0 head, 1/2 left/right arm, 3/4 left/right leg, 5 torso).
BODY_PARTS = ["Head","LeftArm","RightArm","LeftLeg","RightLeg","Torso"]
# For each body part, maps an integer code (0-5) to a Chinese description of
# the pose. Presumably these codes are the per-part values stored in each
# frame -- TODO confirm against the JSON producer.
# NOTE(review): codes 3 and 4 describe "forward" / "sideways" for LeftArm but
# "sideways" / "forward" for RightArm -- confirm the asymmetry is intentional.
movement_dict = {
    "Head": {0:"頭部直立",1:"微微低頭",2:"頭向左偏",3:"頭向右偏",4:"頭部快速轉動",5:"頭部不穩定"},
    "LeftArm": {0:"左手自然下垂",1:"左手半舉",2:"左手高舉",3:"左手向前伸",4:"左手橫向展開",5:"左手轉換中"},
    "RightArm": {0:"右手自然下垂",1:"右手舉至中段高度",2:"右手完全抬高",3:"右手橫向展開",4:"右手向前伸出",5:"右手轉換中"},
    "LeftLeg": {0:"左腳著地站立",1:"左腳抬起",2:"左腳側向伸展",3:"左腳後伸",4:"左腳屈膝準備動作",5:"左腳跳躍或過渡"},
    "RightLeg": {0:"右腳著地站立",1:"右腳抬起",2:"右腳伸展或踢腿",3:"右腳後伸",4:"右腳屈膝準備跳或轉",5:"右腳快速移動或跳躍"},
    "Torso": {0:"軀幹直立穩定",1:"軀幹微前傾",2:"軀幹後仰",3:"軀幹側傾",4:"軀幹扭轉",5:"軀幹不穩定"}
}

In [4]:
# ---------------- LOAD FRAMES ----------------
def load_frames(json_path, key):
    """Load the list of frames stored under ``key`` in a JSON file.

    Args:
        json_path: Path of a UTF-8 encoded JSON file whose top level is a
            mapping from keys to lists of frames.
        key: Which top-level entry to read.

    Returns:
        A list of frames, each a list of ints. Entries with fewer than six
        values are dropped; longer entries are kept in full.

    Raises:
        KeyError: If ``key`` is not present in the JSON mapping.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    if key not in payload:
        raise KeyError(f"{key} not found in JSON. Available keys: {list(payload.keys())}")

    kept = []
    for raw in payload[key]:
        # Skip rows that cannot describe all six body parts.
        if len(raw) < 6:
            continue
        kept.append([int(value) for value in raw])
    return kept

In [5]:
# ---------------- POETIC TEMPLATES ----------------
# Sentence templates filled via str.format(desc=..., image=...).
# Note: the sixth template has no {image} placeholder, so the image chosen
# for it is simply unused.
POETIC_TEMPLATES = [
    "舞者{desc}，彷彿{image}。",
    "她{desc}，像是在{image}。",
    "{desc}，如同{image}在呼吸。",
    "在光裡，舞者{desc}，那是{image}的瞬間。",
    "當{desc}時，她的心也{image}。",
    "{desc}之間，像是記憶被風吹動。",
    "她在{desc}之間，尋找{image}。",
    "隨著光線的流動，{desc}，那是一種{image}。",
    "靜止中，{desc}，彷彿時間在{image}。"
]

# Maps a movement keyword to candidate poetic images. Keys are matched as
# substrings of the generated movement descriptions.
SYMBOLIC_MAP = {
    "高舉": ["追尋光", "擁抱黎明", "伸向天空"],
    "抬起": ["掙脫地心", "伸向遠方", "指向未來"],
    "展開": ["釋放", "綻放", "開啟"],
    "扭轉": ["時間的皺摺", "內在的回聲", "意識的迴旋"],
    "低頭": ["回望", "沉思", "與影子對話"],
    "伸": ["觸摸未知", "尋找平衡", "撫過空氣"],
    "跳躍": ["掙脫地心", "飛起的願望", "破碎的重力"],
    "後仰": ["沉入記憶", "回到夢裡", "墜入過去"],
    "側傾": ["在邊界搖晃", "尋找失重", "接近流動"],
    "穩定": ["成為風的軸心", "讓時間靜止", "在呼吸裡存在"]
}

# Transitional phrases that may be inserted between generated sentences.
CONNECTORS = ["接著", "隨後", "同時", "之後", "此刻", "慢慢地", "忽然", "她感覺到", "光線轉換之時"]

In [6]:
# ---------------- RULE-BASED POETRY ----------------
def window_to_rule_poetry(window_frames):
    """Turn one window of frames into a single templated poetic sentence.

    A column counts as "changed" when its value differs between any two
    consecutive frames of the window. Columns are read as 0 = head,
    1/2 = left/right arm, 3/4 = left/right leg, 5 = torso. For every changed
    group a short phrase is produced; the verb is picked at random and does
    not depend on the actual values. One image is then drawn from
    SYMBOLIC_MAP (preferring keys that occur in the phrases) and both are
    substituted into a random entry of POETIC_TEMPLATES.

    Args:
        window_frames: Sequence of frames, each indexable at positions 0-5.

    Returns:
        The formatted sentence as a string.
    """
    # Collect, per column, the values a column changed *to* inside the window.
    seen = [set() for _ in range(6)]
    for prev, cur in zip(window_frames, window_frames[1:]):
        for col in range(6):
            if cur[col] != prev[col]:
                seen[col].add(cur[col])

    head, left_arm, right_arm, left_leg, right_leg, torso = (bool(s) for s in seen)

    def pick(prefix, verbs):
        return prefix + random.choice(verbs)

    phrases = []

    # Arms
    if left_arm and right_arm:
        phrases.append(pick("雙手", ["高舉", "展開", "伸出"]))
    elif left_arm:
        phrases.append(pick("左手", ["舉起", "伸出", "下垂"]))
    elif right_arm:
        phrases.append(pick("右手", ["舉起", "伸出", "下垂"]))

    # Legs
    if left_leg and right_leg:
        phrases.append(pick("雙腿", ["站穩", "抬起", "伸展"]))
    elif left_leg:
        phrases.append(pick("左腳", ["抬起", "伸展", "著地"]))
    elif right_leg:
        phrases.append(pick("右腳", ["抬起", "伸展", "著地"]))

    if torso:
        phrases.append(pick("軀幹", ["前傾", "後仰", "扭轉", "直立"]))
    if head:
        phrases.append(pick("頭部", ["低垂", "轉向", "上揚"]))

    # Nothing moved: fall back to a neutral description.
    if not phrases:
        phrases = ["姿態平穩"]

    # Keywords that appear in at least one phrase, in SYMBOLIC_MAP order.
    matched = []
    for keyword in SYMBOLIC_MAP.keys():
        if any(keyword in phrase for phrase in phrases):
            matched.append(keyword)

    if matched:
        candidates = SYMBOLIC_MAP[random.choice(matched)]
    else:
        # No keyword matched: draw from every image of every keyword.
        candidates = [img for images in SYMBOLIC_MAP.values() for img in images]
    image = random.choice(candidates)

    sentence_template = random.choice(POETIC_TEMPLATES)
    return sentence_template.format(desc=" ".join(phrases), image=image)

In [7]:
# ---------------- BUILD DATASET ----------------
def build_dataset(frames, window_len, step, max_samples=None):
    """Slide a window over ``frames`` and build paired source/target texts.

    Each source text is the window's frames written as space-separated
    values, with frames joined by " | ". Each target text is the sentence
    from window_to_rule_poetry() wrapped in a leading tab and a trailing
    newline.

    Args:
        frames: Sequence of frames.
        window_len: Number of frames per window.
        step: Offset between consecutive window starts.
        max_samples: Optional cap on the number of samples; falsy means
            no cap.

    Returns:
        A tuple ``(X_texts, Y_texts)`` of equally long lists of strings.
    """
    sources = []
    targets = []
    last_start = len(frames) - window_len
    for offset in range(0, last_start + 1, step):
        window = frames[offset:offset + window_len]

        frame_texts = []
        for frame in window:
            frame_texts.append(" ".join(str(value) for value in frame))
        source = " | ".join(frame_texts)
        target = "\t" + window_to_rule_poetry(window) + "\n"

        sources.append(source)
        targets.append(target)

        if max_samples and len(sources) >= max_samples:
            break
    return sources, targets

In [8]:
# ---------------- TOKENIZERS ----------------
def prepare_tokenizers(X_texts, Y_texts):
    """Fit one tokenizer on the source texts and one on the target texts.

    The source tokenizer splits on single spaces; the target tokenizer works
    character by character. Both are created with an empty filter string and
    ``<OOV>`` as the out-of-vocabulary token.

    Args:
        X_texts: Source (encoder) strings.
        Y_texts: Target (decoder) strings.

    Returns:
        A tuple ``(tok_in, tok_out)`` of fitted tokenizers.
    """
    shared_options = {"filters": '', "oov_token": '<OOV>'}

    source_tokenizer = Tokenizer(split=' ', **shared_options)
    source_tokenizer.fit_on_texts(X_texts)

    target_tokenizer = Tokenizer(char_level=True, **shared_options)
    target_tokenizer.fit_on_texts(Y_texts)

    return source_tokenizer, target_tokenizer

In [9]:
# ---------------- SEQ2SEQ ----------------
def build_seq2seq(num_encoder_tokens, num_decoder_tokens):
    """Build and compile the LSTM encoder-decoder training model.

    The encoder embeds its token ids and runs them through an LSTM, keeping
    only the final hidden and cell states. The decoder embeds its own token
    ids, runs an LSTM initialised with those states, and projects every time
    step onto the decoder vocabulary with a softmax layer. Layer sizes come
    from EMBEDDING_DIM and LATENT_DIM.

    Args:
        num_encoder_tokens: Input dimension of the encoder embedding.
        num_decoder_tokens: Input dimension of the decoder embedding and
            width of the output layer.

    Returns:
        A tuple ``(model, components)`` where ``model`` is compiled with Adam
        and sparse categorical cross-entropy, and ``components`` is
        ``(encoder_inputs, encoder_states, decoder_embedding_layer,
        decoder_lstm_layer, decoder_dense_layer)``.
    """
    # ---- encoder ----
    encoder_inputs = Input(shape=(None,))
    encoder_embedded = Embedding(num_encoder_tokens, EMBEDDING_DIM, mask_zero=True)(encoder_inputs)
    _encoder_out, hidden_state, cell_state = LSTM(LATENT_DIM, return_state=True)(encoder_embedded)
    encoder_states = [hidden_state, cell_state]

    # ---- decoder ----
    decoder_inputs = Input(shape=(None,))
    decoder_embedding = Embedding(num_decoder_tokens, EMBEDDING_DIM, mask_zero=True)
    decoder_embedded = decoder_embedding(decoder_inputs)
    decoder_recurrent = LSTM(LATENT_DIM, return_sequences=True, return_state=True)
    decoder_sequence, _, _ = decoder_recurrent(decoder_embedded, initial_state=encoder_states)
    output_projection = Dense(num_decoder_tokens, activation='softmax')
    token_probabilities = output_projection(decoder_sequence)

    seq2seq = Model([encoder_inputs, decoder_inputs], token_probabilities)
    seq2seq.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

    components = (encoder_inputs, encoder_states, decoder_embedding, decoder_recurrent, output_projection)
    return seq2seq, components

In [10]:
# ---------------- TRAIN + SAVE ----------------
def train_and_save_model(X_texts, Y_texts, model_dir=None):
    """Train the seq2seq model on paired texts and save the results to disk.

    The texts are tokenized, padded to the longest sequence, and arranged for
    teacher forcing: the decoder input is each target without its last token
    and the decoder target is the same sequence shifted one step ahead.
    After training, the model, both tokenizers and the sequence-length
    metadata are written to ``model_dir``.

    Args:
        X_texts: Source (encoder) strings.
        Y_texts: Target (decoder) strings, same length as ``X_texts``.
        model_dir: Directory to write artefacts to. Defaults to MODEL_DIR.

    Returns:
        A dict with the trained model, tokenizers, maximum sequence lengths,
        the layers needed to assemble inference models, the window settings
        and the directory the artefacts were saved to.

    Raises:
        ValueError: If there are no samples or the two lists differ in length.
    """
    # Fail early with a clear message instead of an opaque max() error later.
    if len(X_texts) == 0 or len(Y_texts) == 0:
        raise ValueError("No training samples: X_texts and Y_texts must be non-empty.")
    if len(X_texts) != len(Y_texts):
        raise ValueError(
            f"X_texts and Y_texts must have the same length, got {len(X_texts)} and {len(Y_texts)}."
        )

    tok_in, tok_out = prepare_tokenizers(X_texts, Y_texts)
    X_seq = tok_in.texts_to_sequences(X_texts)
    Y_seq = tok_out.texts_to_sequences(Y_texts)
    max_enc_len = max(len(s) for s in X_seq)
    max_dec_len = max(len(s) for s in Y_seq)
    enc_in = pad_sequences(X_seq, maxlen=max_enc_len, padding='post')
    # Teacher forcing: input drops the last token, target drops the first.
    dec_in = pad_sequences([s[:-1] for s in Y_seq], maxlen=max_dec_len - 1, padding='post')
    dec_tg = pad_sequences([s[1:] for s in Y_seq], maxlen=max_dec_len - 1, padding='post')
    # Sparse categorical cross-entropy is fed targets with a trailing axis.
    dec_tg = np.expand_dims(dec_tg, -1)

    model, comp = build_seq2seq(len(tok_in.word_index)+1, len(tok_out.word_index)+1)
    model.fit([enc_in, dec_in], dec_tg, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.1)

    # Persist everything needed to reload the model for inference.
    save_dir = MODEL_DIR if model_dir is None else model_dir
    os.makedirs(save_dir, exist_ok=True)
    model.save(os.path.join(save_dir, "seq2seq.keras"))
    for file_name, tokenizer in (("tokenizer_in.json", tok_in), ("tokenizer_out.json", tok_out)):
        with open(os.path.join(save_dir, file_name), "w", encoding="utf-8") as f:
            f.write(tokenizer.to_json())
    with open(os.path.join(save_dir, "meta.json"), "w", encoding="utf-8") as f:
        json.dump({"max_enc_len": int(max_enc_len), "max_dec_len": int(max_dec_len),
                   "window_len": WINDOW_LEN, "step": STEP}, f)

    return {"model": model, "tokenizer_in": tok_in, "tokenizer_out": tok_out,
            "max_enc_len": max_enc_len, "max_dec_len": max_dec_len,
            "encoder_inputs": comp[0], "encoder_states": comp[1],
            "dec_emb_layer": comp[2], "decoder_lstm": comp[3], "decoder_dense": comp[4],
            "window_len": WINDOW_LEN, "step": STEP, "model_dir": save_dir}

In [11]:
# ---------------- PARAGRAPH DECODER ----------------
def decode_paragraph(frames, artefacts, top_k=10, temperature=0.8, connector_prob=0.3):
    """Compose a multi-line paragraph by describing each sliding window.

    Every window is turned into a sentence by window_to_rule_poetry(); with
    probability ``connector_prob`` a connector phrase is inserted between one
    window and the next. The sentences are then grouped into lines by
    smart_paragraph_formatter().

    Note: only ``artefacts['window_len']`` and ``artefacts['step']`` are read.
    The trained model is not consulted, and ``top_k`` / ``temperature`` are
    accepted for interface compatibility but have no effect.

    Args:
        frames: Sequence of frames.
        artefacts: Mapping providing ``'window_len'`` and ``'step'``.
        top_k: Unused.
        temperature: Unused.
        connector_prob: Chance of inserting a connector after a window that
            is not the last one.

    Returns:
        The formatted paragraph, or an empty string when ``frames`` is too
        short to form a single window.
    """
    window_len = artefacts['window_len']
    starts = range(0, len(frames) - window_len + 1, artefacts['step'])
    if len(starts) == 0:
        return ""

    paragraph = []
    final_start = starts[-1]
    for start in starts:
        paragraph.append(window_to_rule_poetry(frames[start:start + window_len]))
        # A connector links to the following sentence, so never emit one
        # after the last window (it would dangle at the end of the text).
        if start != final_start and random.random() < connector_prob:
            paragraph.append(random.choice(CONNECTORS) + "，")
    return smart_paragraph_formatter(paragraph)

def smart_paragraph_formatter(sentences, max_per_line=3):
    """Group sentences into lines of randomly varying length.

    Sentences are stripped and collected; after each one a threshold between
    2 and ``max_per_line`` (inclusive) is drawn, and the current group is
    closed as soon as it holds at least that many sentences. Any leftover
    sentences form the final line.

    Args:
        sentences: Iterable of strings.
        max_per_line: Upper bound for the random per-line threshold.

    Returns:
        The lines joined with newlines; sentences on a line are separated by
        single spaces.
    """
    rows = []
    pending = []
    for sentence in sentences:
        pending.append(sentence.strip())
        threshold = random.randint(2, max_per_line)
        if len(pending) < threshold:
            continue
        rows.append(" ".join(pending))
        pending = []
    # Flush whatever did not reach a threshold.
    if pending:
        rows.append(" ".join(pending))
    return "\n".join(rows)

In [12]:
# ---------------- MAIN ----------------
def run_pipeline():
    """Run the whole demo: load frames, build the dataset, train, and print.

    Reads its settings from the module-level configuration constants
    (JSON_PATH, TARGET_KEY, WINDOW_LEN, STEP, MAX_SAMPLES) and prints progress
    plus the generated paragraph to standard output. Returns nothing.
    """
    dance_frames = load_frames(JSON_PATH, TARGET_KEY)
    print("Loaded frames:", len(dance_frames))

    dataset = build_dataset(dance_frames, WINDOW_LEN, STEP, MAX_SAMPLES)
    source_texts, target_texts = dataset
    print("Built dataset samples:", len(source_texts))

    trained = train_and_save_model(source_texts, target_texts)

    print("\n==== Adaptive Paragraph Inference Demo ====")
    print(decode_paragraph(dance_frames, trained, top_k=20, temperature=1.0))

In [13]:
# ---------------- EXECUTE ----------------
if __name__ == "__main__":
    # Ask TensorFlow to allocate GPU memory on demand. This is best-effort:
    # a failure is reported but does not stop the pipeline.
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
        try:
            tf.config.experimental.set_memory_growth(gpu, True)
        except (RuntimeError, ValueError) as err:
            # Raised when the runtime is already initialised or the device is
            # invalid; catching only these keeps KeyboardInterrupt and
            # genuine bugs visible.
            print(f"Could not enable memory growth for {gpu}: {err}")
    run_pipeline()

Loaded frames: 593
Built dataset samples: 147
Epoch 1/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 179ms/step - loss: 4.7995 - val_loss: 4.7369
Epoch 2/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 112ms/step - loss: 4.4885 - val_loss: 4.2362
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 102ms/step - loss: 4.0561 - val_loss: 4.0445
Epoch 4/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step - loss: 3.8779 - val_loss: 3.9534
Epoch 5/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step - loss: 3.7190 - val_loss: 3.8336
Epoch 6/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 108ms/step - loss: 3.6164 - val_loss: 3.7896
Epoch 7/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 137ms/step - loss: 3.5168 - val_loss: 3.8155
Epoch 8/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step - loss: 3.4577 - val_loss: 3.7283
Epoch 9/20
[