In [3]:
import pandas as pd

# Columns the input CSV must provide before any processing is attempted.
required = ['poem_id', 'chunk_id', 'Title_cleaned', 'Poem_line_cleaned']


def check_required_columns(df, columns):
    """Raise ValueError if any of *columns* is absent from *df*.

    All missing columns are reported in a single message so the input file
    can be fixed in one pass rather than one column per run.

    Args:
        df: DataFrame to validate.
        columns: Iterable of column names that must be present.

    Raises:
        ValueError: If one or more of the columns are missing.
    """
    missing = [col for col in columns if col not in df.columns]
    if missing:
        raise ValueError(f"Missing column: {', '.join(missing)}")


def build_poem_full_map(df):
    """Return a ``{poem_id: full_poem}`` dict built from the chunk rows of *df*.

    The chunks of each poem are joined with newlines in ascending
    ``chunk_id`` order.

    Args:
        df: DataFrame with 'poem_id', 'chunk_id' and 'Poem_line_cleaned'
            columns.

    Returns:
        Dict mapping each poem_id to its concatenated text. A poem whose
        chunks are all missing has no entry.
    """
    # Empty CSV cells are read as float NaN, which str.join rejects with a
    # TypeError; skip them instead of crashing the whole run.
    usable = df.dropna(subset=['Poem_line_cleaned'])
    # A stable sort keeps the incoming order for rows sharing a chunk_id;
    # groupby then preserves this row order inside every group.
    usable = usable.sort_values('chunk_id', kind='stable')
    joined = usable.groupby('poem_id', sort=False)['Poem_line_cleaned'].agg(
        # astype(str) guards against a column pandas parsed as numeric.
        lambda lines: '\n'.join(lines.astype(str))
    )
    return joined.to_dict()


# The guard is true both when run as a script and inside a notebook cell, so
# the globals below (df, poem_full_map, output_df) are still defined there;
# it only keeps the file I/O from running when the helpers are imported.
if __name__ == "__main__":
    # Read the input CSV
    df = pd.read_csv("FAZ_POEMS_REVIEW - Semantic_search.csv")

    # Verify required columns exist
    check_required_columns(df, required)

    # Sort by poem_id and chunk_id to ensure proper ordering
    df = df.sort_values(['poem_id', 'chunk_id']).reset_index(drop=True)

    # Build full poems for each poem_id by concatenating all chunks
    poem_full_map = build_poem_full_map(df)

    # Add the Full_poem column to the dataframe (repeated on every chunk row)
    df['Full_poem'] = df['poem_id'].map(poem_full_map)

    # Rename Poem_line_cleaned to poem_lines_subset for the output file
    df = df.rename(columns={'Poem_line_cleaned': 'poem_lines_subset'})

    # All original columns plus the new Full_poem column are preserved
    output_df = df

    # Save to output CSV; utf-8-sig writes a BOM at the start of the file
    output_df.to_csv("05-FAZ3_POEMS_Chunked_for_semantic.csv", index=False, encoding='utf-8-sig')

    print(f"✅ Done! {len(output_df)} rows generated.")

✅ Done! 1418 rows generated.
