## Format the story text file (wrap long lines, collapse blank lines)

In [1]:
def split_long_lines(filename, folder="scripts", max_length=80):
    """Wrap the lines of a text file so that none exceeds ``max_length``.

    Reads ``<folder>/<filename>``, breaks every over-long line at the last
    space found within its first ``max_length`` characters (hard-splitting at
    ``max_length`` when there is no usable space), drops lines whose stripped
    content is exactly ``XXX``, and writes the result to
    ``<folder>/split_<filename>``. The input file itself is not modified.

    Args:
        filename: Name of the text file inside ``folder``.
        folder: Directory containing the input; the output is written there too.
        max_length: Maximum number of characters per output line, not
            counting the line terminator.

    Raises:
        ValueError: If ``max_length`` is smaller than 1 (the wrapping loop
            could otherwise never make progress).
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # Open the file for reading
    with open(folder + "/" + filename, 'r') as file:
        lines = file.readlines()

    new_lines = []

    # Iterate through each line
    for line in lines:
        # readlines() keeps the terminator; remove it so it neither counts
        # towards max_length nor gets doubled when '\n' is re-added below.
        line = line.rstrip('\n')

        while len(line) > max_length:
            # Find the last space before the max_length
            split_index = line[:max_length].rfind(' ')
            if split_index <= 0:
                # No space found (or only a leading one, which would yield an
                # empty chunk): split at max_length instead.
                split_index = max_length

            # Append the portion of the line up to the split point
            new_lines.append(line[:split_index].strip() + '\n')

            # Continue processing the rest of the line
            line = line[split_index:].strip()

        # Add the rest of the line (if any) or short lines directly
        new_lines.append(line + '\n')

    # remove any lines that are 'XXX'
    new_lines = [line for line in new_lines if line.strip() != 'XXX']

    # Write the modified lines to a new file next to the original
    with open(folder + '/split_' + filename, 'w') as file:
        file.writelines(new_lines)

In [75]:
# Base name of the story being processed; the cells below interpolate it
# into the text and JSON file paths.
story = "whispers"

In [77]:
# Uses the default folder ("scripts") and writes scripts/split_<story>.txt
split_long_lines(f'{story}.txt')

In [3]:
import re 

def remove_extra_newlines(filename, folder):
    """Collapse runs of three or more newlines in a file down to two.

    The file ``<folder>/<filename>`` is read in full and then overwritten
    in place with the cleaned text.
    """
    target_path = folder + "/" + filename

    with open(target_path, 'r') as source:
        original_text = source.read()

    # Three or more consecutive newlines become exactly two
    collapsed_text = re.compile(r'\n{3,}').sub('\n\n', original_text)

    with open(target_path, 'w') as destination:
        destination.write(collapsed_text)

In [78]:
# Collapse runs of blank lines in the wrapped file; the file is rewritten in place
remove_extra_newlines(f'split_{story}.txt', 'scripts')

## check if any locations/characters are missing

In [None]:
import json 

# Load the story outline that the consistency checks run against
with open(f"json/{story}/story_outline.json", "r") as file:
    data = json.load(file)

# Names declared in the top-level "locations" list
all_locations = set(entry["name"] for entry in data["locations"])

# Names referenced by the scenes
scene_locations = set(entry["location"] for entry in data["scenes"])

# Declared but never used, and used but never declared
missing_locations = all_locations.difference(scene_locations)
extra_locations = scene_locations.difference(all_locations)

# Report both directions: a heading plus one bullet per name, or a
# confirmation line when the set is empty
for heading, names, all_clear in (
    (
        "The following locations are not included in any scene:",
        missing_locations,
        "All locations are included in at least one scene.",
    ),
    (
        "The following locations are not defined but used in scenes:",
        extra_locations,
        "All locations used in scenes are defined.",
    ),
):
    if not names:
        print(all_clear)
        continue
    print(heading)
    for location in names:
        print(f"- {location}")

All locations are included in at least one scene.
All locations used in scenes are defined.


In [20]:
# check characters too
# Names declared in the top-level "characters" list
all_characters = set(entry["name"] for entry in data["characters"])

# Names of every character appearing in any scene
scene_characters = set()
for scene_entry in data["scenes"]:
    scene_characters.update(member["name"] for member in scene_entry["characters"])

missing_characters = all_characters.difference(scene_characters)
extra_characters = scene_characters.difference(all_characters)

# Nothing is printed for a set that is empty
for heading, names in (
    ("\nThe following characters are not included in any scene:", missing_characters),
    ("\nThe following characters are not defined but used in scenes:", extra_characters),
):
    if names:
        print(heading)
        for character in names:
            print(f"- {character}")

In [21]:
# print total number of scenes, characters and locations
print(
    f"\nTotal number of scenes: {len(data['scenes'])}",
    f"Total number of characters: {len(data['characters'])}",
    f"Total number of locations: {len(data['locations'])}",
    sep="\n",
)


Total number of scenes: 56
Total number of characters: 34
Total number of locations: 21


## character/location counting

In [None]:
# read in json
import json

# Parse the outline and keep handles on its four top-level collections
with open(f'json/{story}/story_outline.json', 'r') as file:
    story_outline = json.load(file)

scenes, chapters, characters, locations = (
    story_outline['scenes'],
    story_outline['chapters'],
    story_outline['characters'],
    story_outline['locations'],
)

In [54]:
# count how many scenes each character is in per chapter
# Result shape: {chapter label: {character name: number of scenes}}
character_scene_count = {}

for scene in scenes:
    # One tally dict per chapter label, created the first time it is seen
    chapter_tally = character_scene_count.setdefault(scene['chapter'], {})

    for cast_member in scene['characters']:
        # Characters are dicts; their 'name' is the key being counted
        member_name = cast_member['name']
        chapter_tally[member_name] = chapter_tally.get(member_name, 0) + 1

character_scene_count

{'Chapter 1': {'Mei': 4, 'Zhang': 4, 'Lin': 1, 'Li Yuan': 1, 'Lao Feng': 1},
 'Chapter 2': {'Mei': 5,
  'Zhang': 2,
  'Farid': 2,
  'Kaveh': 2,
  'Lin': 1,
  'Old Wang': 1,
  'Xiu': 1},
 'Chapter 3': {'Mei': 5,
  'Farid': 2,
  'Old Wang': 1,
  'Kaveh': 2,
  'Jia': 1,
  'Hadi': 1,
  'Badr': 1,
  'Aisha': 1},
 'Chapter 4': {'Mei': 3,
  'Zhang': 3,
  'Farid': 3,
  'Aisha': 2,
  'Kaveh': 2,
  'Saeed': 2,
  'Yara': 2,
  'Yao': 1,
  'Jin': 1},
 'Chapter 5': {'Mei': 4,
  'Kaveh': 2,
  'Zhang': 3,
  'Li Wei': 1,
  'Zhao': 1,
  'Kalden': 1},
 'Chapter 6': {'Mei': 5,
  'Zhang': 2,
  'Farid': 2,
  'Aliya': 2,
  'Kaveh': 2,
  'Bahram': 1,
  'Sohrab': 1,
  'Lin': 1},
 'Chapter 7': {'Mei': 5,
  'Zhang': 1,
  'Farid': 2,
  'Jin': 1,
  'Kaveh': 2,
  'Nasrin': 1,
  'Rashid': 1,
  'Aliya': 1},
 'Chapter 8': {'Mei': 5,
  'Farid': 2,
  'Jin': 2,
  'Kaveh': 3,
  'Emir': 1,
  'Darya': 1,
  'Hassan': 1},
 'Chapter 9': {'Mei': 5,
  'Kaveh': 5,
  'Rashid': 3,
  'Tarek': 1,
  'Omar': 2,
  'Nasrin': 1,
  'Laila'

In [55]:
# repeat for locations
# Result shape: {chapter label: {location name: number of scenes}}
location_scene_count = {}

for scene in scenes:
    chapter_tally = location_scene_count.setdefault(scene['chapter'], {})

    # A scene has exactly one location, stored directly as its name
    place = scene['location']
    chapter_tally[place] = chapter_tally.get(place, 0) + 1

location_scene_count

{'Chapter 1': {"Mei's family home": 3, 'Tea market': 1},
 'Chapter 2': {"Chang'an city gates": 1, 'Caravan encampment': 4},
 'Chapter 3': {'Gobi Desert': 2, 'Oasis stop': 3},
 'Chapter 4': {'Desert camp': 3, 'Bandit hideout in dunes': 1},
 'Chapter 5': {'Caravanserai': 4},
 'Chapter 6': {'Caravan encampment': 2, "Healer's tent in the desert": 3},
 'Chapter 7': {'Mountains near Samarkand': 5},
 'Chapter 8': {'Samarkand bazaar': 3, "Emir's guesthouse": 2},
 'Chapter 9': {"Kaveh's quarters": 2,
  'Secret meeting place': 1,
  'Samarkand': 2},
 'Chapter 10': {'Caravan route through Persia': 1, "Prince's encampment": 4},
 'Chapter 11': {'Persian palace': 3, 'Royal gardens': 2},
 'Chapter 12': {'Persian palace': 2, 'Caravan home camp': 2}}

In [56]:
# Full chapter titles as stored on each chapter entry
for chapter_entry in chapters:
    print(chapter_entry['chapter'])

Chapter 1: The Merchant's Daughter
Chapter 2: Caravan of Secrets
Chapter 3: An Encounter in the Desert
Chapter 4: Bandits in the Night
Chapter 5: The Town of Lost Souls
Chapter 6: The Scroll of Secrets
Chapter 7: An Oath to Complete the Journey
Chapter 8: The Golden Bazaar
Chapter 9: Letters in the Dark
Chapter 10: A Prince in Disguise
Chapter 11: The Heart and the Choice
Chapter 12: Whispers of the Silk Road


In [57]:
# Chapter labels used as keys of the per-chapter character tally
for chapter_label in character_scene_count.keys():
    print(chapter_label)

Chapter 1
Chapter 2
Chapter 3
Chapter 4
Chapter 5
Chapter 6
Chapter 7
Chapter 8
Chapter 9
Chapter 10
Chapter 11
Chapter 12


In [None]:
# update counts in chapter json object
# Each chapter maps character/location names to a role description; replace
# every description with {"role": <description>, "count": <scenes in chapter>}.
for chapter in chapters:
    chap_characters = chapter['characters']
    chap_locations = chapter['locations']
    print(chapter['chapter'])

    # The tallies are keyed by the short label before the colon ("Chapter 1");
    # split() returns the whole title unchanged when there is no colon.
    chap_name = chapter['chapter'].split(':')[0]

    for entries, chapter_counts in (
        (chap_characters, character_scene_count.get(chap_name, {})),
        (chap_locations, location_scene_count.get(chap_name, {})),
    ):
        for name, role in entries.items():
            # Re-running this cell must not nest {"role": {"role": ...}}:
            # if the entry was already converted, reuse its role text.
            if isinstance(role, dict) and "role" in role:
                role = role["role"]
            # A name listed on the chapter but absent from all of its scenes
            # gets a count of 0 instead of aborting with a KeyError.
            entries[name] = {"role": role, "count": chapter_counts.get(name, 0)}


Chapter 1: The Merchant's Daughter
Chapter 2: Caravan of Secrets
Chapter 3: An Encounter in the Desert
Chapter 4: Bandits in the Night
Chapter 5: The Town of Lost Souls
Chapter 6: The Scroll of Secrets
Chapter 7: An Oath to Complete the Journey
Chapter 8: The Golden Bazaar
Chapter 9: Letters in the Dark
Chapter 10: A Prince in Disguise
Chapter 11: The Heart and the Choice
Chapter 12: Whispers of the Silk Road


In [59]:
# Display the chapter entries after the counts were attached
chapters


[{'chapter': "Chapter 1: The Merchant's Daughter",
  'summary': 'Mei prepares for a journey on the Silk Road with her father.',
  'description': 'Mei, the daughter of a respected tea merchant, helps her father pack for their Silk Road journey. She yearns for adventure but is told her place is at home. Circumstances change suddenly, and she is asked to accompany him, stepping into a world far beyond her imagination.',
  'importance': 0.6,
  'conflict': 0.2,
  'locations': {"Mei's family home": {'role': "Chang'an, Mei's family home and the starting point for the journey.",
    'count': 3},
   'Tea market': {'role': "Local market in Chang'an where Mei's father trades.",
    'count': 1}},
  'characters': {'Mei': {'role': 'Protagonist, daughter of a tea merchant.',
    'count': 4},
   'Zhang': {'role': "Mei's father, a respected merchant.", 'count': 4},
   'Lin': {'role': "Mei's servant and friend.", 'count': 1},
   'Lao Feng': {'role': "Local merchant in Chang'an.", 'count': 1},
   'Li Yua

In [None]:
# update json file
# Build and serialise the outline BEFORE opening the file: opening with 'w'
# truncates it immediately, so an error while assembling or encoding the data
# would otherwise leave an empty or partial story_outline.json behind.
new_outline = {
    "title": "Whispers of the Tea Route",
    "type": "Book",
    "author": "ChatGPT",
    "year": 2024,
    # Totals are derived from the data being written so they cannot drift
    # out of sync with the actual collections.
    "num_chapters": len(chapters),
    "num_scenes": len(scenes),
    "num_characters": len(characters),
    "num_locations": len(locations),
    "chapters": chapters,
    "scenes": scenes,
    "characters": characters,
    "locations": locations,
}
serialized_outline = json.dumps(new_outline, indent=4)

with open(f'json/{story}/story_outline.json', 'w') as file:
    file.write(serialized_outline)

In [61]:
# Sizes of the collections, printed in the order: scenes, characters, locations, chapters
for collection in (scenes, characters, locations, chapters):
    print(len(collection))

56
34
21
12


## rank by importance and conflict

In [121]:
# read in json
import json

# Re-read the outline from disk so the ranking pass starts from the saved state
with open(f'json/{story}/story_outline.json', 'r') as file:
    story_outline = json.load(file)

scenes, chapters, characters, locations = (
    story_outline[key] for key in ('scenes', 'chapters', 'characters', 'locations')
)

In [122]:
# rank each scene by importance
# and within each scene, the characters
#
# Per chapter: every scene gets a 1-based "number" (its position in the global
# scene list), an "importance_rank" and a "conflict_rank" among the scenes of
# that chapter; every character gets an "importance_rank" within its scene.
# Rank 1 is the highest value; ties keep their original order.

for chapter_pos, chapter_entry in enumerate(chapters):
    # Scenes store only the label before the colon (e.g. "Chapter 1");
    # split() returns the whole title unchanged when there is no colon.
    short_label = chapter_entry["chapter"].split(":")[0]

    scene_importance = []  # (index into scenes, importance)
    scene_conflict = []    # (index into scenes, conflict)

    for scene_pos, scene_entry in enumerate(scenes):
        if scene_entry["chapter"] == short_label:
            scene_importance.append((scene_pos, scene_entry["importance"]))
            scene_conflict.append((scene_pos, scene_entry["conflict"]))

            # Order the cast positions by descending importance
            cast = scene_entry["characters"]
            cast_order = sorted(
                range(len(cast)),
                key=lambda pos: cast[pos]["importance"],
                reverse=True,
            )
            for rank, cast_pos in enumerate(cast_order, start=1):
                cast[cast_pos]["importance_rank"] = rank

            scene_entry["number"] = scene_pos + 1

    # Rank this chapter's scenes, first by importance and then by conflict
    for rank_key, pairs in (
        ("importance_rank", scene_importance),
        ("conflict_rank", scene_conflict),
    ):
        ordered = sorted(pairs, key=lambda pair: pair[1], reverse=True)
        for rank, (scene_pos, _) in enumerate(ordered, start=1):
            scenes[scene_pos][rank_key] = rank

    # Record how many scenes belong to the chapter
    chapters[chapter_pos]["scenes"] = len(scene_importance)

In [123]:
# Display the scenes after the ranking pass
scenes

[{'title': 'Packing for the Journey',
  'number': 1,
  'summary': 'Mei assists her father in preparing for the Silk Road journey.',
  'chapter': 'Chapter 1',
  'location': "Mei's family home",
  'importance': 0.5,
  'conflict': 0.1,
  'characters': [{'name': 'Mei',
    'role': 'protagonist, daughter of a tea merchant',
    'importance': 1.0,
    'emotion': 'eager yet uncertain',
    'sentiment': 0.3,
    'importance_rank': 1},
   {'name': 'Zhang',
    'role': "Mei's father, a respected merchant",
    'importance': 0.9,
    'emotion': 'focused on preparations',
    'sentiment': 0.2,
    'importance_rank': 2},
   {'name': 'Lin',
    'role': "Mei's servant and friend",
    'importance': 0.3,
    'emotion': 'supportive',
    'sentiment': 0.4,
    'importance_rank': 3}],
  'importance_rank': 4,
  'conflict_rank': 4},
 {'title': 'Yearning for Adventure',
  'number': 2,
  'summary': 'Mei expresses her desire to join the journey but faces resistance.',
  'chapter': 'Chapter 1',
  'location': "

In [124]:
# Display the chapter entries after the ranking pass
chapters

[{'chapter': "Chapter 1: The Merchant's Daughter",
  'summary': 'Mei prepares for a journey on the Silk Road with her father.',
  'description': 'Mei, the daughter of a respected tea merchant, helps her father pack for their Silk Road journey. She yearns for adventure but is told her place is at home. Circumstances change suddenly, and she is asked to accompany him, stepping into a world far beyond her imagination.',
  'importance': 0.6,
  'conflict': 0.2,
  'locations': {"Mei's family home": {'role': "Chang'an, Mei's family home and the starting point for the journey.",
    'count': 3},
   'Tea market': {'role': "Local market in Chang'an where Mei's father trades.",
    'count': 1}},
  'characters': {'Mei': {'role': 'Protagonist, daughter of a tea merchant.',
    'count': 4},
   'Zhang': {'role': "Mei's father, a respected merchant.", 'count': 4},
   'Lin': {'role': "Mei's servant and friend.", 'count': 1},
   'Lao Feng': {'role': "Local merchant in Chang'an.", 'count': 1},
   'Li Yua

In [125]:
# update json file
# Build and serialise the outline BEFORE opening the file: opening with 'w'
# truncates it immediately, so an error while assembling or encoding the data
# would otherwise leave an empty or partial story_outline.json behind.
new_outline = {
    "title": "Whispers of the Tea Route",
    "type": "Book",
    "author": "ChatGPT",
    "year": 2024,
    # Totals are derived from the data being written so they cannot drift
    # out of sync with the actual collections.
    "num_chapters": len(chapters),
    "num_scenes": len(scenes),
    "num_characters": len(characters),
    "num_locations": len(locations),
    "chapters": chapters,
    "scenes": scenes,
    "characters": characters,
    "locations": locations,
}
serialized_outline = json.dumps(new_outline, indent=4)

with open(f'json/{story}/story_outline.json', 'w') as file:
    file.write(serialized_outline)

## get first and last lines for each scene

In [92]:
def get_scene_boundaries(text_file_path, line_file_path):
    """Locate the first and last line of every scene in a line file.

    The main text file marks scene breaks with lines containing ``XXX``. For
    each marker, the first 50 characters of the first non-blank line that
    follows it are searched for in the line file; the first line containing
    that snippet is taken as the start of the next scene.

    Markers are skipped when nothing but blank lines follows them, or when
    their snippet does not occur anywhere in the line file.

    Args:
        text_file_path: Path of the main text file containing ``XXX`` markers.
        line_file_path: Path of the file whose line numbers are reported.

    Returns:
        A list of ``(first_line, last_line)`` tuples, 1-based and inclusive,
        one per scene found; the last tuple always ends at the final line of
        the line file.
    """
    # Read the main text file and find positions of "XXX"
    with open(text_file_path, 'r') as text_file:
        text_lines = text_file.readlines()

    # read file with line numbers
    with open(line_file_path, 'r') as line_file:
        line_numbers = line_file.readlines()

    # Gather start and end line numbers for each scene based on "XXX" markers
    scene_boundaries = []
    scene_start = 1  # Assuming line numbers start from 1
    for idx, line in enumerate(text_lines):
        if "XXX" not in line:
            continue

        # find first line after "XXX" that is not empty; a marker at the very
        # end of the text has none and therefore opens no new scene
        following = next(
            (candidate for candidate in text_lines[idx + 1:] if candidate.strip()),
            None,
        )
        if following is None:
            continue
        next_line = following[:50]

        # find line number in line_numbers file (first match wins)
        for i, line_num in enumerate(line_numbers):
            if next_line in line_num:
                # Matched line is 1-based line i + 1, so the previous scene
                # ends on line i and the next one starts on line i + 1.
                scene_boundaries.append((scene_start, i))
                scene_start = i + 1
                break

    # add last scene boundary
    scene_boundaries.append((scene_start, len(line_numbers)))

    return scene_boundaries

In [126]:
# Paths to your files
chapter_folder = f'chapters/{story}'                  # one text file per chapter
scripts_folder = 'scripts'                            # script / full-text files
text_file_path = f'{scripts_folder}/{story}_og.txt'   # main text with "XXX" markers

# read in story_outline.json
with open(f'json/{story}/story_outline.json', 'r') as file:
    story_outline = json.load(file)

chapters, scenes = story_outline['chapters'], story_outline['scenes']

# chapter title -> list of (first_line, last_line) tuples, one per scene
chapter_lines = {}

for chapter in chapters:
    chapter_name = chapter['chapter']
    # Each chapter file is named after the full chapter title
    line_file_path = f'{chapter_folder}/{chapter_name}.txt'

    scene_line_numbers = get_scene_boundaries(text_file_path, line_file_path)

    # Chapter title, one line per scene, then a blank separator line
    report = [chapter_name]
    for idx, (start, end) in enumerate(scene_line_numbers, 1):
        report.append(f"scene {idx}: starts at line {start}, ends at line {end}")
    report.append("")
    print("\n".join(report))

    chapter_lines[chapter_name] = scene_line_numbers

Chapter 1: The Merchant's Daughter
scene 1: starts at line 1, ends at line 32
scene 2: starts at line 33, ends at line 56
scene 3: starts at line 57, ends at line 76
scene 4: starts at line 77, ends at line 96

Chapter 2: Caravan of Secrets
scene 1: starts at line 1, ends at line 37
scene 2: starts at line 38, ends at line 72
scene 3: starts at line 73, ends at line 101
scene 4: starts at line 102, ends at line 127

Chapter 3: An Encounter in the Desert
scene 1: starts at line 1, ends at line 53
scene 2: starts at line 54, ends at line 87
scene 3: starts at line 88, ends at line 113
scene 4: starts at line 114, ends at line 147
scene 5: starts at line 148, ends at line 180

Chapter 4: Bandits in the Night
scene 1: starts at line 1, ends at line 40
scene 2: starts at line 41, ends at line 79
scene 3: starts at line 80, ends at line 117
scene 4: starts at line 118, ends at line 154

Chapter 5: The Town of Lost Souls
scene 1: starts at line 1, ends at line 52
scene 2: starts at line 53, e

In [127]:
# Display the scene boundaries collected per chapter
chapter_lines

{"Chapter 1: The Merchant's Daughter": [(1, 32), (33, 56), (57, 76), (77, 96)],
 'Chapter 2: Caravan of Secrets': [(1, 37), (38, 72), (73, 101), (102, 127)],
 'Chapter 3: An Encounter in the Desert': [(1, 53),
  (54, 87),
  (88, 113),
  (114, 147),
  (148, 180)],
 'Chapter 4: Bandits in the Night': [(1, 40), (41, 79), (80, 117), (118, 154)],
 'Chapter 5: The Town of Lost Souls': [(1, 52),
  (53, 87),
  (88, 118),
  (119, 150)],
 'Chapter 6: The Scroll of Secrets': [(1, 42),
  (43, 76),
  (77, 105),
  (106, 132),
  (133, 165)],
 'Chapter 7: An Oath to Complete the Journey': [(1, 31),
  (32, 65),
  (66, 99),
  (100, 129),
  (130, 157)],
 'Chapter 8: The Golden Bazaar': [(1, 35),
  (36, 66),
  (67, 96),
  (97, 122),
  (123, 148)],
 'Chapter 9: Letters in the Dark': [(1, 43),
  (44, 74),
  (75, 102),
  (103, 130),
  (131, 156)],
 'Chapter 10: A Prince in Disguise': [(1, 38),
  (39, 69),
  (70, 96),
  (97, 121),
  (122, 149)],
 'Chapter 11: The Heart and the Choice': [(1, 39),
  (40, 68),
 

In [None]:
# add "first_line" and "last_line" to each scene
for chapter in chapters:
    chapter_name = chapter['chapter']
    print(chapter_name)
    scene_line_numbers = chapter_lines[chapter_name]
    print(scene_line_numbers)

    # Scenes carry only the label before the colon; without a colon the
    # split returns the full chapter name unchanged.
    compare_chapter = chapter_name.split(":")[0]

    # Scenes of this chapter, lazily filtered in their original order; the
    # n-th one receives the n-th boundary tuple.
    chapter_scenes = (scene for scene in scenes if scene['chapter'] == compare_chapter)
    for scene_num, scene in enumerate(chapter_scenes):
        print(scene['chapter'], compare_chapter)
        print(scene_num)
        boundary = scene_line_numbers[scene_num]
        scene['first_line'] = boundary[0]
        scene['last_line'] = boundary[1]

Chapter 1: The Merchant's Daughter
[(1, 32), (33, 56), (57, 76), (77, 96)]
Chapter 1 Chapter 1
0
Chapter 1 Chapter 1
1
Chapter 1 Chapter 1
2
Chapter 1 Chapter 1
3
Chapter 2: Caravan of Secrets
[(1, 37), (38, 72), (73, 101), (102, 127)]
Chapter 2 Chapter 2
0
Chapter 2 Chapter 2
1
Chapter 2 Chapter 2
2
Chapter 2 Chapter 2
3
Chapter 3: An Encounter in the Desert
[(1, 53), (54, 87), (88, 113), (114, 147), (148, 180)]
Chapter 3 Chapter 3
0
Chapter 3 Chapter 3
1
Chapter 3 Chapter 3
2
Chapter 3 Chapter 3
3
Chapter 3 Chapter 3
4
Chapter 4: Bandits in the Night
[(1, 40), (41, 79), (80, 117), (118, 154)]
Chapter 4 Chapter 4
0
Chapter 4 Chapter 4
1
Chapter 4 Chapter 4
2
Chapter 4 Chapter 4
3
Chapter 5: The Town of Lost Souls
[(1, 52), (53, 87), (88, 118), (119, 150)]
Chapter 5 Chapter 5
0
Chapter 5 Chapter 5
1
Chapter 5 Chapter 5
2
Chapter 5 Chapter 5
3
Chapter 6: The Scroll of Secrets
[(1, 42), (43, 76), (77, 105), (106, 132), (133, 165)]
Chapter 6 Chapter 6
0
Chapter 6 Chapter 6
1
Chapter 6 Cha

In [129]:
# Display the scenes with first_line / last_line attached
scenes

[{'title': 'Packing for the Journey',
  'number': 1,
  'summary': 'Mei assists her father in preparing for the Silk Road journey.',
  'chapter': 'Chapter 1',
  'location': "Mei's family home",
  'importance': 0.5,
  'conflict': 0.1,
  'characters': [{'name': 'Mei',
    'role': 'protagonist, daughter of a tea merchant',
    'importance': 1.0,
    'emotion': 'eager yet uncertain',
    'sentiment': 0.3,
    'importance_rank': 1},
   {'name': 'Zhang',
    'role': "Mei's father, a respected merchant",
    'importance': 0.9,
    'emotion': 'focused on preparations',
    'sentiment': 0.2,
    'importance_rank': 2},
   {'name': 'Lin',
    'role': "Mei's servant and friend",
    'importance': 0.3,
    'emotion': 'supportive',
    'sentiment': 0.4,
    'importance_rank': 3}],
  'importance_rank': 4,
  'conflict_rank': 4,
  'first_line': 1,
  'last_line': 32},
 {'title': 'Yearning for Adventure',
  'number': 2,
  'summary': 'Mei expresses her desire to join the journey but faces resistance.',
  '

In [130]:
# update json file
# Serialise first: opening with 'w' truncates the file immediately, so an
# encoding error would otherwise leave story_outline.json empty or partial.
serialized_outline = json.dumps(story_outline, indent=4)

with open(f'json/{story}/story_outline.json', 'w') as file:
    file.write(serialized_outline)