# first we need to build a guide

In [1]:
from litellm import completion

In [None]:
# Ask gpt-4o (via litellm's completion) to draft writing guidelines for the
# catalog author. Only a system message is sent; temperature is 1.0.
request = dict(
    model="gpt-4o",
    temperature=1.0,
    messages=[
        dict(
            role="system",
            content="""You're the world's most famous instructor for museum guides and contemporary art exhibition organizers.
You write instructions for the catalog writer, with illustrations of exhibits and tour rooms. Each illustration can be a photograph or drawing of a room, place, artifact, or it can be a collage assembled from several photographs or sketches.

Your task is to compose instructions for the author of the text accompanying the illustrations. Each accompanying text should contain about 500 words. Make a list of the issues that the author of the text should cover. What questions should he/she answer, what information should he/she provide to maximize the reader's sense of presence? What artistic devices should be used and what should be refrained from? What is the best way to link the story to the description of the previous exhibit?""",
        ),
    ],
    # Options left disabled on purpose: max_tokens=1, logprobs=True, top_logprobs=2
)
response = completion(**request)

In [7]:
# Keep the text of the first returned choice and print it for inspection.
guide_instructions = response.choices[0].message.content
print( guide_instructions )

When composing instructions for the catalog writer crafting the accompanying text for illustrations of museum exhibits or contemporary art exhibitions, it's important to ensure that the text not only informs but also engages the reader, making them feel as though they are physically present at the exhibit. Below is a list of issues, questions, and guidelines for the author to consider while writing the text:

1. **Introduction and Contextualization:**
   - Begin with an engaging opening that sets the scene. Use vivid language to describe the illustration and the space it occupies.
   - Provide historical or cultural context relevant to the exhibit. Why is this piece significant? How does it fit into the broader history or narrative of the collection or exhibition?

2. **Detailed Description:**
   - Describe the exhibit with specific and sensory details. What does it look, feel, and sound like? If applicable, mention its size, material, and colors.
   - Highlight unique aspects or featu

# second, we want to describe each image and produce associations with it

In [8]:
# pip install json-repair
import os
import json
import base64
from glob import glob
from pprint import pprint
import json_repair
from openai import OpenAI

# Shared OpenAI client used by query() below.
# NOTE(review): no credentials are passed here — presumably they are read from the environment; confirm.
client = OpenAI()


In [11]:
def encode_image_to_base64(image_path):
    """Read the file at ``image_path`` in binary mode and return its base64 encoding as a str."""
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


def query(img):
    """Send one base64-encoded image to gpt-4o and return the parsed step-by-step analysis.

    Args:
        img: base64 text of the image; it is embedded in a ``data:image/jpeg`` URL.

    Returns:
        Whatever ``json_repair.loads`` produces from the reply text. The prompt asks
        the model for a dict with keys "STEP1", "STEP2", "STEP3" and "STEP4".
    """
    system_prompt = """You're the world's most famous gallerist, art critic and art historian.
Looking at a given image (it can be a photograph, drawing or collage), do the following:

Step1: Describe in detail what you see in the image -- place, artefact or anything else.

Step2: Given the description from step1, provide a numbered list of 20 free associations with it. Try to think about corresponding sounds, smells, feelings. Speculate of the context and the author's figure. Use short texts -- 1-5 words each.

Step3: Read the description from step1 and the list of associations with it from step2. Rewrite and expand each of the associations into longer and more detailed (10-15 words each). Avoid repetitions and do not use the similar words across the sentences. Keep reffering to the original object. Provide a numbered list as a result.

Step4: Read the description from step1 and the list of expanded associations with it from step3. Write a shorter list of items using the following rules:
- Complementary items can be grouped into one item that combines the original descriptions.
- Weak, unsuccessful, or poorly related items to the original topic should be deleted.
- The final list should be no longer than 8 items, different items should have different meanings or contexts. Make them as concrete as possible.
Provide a numbered list as a result.

Output JSON dict with keys "STEP1", "STEP2", "STEP3", "STEP4" and corresponding values."""

    system_message = {
        "role": "system",
        "content": [{"type": "text", "text": system_prompt}],
    }
    # The image travels inline as a data URL, requested at "high" detail.
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{img}",
            "detail": "high",
        },
    }
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Analyse this image. OUTPUT ONLY JSON IN THE SPECIFIED FORMAT!"},
            image_part,
        ],
    }

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
        max_tokens=4000,
    )
    reply_text = response.choices[0].message.content
    # json_repair is used instead of json so a slightly malformed reply still parses.
    return json_repair.loads(reply_text)

# Smoke test: encode one local image and run it through query(); as the cell's
# last expression, the parsed result is displayed below.
query(encode_image_to_base64("./test1.jpg"))

{'STEP1': 'The image shows an architectural structure with a wavy facade filled with large, rounded holes. The building is made of a beige material, and there are people walking in the open space in front of it, suggesting a public area. The sky is clear and blue, adding a sense of calmness to the scene.',
 'STEP2': ['Sand dunes',
  'Swiss cheese',
  'Caves',
  'Man-made wonder',
  'Playful architecture',
  'Sunlit plaza',
  'Lazy afternoon',
  'Cultural hotspot',
  'Meeting spot',
  'Modern art',
  'Organic design',
  'Curving lines',
  'Urban exploration',
  'Window patterns',
  'Summer day',
  'Sound of footsteps',
  'Warm echo',
  'Art installation',
  'New experience',
  'Communal space'],
 'STEP3': ['The wavy facade resembles sand dunes shaped over time by the wind in a desert.',
  'The perforated building walls remind one of Swiss cheese with its many holes.',
  'The rounded openings imitate natural caves carved out by water and erosion.',
  'A man-made wonder that captivates th

In [10]:
# Concatenate the lines of the two walk files into one list of image file names.
# Lines keep their trailing newline here; it is stripped where they are used.
tour_images = []
for walk_path in ('vgg16@16_cl3_subusters_walk.txt', 'vgg16@16_cl4_subusters_walk.txt'):
    # A context manager closes each handle deterministically; the previous
    # open(...).readlines() form left both files open until garbage collection.
    with open(walk_path) as walk_file:
        tour_images.extend(walk_file.readlines())
len(tour_images), tour_images[:2]

(59,
 ['6.3406_4c3dab21ff61e3d969aa9b4b6ad2c135ebd6c58a1ced79ea87d0d98fc0406f52_2ea3e9d832ab4e1b4bcc7509a5a3c74b923cf251db0fe3f7c05f1f5be918ebbe.jpg\n',
  '6.4593_ec722755d1d7e12d9d17cc9edc70158c2b0daa7c55d74325d2163a31ac431244_f3bf4e3ee7143a91af8acadb9cf24f07d67eb5f3a5b9b8657e8306afcb114fa3.jpg\n'])

In [13]:
# Describe every tour image with query() and checkpoint the growing result list.
# Each entry is {'file': <image name>, 'assoc': <parsed model reply>}.
described = []
for idx, fn in enumerate(tour_images):
    fn = fn.strip()  # drop the newline kept by readlines()
    desc = query(encode_image_to_base64(f"./imgs/{fn}"))
    item = {
        'file': fn,
        'assoc': desc
    }
    described.append(item)
    if idx and not idx % 10:
        # Progress line every 10th image. The reply is stored under 'assoc'
        # (the old lookup used a non-existent 'desc' key, so the bare except
        # swallowed a KeyError and nothing was ever printed).
        try:
            print(idx, item['assoc']['STEP1'])
        except (KeyError, TypeError):
            # The parsed reply is not a dict with a STEP1 key; keep going.
            print(idx, '(no STEP1 in the parsed reply)')
        # Intermediate snapshot so a crash does not lose finished descriptions.
        with open(f'tour3-4.described.{idx}.json', 'w', encoding='utf-8') as ofh:
            print(json.dumps(described, indent=2), file=ofh)
with open('tour3-4.described.final.json', 'w', encoding='utf-8') as ofh:
    print(json.dumps(described, indent=2), file=ofh)

In [14]:
# Describe the images of a third walk file and append them to `described`.
# NOTE: `described` is extended in place, so re-running this cell appends the
# same images again.
with open('vgg16@16_cl1_subusters_walk.txt') as walk_file:
    # Context manager closes the handle that open(...).readlines() used to leak.
    more_tour_images = walk_file.readlines()

for idx, fn in enumerate(more_tour_images):
    fn = fn.strip()  # drop the newline kept by readlines()
    desc = query(encode_image_to_base64(f"./imgs/{fn}"))
    item = {
        'file': fn,
        'assoc': desc
    }
    described.append(item)
    if idx and not idx % 10:
        # Progress line every 10th image; only the "reply has no STEP1" case is
        # tolerated instead of swallowing every exception with a bare except.
        try:
            print(idx, item['assoc']['STEP1'])
        except (KeyError, TypeError):
            print(idx, '(no STEP1 in the parsed reply)')
        # Intermediate snapshot of everything described so far.
        with open(f'tour3-4-1.described.{idx}.json', 'w', encoding='utf-8') as ofh:
            print(json.dumps(described, indent=2), file=ofh)
with open('tour3-4-1.described.final.json', 'w', encoding='utf-8') as ofh:
    print(json.dumps(described, indent=2), file=ofh)

10 The image depicts a vast, white, cavernous space that resembles a cathedral made of ice or stone, with an intricately ribbed ceiling. A solitary figure, dressed in dark clothes, stands facing a distant large, triangular opening at the end of the chamber. The ambiance is serene and mystical.
20 The image depicts a flat landscape dotted with large, smooth, organic-shaped concrete structures, resembling giant puzzle pieces or abstract sculptures with openings. The structures are light gray and scattered across a grassy field, bordered by trees in the background under an overcast sky.
30 The image shows a series of architectural structures and landscapes with desert-like dunes. The structures appear to have smooth, undulating surfaces, resembling large sand formations. Some areas are populated with people, suggesting interaction with the space. The architecture harmonizes with the natural, sandy environment, creating a blend of modernity and nature. Different images show various perspec

# now let's try to generate final texts

In [36]:
# Fixed opening of the essay system prompt: persona, task (350-400 word essay)
# and the lead-in sentence for the guideline sections that follow.
prefix = """You're the world's most famous gallerist, art critic and art historian.
Looking at a description of an image of exhibit (it may be a photo, a drawing, or a collage) and a list of free associations with the object or place depicted, write a short essay aboit it (350-400 words). It should be a part of exhibition booklet, accompanying text for that illustration.

When the describing an exhibit, it's important to ensure that the text not only informs but also engages the reader, making them feel as though they are physically present at the exhibit. Below is a list of issues, questions, and guidelines for the author to consider (but not limited to) while writing the text:"""
# Pool of ten guideline sections; get_randomized_instruction() samples five of
# them per prompt. Each section ends with a blank line so they can be joined directly.
bits = [
"""**Introduction and Contextualization:**
   - Begin with an engaging opening that sets the scene. Use vivid language to describe the illustration and the space it occupies.
   - Provide historical or cultural context relevant to the exhibit. Why is this piece significant? How does it fit into the broader history or narrative of the collection or exhibition?

""", """**Detailed Description:**
   - Describe the exhibit with specific and sensory details. What does it look, feel, and sound like? If applicable, mention its size, material, and colors.
   - Highlight unique aspects or features that might not be immediately obvious from the illustration.

""", """**Artist/Creator Background:**
   - Offer background information about the artist or creator. Include relevant details about their life, artistic journey, and influences.
   - Discuss any known motivations or themes behind the creation of this piece.

""", """**Interpretative Insights:**
   - Encourage the reader to think critically by providing different interpretations and meanings of the exhibit. How might various audiences perceive or relate to this piece?
   - Discuss any symbolism, motifs, or recurring themes present in the work.

""", """**Audience Engagement:**
   - Pose questions or prompts to the reader to foster engagement and personal reflection. For instance, “What emotions does this piece evoke for you?”
   - Suggest the experiential aspect; if the piece is interactive or participatory, explain how visitors might engage with it.

""", """**Connections and Comparisons:**
   - Make connections to previous exhibits or artworks in the collection. Consider thematic ties, the progression of styles, or historical evolution.
   - Draw comparisons to similar works by the same or different artists to provide a broader understanding.

""", """**Narrative and Flow:**
   - Ensure the narrative has a logical and compelling flow. Transition smoothly from the description to contextualization, moving through interpretation and engagement.
   - Use storytelling devices to make the text engaging and cohesive. Refrain from excessive jargon that might alienate a wider audience.

""", """**Conclusion and Departure:**
   - End with a thoughtful conclusion that encapsulates the significance of the piece and leaves a lasting impression.
   - Offer a link or transition to the next exhibit, creating anticipation and continuity. This could be thematic, historical, or stylistic.

""", """**Tone and Style:**
   - Maintain an accessible and inclusive tone that appeals to a diverse audience, encompassing both novice visitors and seasoned art enthusiasts.
   - Balance factual information with emotive and descriptive language to create a multisensory experience for the reader.

""", """**Avoid:**
    - Avoid overly technical language that might confuse or disengage the reader.
    - Steer clear of personal biases or speculative assertions not supported by evidence.

"""
]

# Closing instruction appended after the sampled sections.
postfix = """DO NOT REPEAT THE SAME STRUCTURE EACH TIME -- READ YOUR PREVIOUS TEXTS IF AVAILABLE AND MAKE THE RESULT DIVERSE AND UNIQUE EACH TIME"""
import numpy as np
def get_randomized_instruction():
    """Assemble a system prompt: ``prefix``, five distinct randomly drawn ``bits``, then ``postfix``.

    The draw uses NumPy's global random state, so each call may return a
    different selection and ordering of guideline sections.
    """
    chosen_sections = np.random.choice(bits, 5, replace=False)
    guideline_text = "".join(chosen_sections)
    return f"{prefix}\n\n{guideline_text}\n\n{postfix}"

# Preview one randomly assembled system prompt to sanity-check the template.
print( get_randomized_instruction() )

You're the world's most famous gallerist, art critic and art historian.
Looking at a description of an image of exhibit (it may be a photo, a drawing, or a collage) and a list of free associations with the object or place depicted, write a short essay aboit it (350-400 words). It should be a part of exhibition booklet, accompanying text for that illustration.

When the describing an exhibit, it's important to ensure that the text not only informs but also engages the reader, making them feel as though they are physically present at the exhibit. Below is a list of issues, questions, and guidelines for the author to consider (but not limited to) while writing the text:

**Interpretative Insights:**
   - Encourage the reader to think critically by providing different interpretations and meanings of the exhibit. How might various audiences perceive or relate to this piece?
   - Discuss any symbolism, motifs, or recurring themes present in the work.

**Artist/Creator Background:**
   - Offe

In [37]:
# Generate a plain-text essay for every described image. The system prompt is
# re-randomized per image, and the one or two preceding query/essay pairs are
# replayed as chat history (most recent pair first).
for idx, entry in enumerate(described):
    print(idx, '#'*40)
    instruction = get_randomized_instruction()
    scene_text = entry['assoc']['STEP1']
    final_items = entry['assoc']['STEP4']
    if isinstance(final_items, list):
        final_items = "\n".join(final_items)
    description = scene_text + "\n" + final_items

    messages = [{'role': 'system', 'content': instruction}]
    for steps_back in (1, 2):
        if idx >= steps_back:
            earlier = described[idx - steps_back]
            messages.append({'role': 'user', 'content': earlier['query']})
            messages.append({'role': 'assistant', 'content': earlier['text']})
    messages.append({'role': 'user', 'content': description})

    request = dict(model="gpt-4o", temperature=1.0, messages=messages)
    text = completion(**request).choices[0].message.content
    # Store prompt and answer on the entry so later iterations can replay them.
    entry['query'] = description
    entry['text'] = text
    print(text)

    # Snapshot every 10th image (never at idx 0).
    if idx > 0 and idx % 10 == 0:
        with open(f'tour3-4-1.described.texts.{idx}.json', 'w', encoding='utf-8') as ofh:
            ofh.write(json.dumps(described, indent=2) + "\n")

with open('tour3-4-1.described.texts.final.json', 'w', encoding='utf-8') as ofh:
    ofh.write(json.dumps(described, indent=2) + "\n")


# # response    
    

0 ########################################
The exhibit before you is the evocative work titled "Sanctuary of Knowledge," a masterful synthesis of reverence, history, and introspection. This scene invites viewers to step into a realm where time seems suspended, caught between the distinguished aura of erudition and the spiritual weight of devotion. The artist capturing this scene, renowned for blending evocative themes of faith and intellect, compels us into a contemplation of timelessness and the enduring essence of human inquiry.

Upon entering this imagined space, one immediately senses the presence of an environment steeped in history and sanctity. The tall, wooden bookshelves, groaning under the weight of numerous aging tomes, act as sentinels of knowledge—a repository of centuries worth of wisdom. The volumes themselves, bathed in the scent of musty paper and old wood, speak of their rich histories, some whispering tales from another age, some murmuring secrets long forgotten by t

In [42]:
import re
# Total number of space/newline-separated tokens across all generated essays.
c = sum(len(re.split(r'[ \n]+', entry['text'])) for entry in described)
print(c)

69580


In [46]:
from collections import Counter
# Word frequencies in the first **bold** span of each essay (the generated
# title). Essays without any '**' marker are ignored.
cnt = Counter()
for entry in described:
    bold_split = entry['text'].split('**')
    if len(bold_split) < 2:
        # No '**' in the text at all.
        continue
    heading = bold_split[1]
    colon_split = heading.split(":")
    if len(colon_split) > 1:
        # Keep only what follows the first colon (up to the next one, if any).
        heading = colon_split[1]
    cnt.update(heading.lower().split())

cnt.most_common(100)

[('of', 92),
 ('the', 51),
 ('echoes', 33),
 ('in', 10),
 ('stone', 9),
 ('horizons', 9),
 ('silent', 8),
 ('tomorrow', 8),
 ('and', 7),
 ('eternity', 7),
 ('silence', 6),
 ('solitude', 6),
 ('geometry', 6),
 ('time', 6),
 ('light', 5),
 ('whispering', 5),
 ('harmony', 5),
 ('visions', 5),
 ('timeless', 5),
 ('corridor', 4),
 ('shadows', 4),
 ('whispers', 4),
 ('grandeur', 4),
 ('through', 4),
 ('dreamscapes', 4),
 ('urban', 4),
 ('chamber', 3),
 ('contemplation', 3),
 ('enigmatic', 3),
 ('enigma', 3),
 ('passage', 3),
 ('pathways', 3),
 ('antiquity', 3),
 ('reflections', 3),
 ('reflection', 3),
 ('form', 3),
 ('sacred', 3),
 ('a', 3),
 ('symphony', 3),
 ('serenity', 3),
 ('guardians', 3),
 ('envisioning', 3),
 ('realms', 3),
 ('hall', 2),
 ('wisdom', 2),
 ('sanctum', 2),
 ('sanctuary', 2),
 ('modernity', 2),
 ('modern', 2),
 ('essence', 2),
 ('past', 2),
 ('baroque', 2),
 ('journey', 2),
 ('infinite', 2),
 ('to', 2),
 ('spaces', 2),
 ('architectural', 2),
 ('mirage', 2),
 ('odyssey', 

In [47]:
# Lower-cased token frequencies over the full essay texts (split on spaces/newlines).
cnt2 = Counter(
    token
    for entry in described
    for token in re.split(r'[ \n]+', entry['text'].lower())
)
cnt2.most_common(100)

[('the', 5058),
 ('of', 3533),
 ('and', 2599),
 ('a', 2291),
 ('to', 1431),
 ('this', 1111),
 ('in', 941),
 ('that', 806),
 ('with', 734),
 ('an', 683),
 ('as', 653),
 ('its', 567),
 ('is', 500),
 ('into', 478),
 ('you', 465),
 ('where', 428),
 ('these', 381),
 ('through', 364),
 ('within', 301),
 ('architectural', 291),
 ('each', 283),
 ('it', 277),
 ('both', 256),
 ('by', 253),
 ('their', 246),
 ('between', 239),
 ('for', 220),
 ('human', 202),
 ('from', 188),
 ('**exhibition', 176),
 ('booklet:', 176),
 ('space', 169),
 ('on', 160),
 ('your', 154),
 ('urban', 153),
 ('not', 151),
 ('but', 140),
 ('here,', 139),
 ('echoes', 137),
 ('invites', 136),
 ('stone', 136),
 ('our', 134),
 ('are', 133),
 ('ancient', 131),
 ('yet', 126),
 ('architecture', 122),
 ('what', 120),
 ('at', 118),
 ('silent', 118),
 ('testament', 118),
 ('inviting', 117),
 ('journey', 117),
 ('light', 116),
 ('design', 111),
 ('sense', 110),
 ('modern', 108),
 ('us', 106),
 ('one', 104),
 ('world', 104),
 ('form', 10

In [58]:
# Fixed opening of the essay system prompt: persona, task (350-400 word essay)
# and the lead-in sentence for the guideline sections that follow.
prefix = """You're the world's most famous gallerist, art critic and art historian.
Looking at a description of an image of exhibit (it may be a photo, a drawing, or a collage) and a list of free associations with the object or place depicted, write a short essay aboit it (350-400 words). It should be a part of exhibition booklet, accompanying text for that illustration.

When the describing an exhibit, it's important to ensure that the text not only informs but also engages the reader, making them feel as though they are physically present at the exhibit. Below is a list of issues, questions, and guidelines for the author to consider (but not limited to) while writing the text:"""
# Pool of ten guideline sections; get_randomized_instruction() samples five of
# them per prompt. Each section ends with a blank line so they can be joined directly.
bits = [
"""**Introduction and Contextualization:**
   - Begin with an engaging opening that sets the scene. Use vivid language to describe the illustration and the space it occupies.
   - Provide historical or cultural context relevant to the exhibit. Why is this piece significant? How does it fit into the broader history or narrative of the collection or exhibition?

""", """**Detailed Description:**
   - Describe the exhibit with specific and sensory details. What does it look, feel, and sound like? If applicable, mention its size, material, and colors.
   - Highlight unique aspects or features that might not be immediately obvious from the illustration.

""", """**Artist/Creator Background:**
   - Offer background information about the artist or creator. Include relevant details about their life, artistic journey, and influences.
   - Discuss any known motivations or themes behind the creation of this piece.

""", """**Interpretative Insights:**
   - Encourage the reader to think critically by providing different interpretations and meanings of the exhibit. How might various audiences perceive or relate to this piece?
   - Discuss any symbolism, motifs, or recurring themes present in the work.

""", """**Audience Engagement:**
   - Pose questions or prompts to the reader to foster engagement and personal reflection. For instance, “What emotions does this piece evoke for you?”
   - Suggest the experiential aspect; if the piece is interactive or participatory, explain how visitors might engage with it.

""", """**Connections and Comparisons:**
   - Make connections to previous exhibits or artworks in the collection. Consider thematic ties, the progression of styles, or historical evolution.
   - Draw comparisons to similar works by the same or different artists to provide a broader understanding.

""", """**Narrative and Flow:**
   - Ensure the narrative has a logical and compelling flow. Transition smoothly from the description to contextualization, moving through interpretation and engagement.
   - Use storytelling devices to make the text engaging and cohesive. Refrain from excessive jargon that might alienate a wider audience.

""", """**Conclusion and Departure:**
   - End with a thoughtful conclusion that encapsulates the significance of the piece and leaves a lasting impression.
   - Offer a link or transition to the next exhibit, creating anticipation and continuity. This could be thematic, historical, or stylistic.

""", """**Tone and Style:**
   - Maintain an accessible and inclusive tone that appeals to a diverse audience, encompassing both novice visitors and seasoned art enthusiasts.
   - Balance factual information with emotive and descriptive language to create a multisensory experience for the reader.

""", """**Avoid:**
    - Avoid overly technical language that might confuse or disengage the reader.
    - Steer clear of personal biases or speculative assertions not supported by evidence.

"""
]

# Closing instructions: diversity reminder, the JSON output schema
# ("title", "author", "date", "text") and word/style restrictions.
postfix = """DO NOT REPEAT THE SAME STRUCTURE EACH TIME -- READ YOUR PREVIOUS TEXTS IF AVAILABLE AND MAKE THE RESULT DIVERSE AND UNIQUE EACH TIME.
IF IT'S POSSIBLE, REFFER TO THE OBJECT OR PLACE, NOT TO THE IMAGE ITSELF.
OUTPUT RESULT AS A JSON DICT, WITH THE FOLLOWING KEYS:
"title" -- an exhibition title,
"author" -- name or pseudonym of author, if known
"date" -- date of the original work, if known, can be approx -- like 'late 18th century' or '3rd century b.c'
"text" -- essay on this exhibit.
AVOID USING STEREOTYPES AND CLICHES, ESPECIALLY IN TITLES, FOR EXAMPLE AVOID WORDS LIKE: chamber, contemplation, corridor, dreamscape, echo, enigma, eternity, geometry, grandeur, harmony, horizon, light, shadow, silence, solitude, stone, through, time, timeless, tomorrow, urban, vision, whisper, etc.
AVOID BY ANY CHANCE USING THESE WORDS: delve, emphasize excel, intricate, mash, notably, offering, pivotal, potential, showcasing, steatotic, surpassing, underscore, additionally, encompass.
DO NOT USE COLON IN TITLES! 
USE LIGHT AND SIMPLE LANGUAGE."""
import numpy as np
def get_randomized_instruction():
    """Assemble a system prompt: ``prefix``, five distinct randomly drawn ``bits``, then ``postfix``.

    The sections already end with a blank line, so a single newline separates
    them from ``postfix``. The draw uses NumPy's global random state.
    """
    chosen_sections = np.random.choice(bits, 5, replace=False)
    guideline_text = "".join(chosen_sections)
    return f"{prefix}\n\n{guideline_text}\n{postfix}"

# Preview one randomly assembled system prompt to sanity-check the template.
print( get_randomized_instruction() )

# Unused draft wording, apparently meant for the "date" key description — TODO confirm or delete:
#  -- it can be exact year or even date if it's recent; or it can be approx age or century if it's ancient. '20th century' or '21th century' -- BAD.

You're the world's most famous gallerist, art critic and art historian.
Looking at a description of an image of exhibit (it may be a photo, a drawing, or a collage) and a list of free associations with the object or place depicted, write a short essay aboit it (350-400 words). It should be a part of exhibition booklet, accompanying text for that illustration.

When the describing an exhibit, it's important to ensure that the text not only informs but also engages the reader, making them feel as though they are physically present at the exhibit. Below is a list of issues, questions, and guidelines for the author to consider (but not limited to) while writing the text:

**Connections and Comparisons:**
   - Make connections to previous exhibits or artworks in the collection. Consider thematic ties, the progression of styles, or historical evolution.
   - Draw comparisons to similar works by the same or different artists to provide a broader understanding.

**Conclusion and Departure:**
 

In [59]:
# Generate a JSON essay record ({"title", "author", "date", "text"}) for every
# described image. Earlier query/answer pairs are replayed as chat history so
# the model can see, and vary from, its previous output.
for idx in range(len(described)):
    print(idx, '#'*40)
    instruction = get_randomized_instruction()
    s1 = described[idx]['assoc']['STEP1']
    s4 = described[idx]['assoc']['STEP4']
    if isinstance(s4, list):
        s4 = "\n".join(s4)
    description = s1 + "\n" + s4

    messages = [ {'role':'system', 'content':instruction}, ]
    if idx > 0:
        # Always replay the immediately preceding exhibit.
        messages.append( {'role':'user', 'content':described[idx-1]['query']} )
        messages.append( {'role':'assistant', 'content':json.dumps(described[idx-1]['text'], indent=2)} )
    # Fix: this used to be `elif idx>1`, which can never run because any idx > 1
    # already satisfied `idx > 0` above, so no random earlier example was ever added.
    if idx > 1:
        # Draw from exhibits before idx-1 so the pair added above is not repeated.
        earlier_pool = list(range(idx - 1))
        prev_idx = np.random.choice(earlier_pool)
        messages.append( {'role':'user', 'content':described[prev_idx]['query']} )
        messages.append( {'role':'assistant', 'content':json.dumps(described[prev_idx]['text'], indent=2)} )
        prev_idx2 = np.random.choice(earlier_pool)
        if prev_idx2 != prev_idx:
            messages.append( {'role':'user', 'content':described[prev_idx2]['query']} )
            messages.append( {'role':'assistant', 'content':json.dumps(described[prev_idx2]['text'], indent=2)} )

    messages.append( {'role':'user', 'content':description} )

    request = {
        "model": "gpt-4o",
        "temperature": 1.,
        "messages": messages
    }
    # json_repair tolerates slightly malformed JSON in the reply.
    text = json_repair.loads(completion(**request).choices[0].message.content)
    # Store prompt and parsed answer so later iterations can replay them.
    described[idx]['query'] = description
    described[idx]['text'] = text
    print(text)

    # Snapshot every 10th image (never at idx 0).
    if idx and not idx % 10:
        with open(f'tour3-4-1.described.texts.{idx}.json', 'w', encoding='utf-8') as ofh:
            print(json.dumps(described, indent=2), file=ofh)

with open('tour3-4-1.described.texts.final.json', 'w', encoding='utf-8') as ofh:
    print(json.dumps(described, indent=2), file=ofh)


# # response    
    

0 ########################################
{'title': 'Sanctuary of Knowledge and Faith', 'author': 'Livia Meriwether', 'date': 'circa early 18th century', 'text': 'Nestled within an imposing library, this room is a testament to the intertwining of faith and knowledge. The tall wooden bookshelves that line the walls seem to reach for the heavens, harboring volumes that breathe the wisdom of centuries past. Their rich, dark grains exude a warmth that contrasts starkly with the cold, enduring stone of the crucifix centered in the room. \n\nThe stone crucifix, with its silently beseeching figure, stands as a sentinel among these repositories of human thought. Its presence infuses the space with a solemnity that calls to the devout and the curious alike. Here, the sacred coexists harmoniously with the intellect, a reminder that faith and reason have long been companions in the pursuit of truth.\n\nThe architecture embraces the visitor in a comforting ritual of age-old materials: the wooden 

In [60]:
# How often each generated author name occurs, most frequent first.
Counter(entry['text']['author'] for entry in described).most_common()

[('Lydia Harrow', 3),
 ('Aria Linden', 2),
 ('Julian Marks', 2),
 ('Elise Laurente', 2),
 ('Rae Pelham', 2),
 ('S.M. Lafayette', 2),
 ('Genevieve Clark', 2),
 ('Ava Carter', 2),
 ('Elena Hartmann', 2),
 ('Julian Marlowe', 2),
 ('Elena Rios', 2),
 ('Clara Dunning', 2),
 ('Livia Meriwether', 1),
 ('Eleanor Wyatt', 1),
 ('Hugo Frost', 1),
 ('Isabel Thornton', 1),
 ('Eleanor Prescott', 1),
 ('Harold Whitmore', 1),
 ('Margaret Hayes', 1),
 ('Thomas Larkspur', 1),
 ('Elena Hartman', 1),
 ('J. M. Caldwell', 1),
 ('Annabel Leigh', 1),
 ('Ernestine Ashford', 1),
 ('Edwin Caulfield', 1),
 ('Amelia Linford', 1),
 ('Eleanor Pierce', 1),
 ('Lydia R. Townsend', 1),
 ('Elena M. Calder', 1),
 ('Marco Trenton', 1),
 ('Simone Arden', 1),
 ('Julian Evers', 1),
 ('Isabella Maurelle', 1),
 ('Julian Arlet', 1),
 ('Eleanor Lys', 1),
 ('Alden Finch', 1),
 ('Jules Nyman', 1),
 ('Felix Morreau', 1),
 ('Eli Foster', 1),
 ('Amara Lin', 1),
 ('Oliver Stanton', 1),
 ('Lydia Hartley', 1),
 ('Arturo Delacroix', 1),
 

In [65]:
def _normalized_date(raw_date):
    """Lower-case a date label, drop filler words and unify 'contemporary era'."""
    lowered = raw_date.lower()
    without_filler = re.sub(r'(constructed|observation|-inspired|vision)', '', lowered)
    return without_filler.replace('contemporary era', 'contemporary').strip()


# How often each normalized date label occurs, most frequent first.
Counter(_normalized_date(entry['text']['date']) for entry in described).most_common()

[('2023', 86),
 ('contemporary', 21),
 ('21st century', 4),
 ('early 20th century', 4),
 ('imagined future', 3),
 ('2022', 3),
 ('late medieval period', 2),
 ('19th century', 2),
 ('14th century', 2),
 ('mid 20th century', 2),
 ('early 19th century', 2),
 ('circa early 18th century', 1),
 ('around 5th century b.c.', 1),
 ('12th century', 1),
 ('ancient civilizations era', 1),
 ('unknown ancient period', 1),
 ('presumed medieval era', 1),
 ('early renaissance', 1),
 ('5th century', 1),
 ('early 16th century', 1),
 ('early 6th century', 1),
 ('1st century a.d.', 1),
 ('early 14th century', 1),
 ('circa 3rd century', 1),
 ('20th century', 1),
 ('timeless', 1),
 ('early 12th century', 1),
 ('mid-14th century', 1),
 ('late 17th century', 1),
 ('possibly medieval era', 1),
 ('present day', 1),
 ('early 21st century', 1),
 ('late 18th century', 1),
 ('13th century', 1),
 ('17th-18th century', 1),
 ('early 15th century', 1),
 ('late 19th century', 1),
 ('circa 21st century', 1),
 ('contemporar

In [76]:
# Shuffle a shallow copy so `described` itself keeps its original order.
# NOTE(review): the shuffle is unseeded, so positions differ on every run; the
# hard-coded index skip-list in the HTML export cell only matches one particular shuffle.
final_list = list(described)
np.random.shuffle(final_list)

In [90]:
# Render the shuffled catalogue to guide_v0.html and flag the entries that were
# actually written. Each author and each date label is shown only on its first
# occurrence; repeats are replaced by 'unknown' and omitted from the subtitle.
seen_authors = set()
# Pre-seeded with vague labels so they always count as "already seen" and are hidden.
seen_dates = set(['imagined future','ancient civilizations era','timeless', 'present day','futuristic, unspecified', 'contemporary, unspecified', 'speculative future', 'speculative antiquity','modern conception'])
seen_img = set()
c = 0   # running word count of everything written (essay + title + subtitle)
ct = 0  # number of entries written; its parity alternates the image side
with open('guide_v0.html', 'w', encoding='utf-8') as ofh:
    for idx, it in enumerate(final_list[:]):
        date = re.sub(r'(constructed|observation|-inspired|vision)', '', it['text']['date'].lower()).replace('contemporary era', 'contemporary').strip()
        if date in seen_dates:
            date = 'unknown'
        else:
            seen_dates.add( date )
        author = it['text']['author']
        if author.lower() in seen_authors:
            author = 'unknown'
        else:
            seen_authors.add( author.lower() )
        # An author is only credited for contemporary / 19th-20xx dated works.
        if not('contemporary' in date or '20' in date or '19th' in date):
            author = 'unknown'
        if 'spanning ' in date:
            author = 'unknown'
        # Skips happen after the bookkeeping above, so skipped entries still
        # mark their author/date as seen.
        if it['file'] in seen_img:
            continue
        # Manually excluded positions of this particular shuffle of final_list.
        if idx in [39, 41, 47, 70, 82, 83, 85, 99, 106, 108, 124, 126, 130, 131, 140, 141, 144, 148, 158, 165, 170, 171, 172, 176, 177, 5,14, 149]: # 6, 96, 119,
            continue
        print(f"<h1>{it['text']['title']}</h1>", file=ofh)
        # Fix: each piece is followed by ". ", so the old `h2.endswith('..')`
        # check could never be true (h2 always ended with a space). Strip the
        # value's own trailing period instead, e.g. "1st century a.d." no longer
        # renders as "1st century a.d..".
        h2 = ""
        if author != 'unknown':
            h2 += f"{author.rstrip('.')}. "
        if date != 'unknown':
            h2 += f"{date.capitalize().rstrip('.')}. "
        if h2:
            # Fix: the subtitle was closed with a second opening <h2> tag.
            print(f"<h2><i>{h2.strip()}</i></h2>", file=ofh)
        t = it['text']['text'].replace('\n','<br>')
        seen_img.add( it['file'] )
        c += len(re.split(r'[ \n]+', it['text']['text']))
        c += len(re.split(r'[ \n]+', it['text']['title']))
        if h2:
            c += len(re.split(r'[ \n]+', h2.strip()))
        ct += 1
        if ct%2:
            # Odd entries: text on the left, image on the right.
            print(f"<table width=1800px><tr><td valign=top><p style='margin-right: 20px; font-size:20px;'>{t}</td>", file=ofh)
            print(f"<td valign=top><img src='./imgs/{it['file']}'/></td></tr></table>", file=ofh)
        else:
            # Even entries: image on the left, text on the right.
            print(f"<table width=1800px><tr><td valign=top><img src='./imgs/{it['file']}'/></td>", file=ofh)
            print(f"<td valign=top><p style='margin-left: 20px; font-size:20px;'>{t}</td></tr></table>", file=ofh)
        print("<br>", file=ofh)
        # Record what was rendered on the entry itself.
        final_list[idx]['selected'] = True
        final_list[idx]['selected_h1'] = it['text']['title']
        final_list[idx]['selected_h2'] = h2.strip()
        final_list[idx]['selected_text'] = t
print(c, ct)

52236 150


In [88]:
# Save final_list as indented JSON followed by a newline.
with open('tour3-4-1.described.texts.final.cleaned.json', mode='w', encoding='utf-8') as ofh:
    ofh.write(json.dumps(final_list, indent=2))
    ofh.write("\n")

In [8]:
import html
import json
import re

# Render the cleaned tour descriptions into a print-ready HTML catalogue
# (guide_v1.html): one "chapter" per illustration, with the image floated
# alternately to the right and to the left of the accompanying text.
# Prints the total word count and the number of chapters written.

# Context manager so the input file handle is closed deterministically.
with open('tour3-4-1.described.texts.final.cleaned.json', encoding='utf-8') as ifh:
    final_list = json.load(ifh)

seen_authors = set()
# Pre-seeded with date labels that must never be printed; every other date is
# added on first use, so each distinct date appears at most once in the guide.
seen_dates = {
    'imagined future', 'ancient civilizations era', 'timeless', 'present day',
    'futuristic, unspecified', 'contemporary, unspecified', 'speculative future',
    'speculative antiquity', 'modern conception',
}
seen_img = set()
c = 0   # running word count over titles, subtitles and texts of written chapters
ct = 0  # number of chapters written

# Positions in final_list that are left out of the guide (hand-picked list).
# A set gives O(1) membership tests and is built once instead of per iteration.
excluded_idx = {
    39, 41, 47, 70, 82, 83, 85, 99, 106, 108, 124, 126, 130, 131, 140, 141,
    144, 148, 158, 165, 170, 171, 172, 176, 177, 5, 14, 149,
}

prefix = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <style>
        @page {
            size: 297mm 297mm;
            margin: 10mm;
        }

        body {
            margin: 10px;
            font-family: 'Georgia', serif;
            line-height: 1.6;
            width: 1000px;
        }
        h1 {
            font-size: 2rem;
            font-family: 'Baskerville', serif;
            page-break-after: avoid;
            margin-top: 0px;
            margin-bottom: 10px;
        }
        h2 {
            font-size: 1.5rem;
            font-family: 'Baskerville', serif;
            page-break-after: avoid;
            margin-top: 0px;
            margin-bottom: 10px;
        }
        p {
            text-indent: 50px;
            text-align: justify;
            font-family: 'Helvetica', serif;
        }
        .chapter {
            break-inside: avoid;
            page-break-inside: avoid;
        }
        .float-image-right {
            float: right; /* Use 'right' for right-aligned image */
            margin: 0px 15px 20px 50px; /* Adjust margins for spacing */
            width: 450px; /* Adjust width to fit design */
            height: auto;
            position: relative;
            clear: right; /* Pushes the image downward */            
        }
        .float-image-left {
            float: left; /* Use 'right' for right-aligned image */
            margin: 0px 50px 20px 15px; /* Adjust margins for spacing */
            width: 450px; /* Adjust width to fit design */
            height: auto;
            position: relative;
            clear: right; /* Pushes the image downward */            
        }
    </style>
</head>
<body>"""

# Closes the elements opened by `prefix` so the output is a complete document.
suffix = "</body>\n</html>"

with open('guide_v1.html', 'w', encoding='utf-8') as ofh:
    print(prefix, file=ofh)
    for idx, it in enumerate(final_list):
        entry = it['text']

        # clean strange dates and authors
        date = re.sub(r'(constructed|observation|-inspired|vision)', '', entry['date'].lower())
        date = date.replace('contemporary era', 'contemporary').strip()
        if date in seen_dates:
            date = 'unknown'
        else:
            seen_dates.add(date)
        author = entry['author']
        if author.lower() in seen_authors:
            author = 'unknown'
        else:
            seen_authors.add(author.lower())
        # An author credit is kept only when the (still visible) date mentions
        # 'contemporary', '20' or '19th', and never for 'spanning ...' dates.
        if not ('contemporary' in date or '20' in date or '19th' in date):
            author = 'unknown'
        if 'spanning ' in date:
            author = 'unknown'

        # The skip checks run after the bookkeeping above, so a skipped entry
        # still marks its date and author as seen (same ordering as before).
        if it['file'] in seen_img or idx in excluded_idx:
            continue

        title = entry['title']
        # Escape everything interpolated into markup: the texts are free-form
        # and may contain '<', '&' or quotes.
        print(f"<div class='chapter'><h1>{html.escape(title, quote=False)}</h1>", file=ofh)

        # Subtitle: "<Author>. <Date>. ". Trailing periods are removed from each
        # part first so an author such as "J. Doe Jr." does not produce "..".
        # (The former `h2.endswith('..')` guard could never fire because h2
        # always ends with a space.)
        h2 = ""
        if author != 'unknown':
            h2 += f"{author.rstrip('.')}. "
        if date != 'unknown':
            h2 += f"{date.capitalize().rstrip('.')}. "
        if h2:
            subtitle = html.escape(h2.strip().strip('.'), quote=False)
            # Properly closed heading (was "<h2>...<h2>").
            print(f"<h2><i>{subtitle}</i></h2>", file=ofh)

        # One <p> per source line; escape first so the inserted tags survive.
        t = html.escape(entry['text'], quote=False).replace('\n', '</p><p>')
        seen_img.add(it['file'])

        # Word counts are taken on the raw (unescaped) strings.
        c += len(re.split(r'[ \n]+', entry['text']))
        c += len(re.split(r'[ \n]+', title))
        if h2:
            c += len(re.split(r'[ \n]+', h2.strip()))
        ct += 1

        # Odd chapters float the image right, even chapters left.
        floating = "right" if ct % 2 else "left"
        # quote=True: these values sit inside single-quoted attributes.
        src = html.escape(f"./imgs/{it['file']}", quote=True)
        alt = html.escape(title, quote=True)
        print(f"<img src='{src}' alt='{alt}' class='float-image-{floating}'>\n</div>\n<div>", file=ofh)
        print(f"<p>{t}</p>\n</div>\n", file=ofh)
    print(suffix, file=ofh)

print(c, ct)

52236 150
