In [1]:
#!pip install --upgrade transformers==4.37.2

In [2]:
# Don't even know if these are necessary, but most models have required them
#!pip install einops accelerate

In [3]:
#!pip install flash-attn --no-build-isolation

In [4]:
#!pip install timm

In [5]:
import math
import torch
from transformers import AutoTokenizer, AutoModel

In [6]:
def split_model(model_name):
    """Build a ``device_map`` that spreads an InternVL2 language model over all visible GPUs.

    The decoder layers are distributed across ``torch.cuda.device_count()``
    devices. GPU 0 also hosts the vision tower, the projector and the
    embedding/output modules, so it is counted as half a GPU and receives
    roughly half as many decoder layers as the others.

    Args:
        model_name: Short model name, e.g. ``'InternVL2-Llama3-76B'``. Must be
            one of the keys of the layer-count table below.

    Returns:
        dict mapping module names (str) to GPU indices (int).

    Raises:
        KeyError: if ``model_name`` is not in the layer-count table.
        RuntimeError: if no CUDA device is visible.
    """
    layers_by_model = {
        'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32,
        'InternVL2-26B': 48, 'InternVL2-40B': 60, 'InternVL2-Llama3-76B': 80}
    if model_name not in layers_by_model:
        raise KeyError(
            f'Unknown model name {model_name!r}; expected one of {sorted(layers_by_model)}')
    num_layers = layers_by_model[model_name]

    world_size = torch.cuda.device_count()
    if world_size < 1:
        # Without this guard the quota list below is empty and indexing it
        # fails with an uninformative IndexError.
        raise RuntimeError(
            'split_model needs at least one CUDA device, '
            'but torch.cuda.device_count() returned 0')

    # Since the first GPU will be used for ViT, treat it as half a GPU.
    per_gpu = math.ceil(num_layers / (world_size - 0.5))
    layers_per_gpu = [per_gpu] * world_size
    layers_per_gpu[0] = math.ceil(per_gpu * 0.5)

    device_map = {}
    layer_cnt = 0
    for gpu_index, quota in enumerate(layers_per_gpu):
        # The ceil() rounding can hand out more slots than there are layers;
        # clamp so no entry is created for a layer index that does not exist.
        for _ in range(min(quota, num_layers - layer_cnt)):
            device_map[f'language_model.model.layers.{layer_cnt}'] = gpu_index
            layer_cnt += 1

    # Everything that is not a decoder layer lives on GPU 0. Both
    # tok_embeddings/output and embed_tokens/lm_head are listed so the map
    # covers either naming scheme.
    device_map['vision_model'] = 0
    device_map['mlp1'] = 0
    device_map['language_model.model.tok_embeddings'] = 0
    device_map['language_model.model.embed_tokens'] = 0
    device_map['language_model.output'] = 0
    device_map['language_model.model.norm'] = 0
    device_map['language_model.lm_head'] = 0
    # The final decoder layer is pinned to GPU 0 as well, next to the final
    # norm and output head.
    device_map[f'language_model.model.layers.{num_layers - 1}'] = 0

    return device_map

In [7]:
# Hugging Face repo id of the checkpoint; reused below when loading the tokenizer.
path = "OpenGVLab/InternVL2-Llama3-76B"

# Per-module GPU placement computed from the number of visible CUDA devices.
device_map = split_model('InternVL2-Llama3-76B')

# NOTE(review): trust_remote_code=True executes modelling code fetched from the
# repo, and no revision is pinned here -- consider passing revision=<commit>.
model = AutoModel.from_pretrained(
    path,
    device_map=device_map,
    trust_remote_code=True,
    use_flash_attn=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
)
# Inference only: switch the module to evaluation mode.
model = model.eval()

A new version of the following files was downloaded from https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B:
- modeling_intern_vit.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/122k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/32 [00:00<?, ?it/s]

model-00001-of-00032.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00032.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00003-of-00032.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00004-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00005-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00006-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00007-of-00032.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00008-of-00032.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00009-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00010-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00011-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00012-of-00032.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00013-of-00032.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00014-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00015-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00016-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00017-of-00032.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00018-of-00032.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00019-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00020-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00021-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00022-of-00032.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00023-of-00032.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00024-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00025-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00026-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00027-of-00032.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00028-of-00032.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00029-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00030-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00031-of-00032.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00032-of-00032.safetensors:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/32 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

In [8]:
import numpy as np
import torchvision.transforms as T
from PIL import Image
from torchvision.transforms.functional import InterpolationMode

In [9]:
# Load the tokenizer from the same repo id (`path`) as the model.
# use_fast=False requests the non-fast tokenizer implementation;
# trust_remote_code=True allows tokenizer code shipped in the repo to run.
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)



tokenizer_config.json:   0%|          | 0.00/57.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [10]:
# Per-channel normalisation statistics (RGB order) applied after ToTensor().
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

def build_transform(input_size):
    """Return the torchvision pipeline that turns one PIL tile into a model input.

    Steps, in order: convert to RGB if the image is in another mode, resize to
    ``input_size`` x ``input_size`` with bicubic interpolation, convert to a
    tensor, and normalise with ``IMAGENET_MEAN`` / ``IMAGENET_STD``.

    Args:
        input_size: Edge length in pixels of the square output.

    Returns:
        A ``T.Compose`` callable that accepts a PIL image.
    """
    def _ensure_rgb(img):
        # Leave RGB images untouched; convert every other mode.
        return img if img.mode == 'RGB' else img.convert('RGB')

    steps = [
        T.Lambda(_ensure_rgb),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose(steps)

In [11]:
def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
    """Pick the tile grid whose width/height ratio is closest to the image's.

    Args:
        aspect_ratio: Width divided by height of the source image.
        target_ratios: Iterable of ``(cols, rows)`` candidates, scanned in order.
        width: Source image width in pixels.
        height: Source image height in pixels.
        image_size: Edge length in pixels of one square tile.

    Returns:
        The winning element of ``target_ratios``, or ``(1, 1)`` when
        ``target_ratios`` is empty.

    On an exact tie in ratio distance, a later candidate replaces the current
    choice only if the source image area exceeds half the area of that
    candidate's full tile grid.
    """
    chosen = (1, 1)
    smallest_gap = float('inf')
    pixel_area = width * height

    for candidate in target_ratios:
        cols, rows = candidate[0], candidate[1]
        gap = abs(aspect_ratio - cols / rows)

        if gap < smallest_gap:
            # Strictly better match: take it unconditionally.
            smallest_gap, chosen = gap, candidate
            continue

        if gap == smallest_gap and pixel_area > 0.5 * image_size * image_size * cols * rows:
            # Same ratio but a larger grid: only worth it for large images.
            chosen = candidate

    return chosen

In [12]:
def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
    """Resize an image onto a grid of square tiles and return the tiles.

    A ``(cols, rows)`` grid with ``min_num <= cols * rows <= max_num`` is
    chosen by ``find_closest_aspect_ratio``. The image is resized to exactly
    fill that grid and cut into ``image_size`` x ``image_size`` tiles, row by
    row from the top-left.

    Args:
        image: PIL-style image exposing ``size``, ``resize`` and ``crop``.
        min_num: Minimum number of tiles in the grid.
        max_num: Maximum number of tiles in the grid.
        image_size: Edge length in pixels of each tile.
        use_thumbnail: If true and more than one tile was produced, append the
            whole image resized to a single tile.

    Returns:
        List of tile images, optionally followed by the thumbnail.
    """
    orig_width, orig_height = image.size
    aspect_ratio = orig_width / orig_height

    # Enumerate every (cols, rows) grid whose tile count is within bounds,
    # ordered by total number of tiles.
    candidate_grids = set(
        (cols, rows)
        for n in range(min_num, max_num + 1)
        for cols in range(1, n + 1)
        for rows in range(1, n + 1)
        if cols * rows <= max_num and cols * rows >= min_num)
    candidate_grids = sorted(candidate_grids, key=lambda grid: grid[0] * grid[1])

    # Grid that best matches the source aspect ratio.
    best_grid = find_closest_aspect_ratio(
        aspect_ratio, candidate_grids, orig_width, orig_height, image_size)

    # Pixel dimensions of the full grid and its tile count.
    target_width = image_size * best_grid[0]
    target_height = image_size * best_grid[1]
    blocks = best_grid[0] * best_grid[1]

    # Stretch the image to the grid, then cut it tile by tile.
    resized_img = image.resize((target_width, target_height))
    tiles_per_row = target_width // image_size
    processed_images = []
    for tile_index in range(blocks):
        row, col = divmod(tile_index, tiles_per_row)
        box = (
            col * image_size,
            row * image_size,
            (col + 1) * image_size,
            (row + 1) * image_size,
        )
        processed_images.append(resized_img.crop(box))
    assert len(processed_images) == blocks

    if use_thumbnail and len(processed_images) != 1:
        # Global view of the whole image, added only alongside multiple tiles.
        processed_images.append(image.resize((image_size, image_size)))
    return processed_images

In [13]:
import requests
from io import BytesIO

def load_image(image_file, input_size=448, max_num=12, timeout=30):
    """Load an image from a URL or local source and return its preprocessed tiles.

    Args:
        image_file: Either an ``http://`` / ``https://`` URL string, or anything
            ``PIL.Image.open`` accepts (a path or file-like object).
        input_size: Edge length in pixels of each tile fed to the model.
        max_num: Maximum number of tiles the image may be split into (the
            thumbnail appended for multi-tile images is extra).
        timeout: Seconds to wait for the HTTP request before giving up.
            Only used when ``image_file`` is a URL.

    Returns:
        A tensor stacking one transformed tile per row of the first dimension.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the download exceeds ``timeout``.
    """
    # Match real URL schemes only, so a local path that merely starts with
    # "http" (e.g. "http_cache/img.jpg") is still opened from disk.
    if isinstance(image_file, str) and image_file.startswith(("http://", "https://")):
        # A timeout keeps one stalled download from hanging the notebook.
        response = requests.get(image_file, timeout=timeout)
        # Fail on 4xx/5xx here rather than letting PIL choke on an error page.
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert('RGB')
    else:
        image = Image.open(image_file).convert('RGB')

    transform = build_transform(input_size=input_size)
    images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
    pixel_values = torch.stack([transform(img) for img in images])

    return pixel_values

In [14]:
# Image URLs to describe (all under samplebook.photos/img/); the driver loop
# further down calls get_response() on each entry in this order.
urls = [
 'https://samplebook.photos/img/4786a.jpg',
 'https://samplebook.photos/img/284v.jpg',
 'https://samplebook.photos/img/3193c.jpg',
 'https://samplebook.photos/img/948d.jpg',
 'https://samplebook.photos/img/3191i.jpg',
 'https://samplebook.photos/img/4788oo.jpg',
 'https://samplebook.photos/img/2086d.jpg',
 'https://samplebook.photos/img/4781r.jpg',
 'https://samplebook.photos/img/5521n.jpg',
 'https://samplebook.photos/img/2073cc.jpg',
 'https://samplebook.photos/img/2085b.jpg',
 'https://samplebook.photos/img/4782c.jpg',
 'https://samplebook.photos/img/4783v.jpg',
 'https://samplebook.photos/img/4912gg.jpg',
 'https://samplebook.photos/img/304h.jpg',
 'https://samplebook.photos/img/2078i.jpg',
 'https://samplebook.photos/img/2077q.jpg',
 'https://samplebook.photos/img/3192h.jpg',
 'https://samplebook.photos/img/904x.jpg',
 'https://samplebook.photos/img/4792b.jpg',
 'https://samplebook.photos/img/5301p.jpg',
 'https://samplebook.photos/img/5207j.jpg',
 'https://samplebook.photos/img/1007p.jpg',
 'https://samplebook.photos/img/17l.jpg',
 'https://samplebook.photos/img/286t.jpg',
 'https://samplebook.photos/img/287g.jpg',
 'https://samplebook.photos/img/3189j.jpg',
 'https://samplebook.photos/img/4911d.jpg',
 'https://samplebook.photos/img/4785a.jpg',
 'https://samplebook.photos/img/4793a.jpg'
]

In [15]:
# Instruction sent with every image; get_response() prefixes it with the
# '<image>' placeholder line before passing it to the model.
prompt = 'There are two objects in this image. One is a color checker, and the other is a photographic sample book, open to a page containing a photograph. Give a detailed description of the photograph.'

In [16]:
def get_response(url):
    """Ask the loaded model to describe the image at ``url``.

    Uses the module-level ``model``, ``tokenizer`` and ``prompt``. Generation
    samples (``do_sample=True``), so repeated calls on the same image may
    return different text.

    Args:
        url: Image location accepted by ``load_image``.

    Returns:
        Whatever ``model.chat`` returns for the question.
    """
    generation_config = {'max_new_tokens': 1024, 'do_sample': True}

    # set the max number of tiles in `max_num`
    tiles = load_image(url, max_num=12)
    # Cast to bfloat16 and move to the current CUDA device.
    pixel_values = tiles.to(torch.bfloat16).cuda()

    # '<image>' marks where the image is placed relative to the text prompt.
    return model.chat(tokenizer, pixel_values, f'<image>\n{prompt}', generation_config)

In [17]:
# Maps each image URL to {model id: generated description}. The inner key is
# taken from `path` so the label always matches the checkpoint actually loaded,
# instead of a second hard-coded copy of the repo id.
responses_intern = {}

for url in urls:
    response = get_response(url)
    # Echo each result as it arrives so progress is visible during the long run.
    print(url,"\n",response,"\n\n")
    responses_intern[url] = {path: response}

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/4786a.jpg 
 The photograph presented in the open sample book depicts a rugged, rocky terrain that appears harsh and inhospitable. The image is black and white, enhancing the contrast between the shadows and the brightly illuminated areas. On the right side of the picture, a steep, uneven incline is visible, showcasing layered rock formations with sharp, jagged edges. The textures of the rocks are highly pronounced, revealing intricate details of their surfaces. Small, indistinct figures, likely humans, can be seen climbing or engaged in some activity on the rock face, though their specific actions are difficult to discern due to the distance. The scene conveys a sense of adventure and the challenges of navigating such terrain, suggesting a possible historical or exploratory context. The overall impression is one of strength and resilience, both in the natural landscape and, by implication, in the human figures interacting with it. 




Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/284v.jpg 
 The photograph in the sample book is a black-and-white image depicting a high-voltage electrical substation. The focus is on two tall, cylindrical insulators that stand vertically on the substation floor. Each insulator has a top part featuring a cluster of bushings and lines that ascend and connect to the overhead lattice of power lines and structures. The metallic connectors at the top of the insulators suggest that they are part of the transmission network, facilitating the flow of electricity. In the background, a dense network of metal trusses and support structures is visible, forming a complex web that typifies the architecture of electrical substations.

The word "VARILOUR" is printed at the bottom left of the page, followed by the statement "WHITE GLOSSY." Additionally, there is a small logo of "KOPION" in the bottom center area of the page. This information likely indicates that the photograph is an example of how images appear when pr

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/3193c.jpg 
 The photograph displayed on the right side of the open publication reveals a well-decorated room. The photograph is black and white, adding a timeless quality to the scene.

### Description of the Photograph:

1. **Lighting and Decor:**
    - **Chandelier:** Central to the image is a large, ornate chandelier hanging from the ceiling. The chandelier is decorated with elaborate crystal pendants, which would add a sense of grandeur and sparkle when lit, reflecting a refined aesthetic. The photo captures the chandelier's intricate design clearly.

2. **Furniture:**
    - **Chairs and Sofa:** There are several chairs and a sofa around what appears to be a central table or coffee table. The furniture has a classic style, with wooden frames and upholstered seating surfaces. The patterns on the upholstery are subtly visible, suggesting a traditional design.
    - **Table:** There is a table at the center of the room, potentially used as a coffee table.

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/948d.jpg 
 The photograph in the sample book appears to be an old, black-and-white image showcasing a scene from a forest or wooded area. 

Here are the details:

1. **Foreground**: Several trees dominate the foreground. These trees have thin trunks and are likely birch trees, given their characteristic white bark with horizontal markings. The branches spread out and appear to create a network of intricate patterns against the sky.

2. **Middle Ground**: Behind the trees, the forest continues with more foliage. The greenery and density of the middle ground give a sense of depth to the image, hinting at a well-wooded area.

3. **Background**: In the background, there appears to be a waterfall or a stream of some sort, partially obscured by the trees. The water cascades down, adding a dynamic element to the otherwise still scene of the forest. The presence of the water makes the setting more picturesque and serene.

4. **Lighting and Texture**: The lighting 

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/3191i.jpg 
 The photograph in the sample book features the iconic Trevi Fountain, a famous baroque fountain located in Rome, Italy. Here’s a detailed description of the photograph based on the given image:

1. **Subject - Trevi Fountain**: The main subject of the photograph is the Trevi Fountain, a famous landmark. The fountain is characterized by its ornate sculptures and dramatic cascading water.

2. **Architecture**: The fountain is set in a classical architectural structure. The backdrop of the image shows a building with several large windows and ornamental detailing typical of Baroque architecture.

3. **Foreground and Steps**: Steps leading up to the fountain are prominently featured in the foreground. The steps appear to be made of stone and are broad and sturdy.

4. **Water Feature**: The fountain’s water cascades down the steps in dramatic fashion, creating a dynamic visual effect. At the top of the fountain, there is a statue that appears to be 

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/4788oo.jpg 
 In this image, there are two main objects: a color checker and an open sample book with a photograph. Here is a detailed description of the photograph:

1. **Photograph Composition:**
   - **Subject:** The photograph features a portrait of a man with a serious or contemplative expression. 
   - **Attire:** The man is wearing a hat and a jacket, suggesting an outdoor or rugged context. 
   - **Pose:** He is sitting and holding what appears to be a clay pipe in his right hand. His facial expression seems to be thoughtful or somber. 
   - **Background:** The background of the photograph is indistinct, providing a focus on the subject’s face and upper body. It is monochromatic, which emphasizes the subject more intensely.

2. **Technical Aspects:**
   - **Monochrome:** The photograph is monochromatic, with a grayscale palette. The absence of color directs all visual interest towards the subject’s expression, attire, and the texture of his features

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/2086d.jpg 
 The photograph depicted in the image is a black and white portrait of a middle-aged man lifting a child above his head. Both the man and the child are dressed in light-colored clothing. The man has a beard and is looking up at the child, whom he is supporting by the child's legs. The child appears to be wearing a bib or light smock and looks content and safe in the man's grasp. 

The photograph is part of a sample book showcasing Kodak Ektalure Paper X, which likely refers to the type of photographic paper used to print this image. The image's clarity and contrast are well-preserved, illustrating the high quality of Kodak's photographic paper. The color checker above the sample book is likely used to ensure accurate color calibration and reproduction of the photographs in the book. 




Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/4781r.jpg 
 The photograph presents a black-and-white image of a smiling man, positioned on a two-page spread in what appears to be a photographic sample book. The man is seated, likely at a desk or table, with his arms gently resting on the surface. He wears a light-colored, long-sleeved shirt and maintains a relaxed yet confident posture. His expression is friendly and approachable, enhanced by his direct gaze and the warmth conveyed through his smile. 

The photograph occupies the right page of the open book, with the man centered on the right-hand side. He has short, neatly styled hair and a clean-shaven face. His attire and overall demeanor suggest a casual, yet professional or formal setting, possibly from a mid-20th century context given the stylistic elements of the image and the attire.

Adjacent to the photograph, on the left page of the book, is a detailed "Print Toning Table." The contents of the table list various types of photographic paper b

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/5521n.jpg 
 In the photograph, there is an object that appears to be a stack of medals. The medals are organized in a circular, tiered structure, with each medal seemingly looped together with a white ribbon that has a metallic buckle or clasp. The circular array presents a detailed and methodical arrangement, emphasizing the organized display of the medals. The background of the photograph is intentionally dark, which helps in highlighting the medals and their shiny, reflective surfaces against the contrasting backdrop. The lighting and composition of the photograph effectively bring out the textural details and the metallic sheen of the medals, emphasizing their significance and value.

Additionally, there is a caption beneath the photograph that reads: "Kodak Medalist Paper F Available in SW & DW." This suggests that the photograph is likely a sample image from a Kodak Medalist Paper type, used to showcase the quality and features of this particular pho

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/2073cc.jpg 
 The image exhibits a spiral-bound sample book titled "KODAK BROMIDE," specifically a waterproof, white matte, glossy A4 paper type (KODAK BROVIRA WAT. GL. F.A.). The left page features a black-and-white aerial photograph of a city area with many buildings and streets. The streets seem organized in a grid pattern, characteristic of many urban layouts. Major buildings form a larger, squared perimeter, indicating it might be an important compound or facility. There are open areas surrounding these buildings, suggesting this complex might be situated near water or open land. The right page includes detailed text descriptions about the photographic paper's specifications, usage, and handling instructions, which help in interpreting the photographic quality. The image includes a small text, "Printed by Kodak Ltd. at Kodak Ltd., Harrow, England," adding to the authenticity and quality assurance of the photo paper.

Above the sample book is a "Macbeth

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/2085b.jpg 
 The photograph in the sample book captures a moment of leisure by a body of water. Here are the details:

1. **Setting**: The scene appears to be outdoors near a body of water.
2. **Ground**: The ground surface is textured, suggesting it might be a pebbled or sandy beach.
3. **Subject**: The photograph depicts two figures.
   - One figure, closer to the water, seems to be throwing a stone or pebble into the water.
   - The other figure is seated on the ground, slightly behind the first figure.
4. **Activity**: The primary activity appears to be stone-throwing, a common recreational activity near lakes or rivers.
5. **Attire**: Both individuals are dressed in casual attire suitable for a day out.
6. **Lighting and Shadows**: The image appears to be in black and white, which affects the perception of color and tone. Shadows and lighting indicate the photograph was taken on a sunny day.

In conclusion, the photograph captures a candid and relatabl

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/4782c.jpg 
 The photograph is a black and white image showcasing a person seated on a sailboat. The individual is wearing traditional attire, which includes a long, flowing garment that covers most of their body and a headscarf. They are positioned in the center of the composition, holding onto a section of the boat’s rigging, likely a part of the sail or its supporting structure. 

The person is seated on a wooden deck, with various parts of the sail and the mast visible around them. The sail appears to be made of a light, perhaps woven fabric, and is hoisted at an angle that suggests the boat is in motion, possibly catching the wind. The lines and ropes of the boat's rigging are also visible, indicating the complexity and intricacy of the sailboat's design.

In the background, there is an expanse of open water, which adds to the maritime theme of the image. The horizon line is not clearly depicted, likely because the focus is on the person and the immedi

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/4783v.jpg 
 The image depicts an open page from a photographic sample book. The photograph on the left page features two objects: a decorative vase and a small elephant statue. 

**Vase:**
- The vase is tall and slender with an ornate pattern.
- It appears to be made of a reflective material such as metal or a glossy ceramic.
- The pattern includes intricate swirls and floral motifs that cover the entire surface.
- The vase casts a shadow, indicating a light source to the left.

**Small Elephant Statue:**
- The elephant statue is placed to the right of the vase.
- The statue is depicted in mid-motion, as if walking or stepping forward.
- The elephant has its trunk raised slightly and its ears are perked up.
- The surface of the statue also appears reflective, resembling a smooth, metallic finish.
- Like the vase, this statue too casts a distinct shadow suggesting the direction of light.

**Photograph Characteristics:**
- The photograph is black-and-white, 

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/4912gg.jpg 
 In this image, the photograph situated in the photographic sample book displays a black-and-white image of what appears to be a large dam or structure related to water management. This conclusion is drawn from the curved, towering wall, characteristic of dam structures, which spans horizontally across the photograph. The dam wall reflects a sense of scale and engineering prowess typical of such constructions.

At the bottom of the photograph, there is a body of water indicative of a reservoir or river contained by the dam. The water's surface is mostly flat with some reflections, suggesting that the environment at the time the photograph was taken was relatively calm. The dam itself is intricately detailed with visible textures and shadows, emphasizing its robust construction and the play of light across its curvature.

Additionally, to the right side of the dam structure, there appears to be a series of platforms or walkways, possibly used fo

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/304h.jpg 
 The photograph in the image appears to be an instructional or demonstration image, which is part of the Kodak Photographic Sample Book. Let's break down the details meticulously.

**1. Title and Explanation:**
   - The photograph is accompanied by descriptive text, although the full explanation isn’t entirely legible here, it typically would include a comprehensive guide to the techniques or products demonstrated within the image.

**2. Equipment and Setting:**
   - The photograph captures a person engaged in the process of operating or handling photographic equipment. The individual seems to be working with a larger format camera or similar apparatus.
   - The equipment in the image features a cylindrical section, likely indicating a part of a camera, lens, or film loader.
   - Given the appearance and the context provided by the adjacent text in the sample book, it appears to demonstrate an instructional or procedural guide, possibly related t

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/2078i.jpg 
 The photograph in the image is black and white and depicts an object that resembles a vintage camera. The camera is positioned in such a way that it is facing slightly to the right, showcasing its front and part of its side. The image is characterized by strong contrasts, with the camera's body appearing dark while its front lens and parts of its metallic surface reflect light, creating highlights.

The camera is mounted on a flat surface, and it looks like an older or retro model. Below the photograph, there is text that reads "ILFORD Plastika A.2," which likely refers to a type of photographic paper produced by the company Ilford. The text is clear, with a larger font used for "ILFORD" and a smaller font for "Plastika A.2." The overall setting of the photograph is minimalist, with the camera being the primary subject, and there are no additional objects or backgrounds within the frame to divert attention. 




Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/2077q.jpg 
 The image contains a color checker and an open photographic sample book. The sample book displays a photograph, and here is a detailed description:

### Photograph Description:

1. **Subject Matter**:
   - The photograph features a mountainous landscape, likely from an aerial or high vantage point.
   - The central focus is a winding river or glacier cutting through rugged terrain, surrounded by steep canyon walls or mountains.
   - The setting appears desolate with no human presence or man-made structures, emphasizing the natural, untouched scenery.

2. **Tone and Style**:
   - The overall tone of the photograph is monochrome or grayscale.
   - The contrast seems to be well-balanced, showcasing the textures and contours of the landscape clearly.
   
3. **Print Quality**:
   - The photograph appears to be of professional quality with clear details and good print resolution.
   - The gradation shades provide depth and dimensionality to the lands

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/3192h.jpg 
 Certainly! Here's a detailed description of the photograph in the image:

**Content of the Photograph**:
- **Foreground**: The forefront of the image features an arched structure made of stone or concrete. The arch acts as a frame for the central visual element, suggesting classical architecture.
- **Midground**: The arch looks out over a paved pathway leading away from the vantage point. This pathway cuts through what appears to be a garden or grassy field, suggesting the setting might be a landscaped park or cemetery.
- **Background**: Further down the path, a tall, narrow column or obelisk rises prominently. This structure is thin and cylindrical, likely made of a similar material to the arch. Surrounding the obelisk are two tall, slender trees, contributing to the vertical emphasis in the composition.
- **Background**: Beyond the column and trees, the vista opens up to a more distant landscape, likely a valley or open plain extending to the

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/904x.jpg 
 The photograph in the sample book depicts a wintry landscape scene. The background is adorned with tall, bare trees, possibly deciduous trees that have shed their leaves in winter. The ground is covered with a thick layer of snow. This snowy backdrop extends throughout the image, creating a sense of depth and highlighting the serene, cold environment.

In the foreground, there is a partially frozen stream or small river. The water is visible in places despite the snow cover. The stream’s pathway cuts through the snow-covered landscape, potentially suggesting the contrast between the coldness of the snow and the enduring flow of water.

The photograph seems to be monochrome (black and white), emphasizing the textures and shades of the snowy scene without the distraction of color. This choice might be intending to focus on the interplay between light and shadow, with the snow providing a natural canvas for these interactions.

The edges of the pho

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/4792b.jpg 
 The photograph in the sample book depicts a collection of metallic objects with a complex, symmetrical design. The objects are arranged in a circular formation on a flat surface and appear to be metallic due to the reflections and shadows. They are possibly parts of a machine or mechanical device, given the intricate patterns and circular motifs that resemble gears or part of a mechanical structure.

The image is in black and white, offering a stark contrast which highlights the details and textures of the metallic objects. There appear to be multiple layers or tiers to the arrangement, providing depth and complexity to the composition. The background of the photograph is dark, making the metallic patterns stand out prominently. The objects possess circular designs, which might suggest functionality related to rotational movement.

In summary, the photograph captures a detailed arrangement of metallic, circular objects, possibly mechanical part

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/5301p.jpg 
 The photograph features a monochromatic, black-and-white image of a person wearing a decorated cap, likely a traditional or military headdress. The person in the photograph appears to be engaging in a thoughtful or intense moment, as indicated by their clasped hands near their face. 

The setting around this individual adds a dramatic touch to the image. Positioned next to the person is a ceramic vase adorned with a detailed floral pattern. Below the vase, there is a piece of sheer, fluttering fabric. The play of light and shadow, particularly the way the light falls on the person’s face and the fabric, adds a rich contrast and depth to the photograph. 

Adjacent to the photo, on the same page as the image, is text that credits the photographer as "Fredrick A. Meyer." Below the attribution is the name of a gallery, "LIFEBROM GALERIE," followed by further details and a credit line to another person, "Stuart. Double Weight," likely referencing th

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/5207j.jpg 
 Certainly! In this image, there are two objects: a color checker and a photographic sample book opened to a page with a photograph. Let’s delve into a detailed description of the photograph:

### Description of the Photograph:
1. **Content**: The photograph depicts a small bird, possibly a chick or a fledgling, nestled amidst some fabric and fibers, likely part of a nest or bed of material designed for young birds or animals. The bird appears to be looking straight at the camera with its beak slightly open, a typical pose in avian photography.
   
2. **Composition**: The bird is quite central in the composition, but its placement is slightly off-center to the right side of the image, creating a more dynamic and engaging frame. The background contains a mix of textures and shadows, adding depth to the photograph.

3. **Color & Tone**: The image appears to be black and white, or it could be a high-contrast image where the colors have been desatur

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/1007p.jpg 
 Certainly! Here's a detailed description of the photograph found in the sample book, alongside additional contextual information based on the image:

**Description of the Photograph:**

The photograph depicts a young child standing and laughing joyfully. The child appears to be positioned slightly to the right, looking perhaps at a person or an object off-camera, which has prompted a moment of genuine laughter. Here are some detailed observations:

1. **Child’s Appearance:**
   - **Clothing:** The child is dressed in a simple and classic outfit. They are wearing a short-sleeved shirt buttoned up to the collar and dark shorts or trousers. A belt is also visible, helping to keep the shorts or trousers in place.
   - **Footwear:** The child is wearing dark-colored shoes that look formal and polished, complementing the outfit.
   - **Hair:** The child has curly hair, adding to the overall charming and lively presentation.

2. **Posture and Expressi

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/17l.jpg 
 Certainly! Let's take a closer look at the photographic sample book and analyze the photograph depicted on the open page titled "REKORD-RAPID 119".

**Overall Description:**

The photograph is a monochrome (black and white) image. It features a woman, presumably in her mid-20s to early 40s, standing confidently in what appears to be an industrial setting. The composition is dynamic, with the woman positioned slightly off-center to the right, and the background and objects around her help to frame and add context to the image.

**Details of the Woman:**

1. **Pose and Attire:**
   - The woman is standing with her left hand on her hip, projecting a strong and confident demeanor.
   - She is dressed in a sleeveless dress that reaches her knees. The dress is fitted, likely a medium to light weight fabric.
   - Her hair is short and styled in a manner typical of the mid-20th century, possibly indicating the photograph’s vintage context.

2. **Facial E

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/286t.jpg 
 The photograph in the sample book features a young woman standing by a white picket fence. She is dressed in a light, possibly floral, dress with a fitted bodice and a full skirt that reaches her knees. Her hair is styled in a neat, medium-length bob, and she has a pleasant expression on her face, possibly smiling. 

The background shows a well-maintained garden with trees and shrubs, giving a sense of a suburban setting. The lighting in the photograph is natural, likely taken during the daytime, casting soft shadows and highlighting the details of the clothing and the foliage in the background.

The image appears to be taken in black and white, with a focus on capturing the contrast and shades accurately. The photo is identified as being printed on ANSCO Convira (Violet D.W. Grades 7 1/4 x 5 3/4), which indicates the type of photographic paper used, suggesting a high-quality print.

Overall, the photograph exemplifies classic mid-20th century p

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/287g.jpg 
 The photograph, located on an open page of a sample book, depicts two young women leaning against a large window with multiple panes. They are positioned side by side, with one of them sitting lower than the other. Both are dressed in a style reminiscent of punk or grunge fashion. 

The woman on the left is seated, her legs crossed, and she props her right arm on her raised knee. She has long, dark, slightly disheveled hair that falls over her face, partly obscuring it. She is wearing a dark-colored jacket with a patterned design, light-colored jeans, and black ankle boots. She gazes off into the distance, exuding a relaxed, contemplative demeanor.

The woman on the right stands with her left knee slightly bent, one hand on her hip, and the other supporting herself against the window frame. She dons a black, textured leather jacket and distressed white jeans that are frayed and ripped at the knees. Her footwear consists of black ankle boots, sim

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/3189j.jpg 
 The photograph in the image appears to be a vintage, black-and-white landscape image. It depicts a gravel road surrounded by open fields, with gentle hills stretching in the distance. People can be seen walking on the road, possibly engaged in agricultural activities, and there are utility poles lining the side of the road. The photograph is bordered with a thin, white frame and is presented on a page with a pale, textured background. Below the photograph, there is text that seems to reference a specific photograph or page number (possibly "ROYAL VILLES - CONTP. No. 2"), indicating its source or location in the photo book. This suggests it's part of a larger collection of images. 




Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/4911d.jpg 
 The photograph in the sample book captures a pastoral scene set against a large, leafy tree which dominates the backdrop. The scene depicts a rustic environment, likely from a time in history characterized by simpler, rural lifestyles.

In the foreground, two bovines are central to the image -- a cow and an ox, each exhibiting distinctive markings. The cow, positioned on the left side of the image, stands slightly behind the ox with its head turned towards the ox, revealing a gentle demeanor. The ox, on the right side, is in profile and appears to be calmly poised in front of the cow, with its large horns directed forwards. Both animals are depicted in a natural setting, which enhances the tranquil ambiance of the scene.

Adjacent to the animals, on the left side, there is a small structure that resembles a thatched hut. The roof of the hut is indicative of traditional construction methods commonly used in rural or village settings. A few slend

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/4785a.jpg 
 The image showcases a photographic sample book opened to a page containing a photograph. Let's delve into the intricate details of the photograph:

1. **Subject**:
   - The photograph features a figure lying down in a relaxed, perhaps dreamlike or serene pose. This figure occupies a significant portion of the image and is the central focus. 
   - The face of the individual is visible, with delicate facial features and a calm expression, adding to the tranquil mood of the image.

2. **Color and Tone**:
   - The photograph predominantly employs muted, cool tones, including shades of blues, whites, and soft grays. These colors contribute to the ethereal and peaceful atmosphere of the image.
   - The use of light and shadow enhances the visual depth, giving the photograph a slightly surreal quality. There is a delicate interplay between light areas and darker regions, emphasizing the contours and textures within the photograph.

3. **Composition**:

Setting `pad_token_id` to `eos_token_id`:128003 for open-end generation.


https://samplebook.photos/img/4793a.jpg 
 The photograph in the sample book depicts a person engaged in a domestic activity. Here are the detailed observations:

1. **Subject of the Photograph**: The main subject of the photograph is a seated individual, presumably a woman given the visible attire and hairstyle. She appears to be involved in laundry work.

2. **Activity Depicted**: The individual is positioned over what looks like a washbasin or a large bucket, indicating that she is washing clothes. Her arms and upper body are involved in the movement, suggesting she might be wringing out or scrubbing the garments. The action captured seems vigorous, as evidenced by the water splashes around.

3. **Attire**: The person is dressed in a light-colored, possibly striped garment. The attire looks practical, suitable for physical work and possibly made from a material that can endure getting wet.

4. **Props and Environment**: The subject is using some form of a washboard or a scrubbing imp

In [18]:
import pickle

In [19]:
# Persist `responses_intern` to "responses_intern.pkl" (binary pickle) in the
# current working directory. Presumably this holds the per-image responses
# printed by the generation loop — verify against the cell that builds it.
# NOTE: only unpickle this file from a trusted source; pickle.load can run
# arbitrary code.
with open("responses_intern.pkl", "wb") as pkl_out:
    serialized = pickle.dumps(responses_intern)
    pkl_out.write(serialized)