In [38]:
from openai import OpenAI
client = OpenAI()

replace_obj_prompt = """
Given an input sentence describing a scene, your task is to:
1. Locate the noun words in the sentence.
2. Randomly pick one noun word.
3. Replace the selected noun word with a new noun word to make a new sentence.

The new sentence must meet the following three requirements:
1. The new sentence must be describing a scene that is as different as possible from the original scene.
2. The new sentence must be fluent and grammatically correct.
3. The new sentence must make logical sense.

Here are some examples:

Original sentence: A man is in a kitchen making pizzas.
Nouns: ["man", "kitchen", "pizzas"]
Selected noun: man
New noun: woman
New sentence: A woman is in a kitchen making pizzas.

Original sentence: a woman seated on wall and birds besides her
Nouns: ["woman", "wall", "birds"]
Selected noun: wall
New noun: bench
New sentence: A woman seated on a bench and birds besides her.
"""

swap_obj_prompt = """
Given an input sentence describing a scene, your task is to first locate two swappable noun phrases in the sentence, and then swap them to make a new sentence. The new sentence must meet the following three requirements:
1. The new sentence must be describing a different scene from the input sentence.
2. The new sentence must be fluent and grammatically correct.
3. The new sentence must make logical sense.

To complete the task, you should:
1. Answer the question of whether generating such a new sentence is possible using Yes or No.
2. Output the swappable noun phrases.
3. Swap them to make a new sentence.

Here are some examples:

Input: A cat resting on a laptop next to a person.
Is it possible to swap noun phrases in the input sentence to generate a new sentence that is different from the input sentence and makes logical sense? Yes
Swappable noun phrases: laptop, person
Output: A cat resting on a person next to a laptop.

Input: A plate of donuts with a person in the background.
Is it possible to swap noun phrases in the input sentence to generate a new sentence that is different from the input sentence and makes logical sense? Yes
Swappable noun phrases: a plate of donuts, a person
Output: A person with a plate of donuts in the background.
"""

# user_prompt = "Generate a new sentence for the following caption: {}"

user_prompt = """
Generate a new sentence for each of the following captions:
{}
{}
{}
{}
{}
"""

completion = client.chat.completions.create(
  model="gpt-3.5-turbo",
  
  # messages=[
  #   {"role": "system", "content": "You are a language assistant, skilled in generating negative caption for an image, given the positive caption for that image. A negative caption describes a scene different from the positive caption"},
  #   {"role": "user", "content": "Compose a poem that explains the concept of recursion in programming."}
  # ]

  messages=[
    {"role": "system", "content": replace_obj_prompt},
    {"role": "user", "content": user_prompt.format('A man riding a wave on a surfboard in the ocean.', 'The pizza is covered by many different toppings.', 'A kitchen with a white sink and a stove top oven.', 'a close up of a person using a laptop', 'A person leading a brown horse across a field.')}
  ]
)

print(completion.choices[0].message)

ChatCompletionMessage(content='Original sentence: A man riding a wave on a surfboard in the ocean.\nNouns: ["man", "wave", "surfboard", "ocean"]\nSelected noun: man\nNew noun: woman\nNew sentence: A woman riding a wave on a surfboard in the ocean.\n\nOriginal sentence: The pizza is covered by many different toppings.\nNouns: ["pizza", "toppings"]\nSelected noun: pizza\nNew noun: sandwich\nNew sentence: The sandwich is covered by many different toppings.\n\nOriginal sentence: A kitchen with a white sink and a stove top oven.\nNouns: ["kitchen", "sink", "stove top oven"]\nSelected noun: kitchen\nNew noun: bedroom\nNew sentence: A bedroom with a white sink and a stove top oven.\n\nOriginal sentence: A person leading a brown horse across a field.\nNouns: ["person", "horse", "field"]\nSelected noun: field\nNew noun: forest\nNew sentence: A person leading a brown horse across a forest.\n\nOriginal sentence: A close up of a person using a laptop.\nNouns: ["person", "laptop"]\nSelected noun: l

In [39]:
print(completion)
print(completion.choices[0].message.content)

ChatCompletion(id='chatcmpl-9EXZZiE24orpUvH3AMtdUjhWAO1rp', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Original sentence: A man riding a wave on a surfboard in the ocean.\nNouns: ["man", "wave", "surfboard", "ocean"]\nSelected noun: man\nNew noun: woman\nNew sentence: A woman riding a wave on a surfboard in the ocean.\n\nOriginal sentence: The pizza is covered by many different toppings.\nNouns: ["pizza", "toppings"]\nSelected noun: pizza\nNew noun: sandwich\nNew sentence: The sandwich is covered by many different toppings.\n\nOriginal sentence: A kitchen with a white sink and a stove top oven.\nNouns: ["kitchen", "sink", "stove top oven"]\nSelected noun: kitchen\nNew noun: bedroom\nNew sentence: A bedroom with a white sink and a stove top oven.\n\nOriginal sentence: A person leading a brown horse across a field.\nNouns: ["person", "horse", "field"]\nSelected noun: field\nNew noun: forest\nNew sentence: A person leading a brown 

In [45]:
import re
def extract_outputs(input_string):
    outputs = []
    # Split the input string into sections for each case using regex to split by number prefix
    cases = re.split(r'\nOriginal sentence:', input_string)
    print(len(cases))
    for case in cases:
        # Find the output sentence using a regex pattern that captures the text after "Output:"
        match = re.search(r'New sentence:\s*(.*)', case)
        if match:
            outputs.append(match.group(1).strip())
    return outputs

# Using the function to extract outputs
output_sentences = extract_outputs(completion.choices[0].message.content)
print(output_sentences)

5
['A woman riding a wave on a surfboard in the ocean.', 'The sandwich is covered by many different toppings.', 'A bedroom with a white sink and a stove top oven.', 'A person leading a brown horse across a forest.', 'A close up of a person using a camera.']


In [29]:
import re
def extract_outputs(input_string):
    outputs = []
    # Split the input string into sections for each case using regex to split by number prefix
    cases = re.split(r'\n\d+\.\sInput:', input_string)
    for case in cases:
        if "Yes" in case:
            # Find the output sentence using a regex pattern that captures the text after "Output:"
            match = re.search(r'Output:\s*(.*)', case)
            if match:
                outputs.append(match.group(1).strip())
    return outputs

# Using the function to extract outputs
output_sentences = extract_outputs(completion.choices[0].message.content)
print(output_sentences)

['A wave riding a man on a surfboard in the ocean.', 'Many different toppings are covered by the pizza.', 'A kitchen with a stove top oven and a white sink.', 'A close up of a laptop using a person.']


In [3]:
import json
import os

def get_text_from_file(filename):
  with open(filename, 'r') as file:
    text = file.read()
  return text

def get_images_and_captions_for_coco(filename, caption_store):
  file_text = get_text_from_file(filename)
  all_image_filenames = file_text.split('\n')
  all_image_filenames = [os.path.basename(filename) for filename in all_image_filenames]
  print(len(all_image_filenames[:-1]))
  image_caption_store = {}

  for item in caption_store['annotations']:
    caption_store_image_id = str(item['image_id'])
    if caption_store_image_id not in image_caption_store:
      image_caption_store[caption_store_image_id] = []
    image_caption_store[caption_store_image_id].append(item['caption'])

  print(image_caption_store['353850'])
  return all_image_filenames[:-1], image_caption_store


with open("annotations_train2017/annotations/captions_train2017.json", 'rb') as file:
  caption_store = json.load(file)

image_filenames, image_caption_store = get_images_and_captions_for_coco("coco-minitrain/coco25k.txt", caption_store)
print(image_filenames)
print(image_caption_store['126992'])

25000
['a man is repairing a small white airplane outdoors', "A man standing on a step stool looks at an airplane's roof.", 'A man standing on a step ladder working on a small airplane on the runway.', 'A man standing next to a white airplane with blue and red stripes. ', 'Small airplanes do require regular maintenance and repairs.']
['000000484136.jpg', '000000225463.jpg', '000000173620.jpg', '000000353850.jpg', '000000147568.jpg', '000000311488.jpg', '000000309323.jpg', '000000089754.jpg', '000000485172.jpg', '000000126992.jpg', '000000264855.jpg', '000000495356.jpg', '000000110724.jpg', '000000263006.jpg', '000000515457.jpg', '000000463593.jpg', '000000552446.jpg', '000000428576.jpg', '000000544611.jpg', '000000562171.jpg', '000000435359.jpg', '000000541092.jpg', '000000387704.jpg', '000000016531.jpg', '000000131011.jpg', '000000385326.jpg', '000000252979.jpg', '000000105418.jpg', '000000270999.jpg', '000000088507.jpg', '000000576089.jpg', '000000431708.jpg', '000000045494.jpg', '00

In [9]:
import random

selected_image_filenames = random.sample(image_filenames, 15000)

final_image_set = []
final_image_set.extend(selected_image_filenames)

In [30]:
# with open("final_image_set.txt", "w") as file:
#     for item in final_image_set:
#         file.write(f"{item}\n")  # Write each item on a new line

In [2]:
# final_image_set = []

# # Open the file in read mode
# with open("final_image_set_6000.txt", "r") as file:
#     for line in file:
#         # Strip the newline character and any other trailing whitespace
#         clean_line = line.strip()
#         # Append the cleaned line to the list
#         final_image_set.append(clean_line)

# # Now 'retrieved_list' contains all the items of the file as elements of the list
# print(final_image_set)

['000000441828.jpg', '000000274054.jpg', '000000332537.jpg', '000000159971.jpg', '000000355830.jpg', '000000027055.jpg', '000000356071.jpg', '000000112363.jpg', '000000157371.jpg', '000000516771.jpg', '000000510676.jpg', '000000191169.jpg', '000000200671.jpg', '000000391415.jpg', '000000123692.jpg', '000000129448.jpg', '000000192958.jpg', '000000510592.jpg', '000000472316.jpg', '000000273653.jpg', '000000010645.jpg', '000000332663.jpg', '000000437530.jpg', '000000106375.jpg', '000000190868.jpg', '000000135800.jpg', '000000055447.jpg', '000000561172.jpg', '000000052267.jpg', '000000033756.jpg', '000000493793.jpg', '000000463593.jpg', '000000041773.jpg', '000000066798.jpg', '000000270609.jpg', '000000234612.jpg', '000000062566.jpg', '000000200477.jpg', '000000542694.jpg', '000000211325.jpg', '000000018000.jpg', '000000380657.jpg', '000000290522.jpg', '000000446813.jpg', '000000561923.jpg', '000000504069.jpg', '000000471421.jpg', '000000094723.jpg', '000000549879.jpg', '000000329105.jpg',

In [4]:
len(final_image_set)
print(final_image_set)

['000000441828.jpg', '000000274054.jpg', '000000332537.jpg', '000000159971.jpg', '000000355830.jpg', '000000027055.jpg', '000000356071.jpg', '000000112363.jpg', '000000157371.jpg', '000000516771.jpg', '000000510676.jpg', '000000191169.jpg', '000000200671.jpg', '000000391415.jpg', '000000123692.jpg', '000000129448.jpg', '000000192958.jpg', '000000510592.jpg', '000000472316.jpg', '000000273653.jpg', '000000010645.jpg', '000000332663.jpg', '000000437530.jpg', '000000106375.jpg', '000000190868.jpg', '000000135800.jpg', '000000055447.jpg', '000000561172.jpg', '000000052267.jpg', '000000033756.jpg', '000000493793.jpg', '000000463593.jpg', '000000041773.jpg', '000000066798.jpg', '000000270609.jpg', '000000234612.jpg', '000000062566.jpg', '000000200477.jpg', '000000542694.jpg', '000000211325.jpg', '000000018000.jpg', '000000380657.jpg', '000000290522.jpg', '000000446813.jpg', '000000561923.jpg', '000000504069.jpg', '000000471421.jpg', '000000094723.jpg', '000000549879.jpg', '000000329105.jpg',

In [5]:
print(image_caption_store['441828'])
print(image_caption_store['274054'])
print(image_caption_store['332537'])
print(image_caption_store['159971'])
print(image_caption_store['355830'])

['A man riding a wave on a surfboard in the ocean.', 'A man on a surfboard riding an ocean wave.', 'A surfer in a wet suit near the shore on his surfboard', 'A MAN IS SURFING ON THE WAVES OF THE OCEAN', 'A man is on a standing on a surf board while splashing through some small waves in the lake.']
['The pizza is covered by many different toppings.', 'A home made pizza loaded plenty of vegetables.', 'Someone holds a pizza with vegetable toppings over a stove.', 'There is a pizza with vegetable toppings on it.', 'A burnt pizza covered in cheese and toppings.']
['A kitchen with a white sink and a stove top oven.', 'black and white photo of a bathroom and sink.', 'A kitchen that has pots on the stove.', 'A black and white image of a very old looking kitchen. ', 'A vintage kitchen with stove and pots and pans.']
['a close up of a person using a laptop', "A laptop opened on a page and hand on it's mousepad.", 'person looking at a webpage on a laptop computer', 'A person is sitting on the flo

In [None]:
user_prompt.format('A man riding a wave on a surfboard in the ocean.', 'The pizza is covered by many different toppings.', 'A kitchen with a white sink and a stove top oven.', 'a close up of a person using a laptop', 'A person leading a brown horse across a field.')

In [34]:
def generate_dynamic_prompt(number_of_items):
    # Generate the appropriate format string with the right number of placeholders
    placeholders = "\n".join(["{}" for _ in range(number_of_items)])
    # Create the prompt template with dynamic placeholders
    prompt_template = """
Generate a new sentence for each of the following captions:
{}
    """.format(placeholders)
    
    return prompt_template

# Example usage:
prompt_5 = generate_dynamic_prompt(5)
prompt_6 = generate_dynamic_prompt(6)
prompt_10 = generate_dynamic_prompt(10)

print("Prompt for 5 items:", prompt_5)
print(prompt_5.format("1","2","3","4","5","6"))
#print("Prompt for 6 items:", prompt_6)
#print("Prompt for 10 items:", prompt_10)

Prompt for 5 items: 
Generate a new sentence for each of the following captions:
{}
{}
{}
{}
{}
    

Generate a new sentence for each of the following captions:
1
2
3
4
5
    


In [6]:
import random

def select_random_caption(image_captions):
    selected_captions = random.sample(image_captions, 1)
    return selected_captions[0]

def get_image_id(image_filename):
    image_id = image_filename.replace(".jpg", "").lstrip('0')
    return image_id

def generate_complete_user_prompt(captions):
    placeholders = "\n".join(["{}" for _ in captions])
    prompt_template = """
Generate a new sentence for each of the following captions:
{}
    """.format(placeholders).format(*captions)
    
    return prompt_template


In [7]:
from openai import OpenAI

client = OpenAI()

def get_gpt_response(system_prompt, user_prompt):
    completion = client.chat.completions.create(
    model="gpt-3.5-turbo",

    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    )

    return completion.choices[0].message.content

In [11]:

import re

replace_obj_prompt = """
Given an input sentence describing a scene, your task is to:
1. Locate the noun words in the sentence.
2. Randomly pick one noun word.
3. Replace the selected noun word with a new noun word to make a new sentence.

The new sentence must meet the following three requirements:
1. The new sentence must be describing a scene that is as different as possible from the original scene.
2. The new sentence must be fluent and grammatically correct.
3. The new sentence must make logical sense.

Here are some examples:

Original sentence: A man is in a kitchen making pizzas.
Nouns: ["man", "kitchen", "pizzas"]
Selected noun: man
New noun: woman
New sentence: A woman is in a kitchen making pizzas.

Original sentence: a woman seated on wall and birds besides her
Nouns: ["woman", "wall", "birds"]
Selected noun: wall
New noun: bench
New sentence: A woman seated on a bench and birds besides her.
"""

replace_att_prompt = """
Given an input sentence describing a scene, your task is to:
1. Locate the adjective words describing objects in the sentence. If there are no adjective words, return an empty list.
2. Randomly pick one adjective word.
3. Replace the selected adjective word with a new adjective word to make a new sentence.

The new sentence must meet the following three requirements:
1. The new sentence must be describing a scene that is as different as possible from the original scene.
2. The new sentence must be fluent and grammatically correct.
3. The new sentence must make logical sense.

Here are some examples:

Original sentence: a blue bike parked on a side walk.
Adjectives: ["blue"]
Selected adjective: blue
New adjective: red
New sentence: a red bike parked on a side walk.

Original sentence: The kitchen is clean and ready for us to see.
Adjectives: ["clean", "ready"]
Selected adjective: clean
New adjective: dirty
New sentence: The kitchen is dirty and ready for us to see.
"""

swap_obj_prompt = """
Given an input sentence describing a scene, your task is to first locate two swappable noun phrases in the sentence, and then swap them to make a new sentence. The new sentence must meet the following three requirements:
1. The new sentence must be describing a different scene from the input sentence.
2. The new sentence must be fluent and grammatically correct.
3. The new sentence must make logical sense.

To complete the task, you should:
1. Answer the question of whether generating such a new sentence is possible using Yes or No.
2. Output the swappable noun phrases.
3. Swap them to make a new sentence.

Here are some examples:

Input: A cat resting on a laptop next to a person.
Is it possible to swap noun phrases in the input sentence to generate a new sentence that is different from the input sentence and makes logical sense? Yes
Swappable noun phrases: laptop, person
Output: A cat resting on a person next to a laptop.

Input: A plate of donuts with a person in the background.
Is it possible to swap noun phrases in the input sentence to generate a new sentence that is different from the input sentence and makes logical sense? Yes
Swappable noun phrases: a plate of donuts, a person
Output: A person with a plate of donuts in the background.
"""

def extract_outputs_for_replace_obj(input_string):
    outputs = []
    # Split the input string into sections for each case using regex to split by number prefix
    cases = re.split(r'\n\d.\.\sOriginal sentence:', input_string)
    # print(len(cases))
    for case in cases:
        # Find the output sentence using a regex pattern that captures the text after "Output:"
        match = re.search(r'New sentence:\s*(.*)', case)
        if match:
            outputs.append(match.group(1).strip())
    return outputs

def extract_new_sentences_for_replace_obj(text):
    # Use regex to find all occurrences of "New sentence:" followed by any text until the end of the line
    pattern = re.compile(r"New sentence: (.*)\n?")
    matches = pattern.findall(text)
    return matches

def extract_new_sentences_for_replace_att(text):
    # Use regex to find all occurrences of "New sentence:" followed by any text until the end of the line
    pattern = re.compile(r"New sentence: (.*)\n?")
    matches = pattern.findall(text)
    return matches

def extract_outputs_for_swap_obj(input_string):
    outputs = []
    # Split the input string into sections for each case using regex to split by number prefix
    cases = re.split(r'\n\d+\.\sInput:', input_string)
    for case in cases:
        if "Yes" in case:
            # Find the output sentence using a regex pattern that captures the text after "Output:"
            match = re.search(r'Output:\s*(.*)', case)
            if match:
                outputs.append(match.group(1).strip())
        else:
            outputs.append("")
    return outputs

In [19]:
import json

batch_size = 5
gpt_results = []
replace_obj_dict = {}

for i in range(0, len(final_image_set), batch_size):
    current_image_filenames = final_image_set[i:i + batch_size]
    current_batch = [get_image_id(image_filename) for image_filename in final_image_set[i:i + batch_size]]
    input_captions = [select_random_caption(image_caption_store[image_id]) for image_id in current_batch]
    user_prompt = generate_complete_user_prompt(input_captions)
    # print(generate_complete_user_prompt(input_captions))
    gpt_result = get_gpt_response(replace_att_prompt, user_prompt)
    gpt_results.append(gpt_result)
    # print(gpt_result)
    batch_negative_captions = extract_new_sentences_for_replace_att(gpt_result)
    # print(current_image_filenames)
    # print(input_captions)
    # print(batch_negative_captions)

    for (image_id, image_filename, pos_caption, neg_caption) in zip(current_batch, current_image_filenames, input_captions, batch_negative_captions):
        if neg_caption is None or len(neg_caption) == 0 or 'adjectives' in neg_caption:
            continue
        temp_dict = {}
        temp_dict["filename"] = image_filename
        temp_dict["pos_caption"] = pos_caption
        temp_dict["neg_caption"] = neg_caption
        replace_obj_dict[image_id] = temp_dict

    with open("replace_att_temp_values.json", "w") as file:
        # Dump the dictionary to the file in JSON format
        json.dump(replace_obj_dict, file) 

    # print(input_captions)

# print(current_batch)

In [62]:
# extract_outputs_for_replace_obj(gpt_result)

def extract_new_sentences_for_replace_obj(text):
    # Use regex to find all occurrences of "New sentence:" followed by any text until the end of the line
    pattern = re.compile(r"New sentence: (.*)\n?")
    matches = pattern.findall(text)
    return matches

# Extract "New sentence" values using the function
new_sentences = extract_new_sentences(gpt_result)
print(len(new_sentences))
print(new_sentences)

10
['A man is on a standing on a surf board while splashing through some small waves in the pool.', 'A home made salad loaded plenty of vegetables.', 'A black and white image of a very old looking barn.', "A laptop opened on a page and cat on it's mousepad.", 'A women holding a leash while walking a large horse in the grass.', 'A blue bridge sitting on a lush green path with white sheep sitting behind it.', 'A counter in a cafe with a mocha latte and a cheese sandwich.', 'A large red hot air balloon traveling along a river.', 'A man and a woman sitting in chairs having a conversation overlooking a mountain.', 'A group of people in a courtyard next to a fountain.']
