In [2]:
import os
from dotenv import load_dotenv
from openai import OpenAI

In [3]:
# Load variables from a local .env file into the process environment.
load_dotenv()

client = OpenAI(
    # This is the default and can be omitted
    api_key=os.environ.get("OPENAI_API_KEY"),
)
# Confirm that a key was picked up WITHOUT echoing the secret itself: cell
# outputs are stored in the notebook file and get shared/committed with it.
# Only the first three characters are shown. Any key that was ever displayed
# in full in a saved output should be revoked and rotated.
f"{client.api_key[:3]}...REDACTED"

'sk-...REDACTED'

In [4]:
from PIL import Image

def create_expanded_and_mask_images(base_image_path, res):
    """
    Centre an image on a white square canvas and build the matching mask image.

    Both results are written as PNG files into the current working directory.

    Parameters:
    - base_image_path: str, the path to the base image.
    - res: int, resolution of the output images, can be 256, 512, or 1024.

    Returns:
    - expanded_image_path: str, the path to the expanded image
      (``expanded_base_{res}.png``).
    - correct_mask_image_path: str, the path to the mask image
      (``mask_base_{res}.png``). The mask is opaque wherever the base image
      has a non-zero alpha value and fully transparent everywhere else.

    Raises:
    - ValueError: if ``res`` is not one of 256, 512, or 1024.
    """
    # Validate the resolution
    if res not in [256, 512, 1024]:
        raise ValueError("Resolution must be one of 256, 512, or 1024.")

    # Load the image. Converting to RGBA guarantees that an alpha band exists:
    # without it, an RGB file would have its *blue* channel used as the mask
    # source (and a palette file its palette indices). convert() also reads
    # the pixel data eagerly, so the file handle can be closed right away.
    with Image.open(base_image_path) as source_image:
        base_image = source_image.convert("RGBA")

    # Top-left corner that centres the base image on the square canvas.
    # Computed once so the image and the mask are guaranteed to line up.
    offset = (int((res - base_image.width) / 2), int((res - base_image.height) / 2))

    # Create a new image with white background and the specified resolution.
    # The base image is pasted without an alpha mask (as before), so its colour
    # data is copied as-is inside its bounding box.
    new_image = Image.new("RGB", (res, res), "white")
    new_image.paste(base_image, offset)

    # Save the new image as PNG
    expanded_image_path = f'expanded_base_{res}.png'
    new_image.save(expanded_image_path, "PNG")

    # Create a mask with transparent areas where the original image is not present:
    # threshold the alpha band to 0/255, then paste it through itself onto a
    # fully transparent canvas so only the covered pixels become opaque.
    base_mask = base_image.getchannel("A").point(lambda x: 255 if x > 0 else 0)
    correct_mask = Image.new("RGBA", (res, res), (0, 0, 0, 0))
    correct_mask.paste(base_mask, offset, mask=base_mask)

    # Save the correct mask image
    correct_mask_image_path = f'mask_base_{res}.png'
    correct_mask.save(correct_mask_image_path, "PNG")

    return expanded_image_path, correct_mask_image_path

In [17]:
# Build the 1024x1024 expanded image and its mask from base2.png;
# the two returned file paths are shown as the cell output.
create_expanded_and_mask_images('base2.png', 1024)

('expanded_base_1024.png', 'mask_base_1024.png')

In [18]:
import base64
import requests


# OpenAI API key, read back from the already-configured client so the raw
# HTTP request below uses the same credential as the SDK calls.
api_key = client.api_key

# Function to encode the image
def encode_image(image_path):
  """Read the file at `image_path` and return its bytes as a base64 text string."""
  with open(image_path, "rb") as handle:
    raw_bytes = handle.read()
  encoded_bytes = base64.b64encode(raw_bytes)
  return encoded_bytes.decode('utf-8')

# Path to your image, and the MIME type of that file. The two must describe
# the same format: the data URL below previously claimed JPEG for a PNG file.
image_path = "base2.png"
image_mime_type = "image/png"

# Getting the base64 string
base64_image = encode_image(image_path)

headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {api_key}"
}

# Ask the vision model for a terse object list; the reply is meant to be
# reused as a text prompt, hence the small max_tokens budget.
payload = {
  "model": "gpt-4-vision-preview",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "What's in the image, do not use full sentences, just describe the objects, ignore any watermarks or text on the image."
        },
        {
          "type": "image_url",
          "image_url": {
            "url": f"data:{image_mime_type};base64,{base64_image}"
          }
        }
      ]
    }
  ],
  "max_tokens": 50
}

# requests has no default timeout; without one a stalled connection would
# block the kernel indefinitely.
response = requests.post(
  "https://api.openai.com/v1/chat/completions",
  headers=headers,
  json=payload,
  timeout=60,
)

# Fail loudly on HTTP errors (bad key, rate limit, ...) and keep the response
# body in the message, instead of printing an error payload as if it were a result.
if not response.ok:
  raise RuntimeError(f"Vision request failed with HTTP {response.status_code}: {response.text}")

print(response.json())

{'id': 'chatcmpl-8WxU87JNbvuivZSic2VwfN9ycWRRf', 'object': 'chat.completion', 'created': 1702865848, 'model': 'gpt-4-1106-vision-preview', 'usage': {'prompt_tokens': 800, 'completion_tokens': 22, 'total_tokens': 822}, 'choices': [{'message': {'role': 'assistant', 'content': 'Sphinx statue, bird in flight, futuristic cityscape, skyscrapers, clouds, reflective water surface.'}, 'finish_reason': 'stop', 'index': 0}]}


In [19]:

# Send the expanded image and its mask to the image edit endpoint, asking for
# two 1024x1024 variants. Both files are opened in a context manager so the
# handles are closed as soon as the call returns (the bare open() calls used
# previously were never closed).
with open("expanded_base_1024.png", "rb") as image_file, open("mask_base_1024.png", "rb") as mask_file:
  pics = client.images.edit(
    image=image_file,
    mask=mask_file,
    prompt="Sphinx statue, bird in flight, futuristic cityscape, skyscrapers, clouds, reflective water surface",
    n=2,
    size="1024x1024"
  )

In [21]:
# Display the response object returned by the image edit request above.
pics

ImagesResponse(created=1702865885, data=[Image(b64_json=None, revised_prompt=None, url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-BMyObFCyK3Rb8g2wkaYeYu12/user-CRJfQDcVUQTJQmD4Vv1PSyQL/img-aYy1jGRl8m7s3gnfNzJmRI7q.png?st=2023-12-18T01%3A18%3A04Z&se=2023-12-18T03%3A18%3A04Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-12-17T18%3A52%3A37Z&ske=2023-12-18T18%3A52%3A37Z&sks=b&skv=2021-08-06&sig=Di1R/WLNH/X7c9F35bX1qTsZiPDSy2zomMUwm9mn/yA%3D'), Image(b64_json=None, revised_prompt=None, url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-BMyObFCyK3Rb8g2wkaYeYu12/user-CRJfQDcVUQTJQmD4Vv1PSyQL/img-RWG0z9M1ZmwKEYB8sexJ7vOw.png?st=2023-12-18T01%3A18%3A05Z&se=2023-12-18T03%3A18%3A05Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-12-17T18%3A52%3A37Z&ske=2023-12-18T18