In [2]:
import mimetypes
import os

from dotenv import load_dotenv
from openai import OpenAI

In [3]:
# Read a local .env file (if present) into the process environment.
load_dotenv()

# The key is passed explicitly for clarity; per the SDK this is the default
# lookup and the argument could be omitted.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [4]:
from PIL import Image

def create_expanded_and_mask_images(base_image_path, res):
    """
    Centre a base image on a white square canvas and build the matching mask.

    Two PNG files are written to the current working directory:
    ``expanded_base_{res}.png`` (the base image pasted onto a white
    ``res`` x ``res`` RGB canvas) and ``mask_base_{res}.png`` (an RGBA image
    that is opaque wherever the base image has non-zero alpha and fully
    transparent everywhere else).

    Parameters:
    - base_image_path: str, the path to the base image. Any mode Pillow can
      convert to RGBA is accepted; an image without an alpha channel is
      treated as fully opaque.
    - res: int, resolution of the output images, can be 256, 512, or 1024.

    Returns:
    - expanded_image_path: str, the path to the expanded image.
    - correct_mask_image_path: str, the path to the mask image.

    Raises:
    - ValueError: if ``res`` is not one of 256, 512, or 1024.
    """
    # Validate the resolution
    if res not in [256, 512, 1024]:
        raise ValueError("Resolution must be one of 256, 512, or 1024.")

    # Open inside a context manager so the file handle is released, and
    # normalise to RGBA so the last band is always a real alpha channel.
    # (Previously an RGB input had its blue channel mistaken for alpha.)
    with Image.open(base_image_path) as source_image:
        base_image = source_image.convert("RGBA")

    # Top-left corner that centres the base image on the canvas; computed once
    # so the expanded image and the mask are guaranteed to line up.
    offset = (
        int((res - base_image.width) / 2),
        int((res - base_image.height) / 2),
    )

    # Create a new image with white background and the specified resolution
    new_image = Image.new("RGB", (res, res), "white")
    new_image.paste(base_image, offset)

    # Save the new image as PNG
    expanded_image_path = f'expanded_base_{res}.png'
    new_image.save(expanded_image_path, "PNG")

    # Binarise the alpha band: 255 wherever the base image has any opacity.
    base_mask = base_image.split()[-1].point(lambda x: 255 if x > 0 else 0)

    # Start from a fully transparent canvas and stamp the opaque region on it,
    # leaving the area the base image does not cover transparent.
    correct_mask = Image.new("RGBA", (res, res), (0, 0, 0, 0))
    correct_mask.paste(base_mask, offset, mask=base_mask)

    # Save the correct mask image
    correct_mask_image_path = f'mask_base_{res}.png'
    correct_mask.save(correct_mask_image_path, "PNG")

    return expanded_image_path, correct_mask_image_path

In [18]:
# Build the 1024x1024 expanded image and its mask from 'base.png'; both PNGs
# are written to the current working directory and their paths are returned.
create_expanded_and_mask_images('base.png', 1024)

('expanded_base_1024.png', 'mask_base_1024.png')

In [14]:
import base64
import requests


# OpenAI API key, taken from the already-configured client so the raw
# `requests` call in get_img_desc can build its Authorization header.
api_key = client.api_key

def encode_image(image_path):
  """Return the contents of the file at `image_path` as a base64 text string."""
  with open(image_path, "rb") as handle:
    raw_bytes = handle.read()
  encoded_bytes = base64.b64encode(raw_bytes)
  return encoded_bytes.decode('utf-8')

# Path to your image
# NOTE(review): no cell visible in this notebook reads this global; the
# get_img_desc call passes its own path literal instead.
image_path = "base2.png"

In [12]:
def get_img_desc(image_path, model="gpt-4-vision-preview", timeout=60):
    """
    Get a short, object-list style description of an image from the OpenAI
    chat completions endpoint.

    Parameters:
    - image_path: str, the path to the image.
    - model: str, name of the vision-capable chat model to query.
    - timeout: float, seconds to wait for the HTTP request before giving up.

    Returns:
    - img_desc: str, the image description.

    Raises:
    - requests.HTTPError: if the API answers with a 4xx/5xx status.
    - requests.Timeout: if no response arrives within `timeout` seconds.
    """
    # Encode the image
    base64_image = encode_image(image_path)

    # Label the data URL with the file's real type (e.g. image/png) rather
    # than always claiming JPEG; fall back to JPEG when the extension is unknown.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"

    # Set the headers
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    # Set the payload
    # NOTE(review): the prompt text contains the typo "ingore"; it is left
    # untouched here so the request sent to the model does not change.
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What's in the image, do not use full sentences, just describe the objects, ingore any watermarks or text on the image."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                            "detail": "low"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 65
    }

    # Send the request; the timeout keeps a stalled connection from hanging
    # the notebook cell indefinitely.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )

    # Surface API failures as a clear HTTP error instead of a KeyError on
    # the missing 'choices' field below.
    response.raise_for_status()

    # Get the image description
    img_desc = response.json()['choices'][0]['message']['content']

    return img_desc

In [13]:
# Ask the vision model for a terse, object-only description of 'base.png'.
get_img_desc('base.png')

'Person, red puffer jacket, striped garment, light-colored hair, white background, decorative red item.'

In [30]:
def get_expanded_img_url(
  image_path="expanded_base_1024.png",
  mask_path="mask_base_1024.png",
  prompt="Person, red puffer jacket, striped garment, light-colored hair, white background, decorative red item",
  size="1024x1024",
):
  """
  Send an image and its mask to the OpenAI image edit endpoint and return the
  URL of the generated picture.

  Parameters:
  - image_path: str, path to the PNG to edit.
  - mask_path: str, path to the PNG mask that accompanies the image.
  - prompt: str, text describing the desired result.
  - size: str, requested output size in "WIDTHxHEIGHT" form.

  Returns:
  - url: str, the URL of the first (and only) generated image.

  Called with no arguments, it behaves as before: the 1024px files and the
  original prompt are used.
  """
  # Open both files in a context manager so the handles are closed once the
  # request finishes (previously they were left open).
  with open(image_path, "rb") as image_file, open(mask_path, "rb") as mask_file:
    pics = client.images.edit(
      image=image_file,
      mask=mask_file,
      prompt=prompt,
      n=1,
      size=size
    )

  url = pics.data[0].url

  return url

In [20]:
# NOTE(review): as defined in this notebook, get_expanded_img_url() returns a
# URL string, whereas the output recorded for the next cell shows an
# ImagesResponse; that recorded output predates the current function definition.
pics = get_expanded_img_url()

In [21]:
# Display the value returned by get_expanded_img_url().
pics

ImagesResponse(created=1703467173, data=[Image(b64_json=None, revised_prompt=None, url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-BMyObFCyK3Rb8g2wkaYeYu12/user-CRJfQDcVUQTJQmD4Vv1PSyQL/img-Od80zYXe9j3hUFRsm2fNfZ7m.png?st=2023-12-25T00%3A19%3A33Z&se=2023-12-25T02%3A19%3A33Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-12-24T08%3A59%3A51Z&ske=2023-12-25T08%3A59%3A51Z&sks=b&skv=2021-08-06&sig=9XsofbDsMLTyE0OKM9QuJ4B6iaHwKXeiq5sGOpVPQ1g%3D')])

In [28]:
# `pics` is a plain URL string when it comes from the get_expanded_img_url()
# defined in this notebook, but an ImagesResponse if the kernel still holds the
# result of an earlier definition. Accept either so the cell survives a fresh
# Restart & Run All instead of raising AttributeError on a str.
url = pics if isinstance(pics, str) else pics.data[0].url
url

'https://oaidalleapiprodscus.blob.core.windows.net/private/org-BMyObFCyK3Rb8g2wkaYeYu12/user-CRJfQDcVUQTJQmD4Vv1PSyQL/img-Od80zYXe9j3hUFRsm2fNfZ7m.png?st=2023-12-25T00%3A19%3A33Z&se=2023-12-25T02%3A19%3A33Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-12-24T08%3A59%3A51Z&ske=2023-12-25T08%3A59%3A51Z&sks=b&skv=2021-08-06&sig=9XsofbDsMLTyE0OKM9QuJ4B6iaHwKXeiq5sGOpVPQ1g%3D'

In [29]:
# Sanity check: show the type of `url`.
type(url)

str