## Image Description

In [22]:
# Access the images
import os

path_to_images = '' # Update folder with images
images = [path_to_images+i for i in os.listdir(path_to_images)]
images

['/content/drive/MyDrive/Listed Assignment/AI ML Assignment Pictures/Image1.png',
 '/content/drive/MyDrive/Listed Assignment/AI ML Assignment Pictures/Image3.png',
 '/content/drive/MyDrive/Listed Assignment/AI ML Assignment Pictures/Image2.png']

### Image describer model import 

In [4]:
import torch
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Import the BLIP image captioning model from huggingface
# Link - https://huggingface.co/Salesforce/blip-image-captioning-large/discussions
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")

Downloading (…)rocessor_config.json:   0%|          | 0.00/445 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/456 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [23]:
# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Import BLIP model and load into available device
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)

### Description function

In [6]:
def get_description(image_paths):
  descriptions = []
  for path in image_paths: 
    # Read image
    raw_image = Image.open(path).convert('RGB')
    
    # Preprocess
    inputs = processor(raw_image, return_tensors="pt").to("cuda")

    # Image Description generator
    out = model.generate(**inputs)

    # Description post-processing
    description = processor.decode(out[0], skip_special_tokens=True)
    descriptions.append(description)

  return descriptions  

In [24]:
descriptions = get_description(images)
descriptions

['arafed soccer player running on a soccer field with a ball',
 'a close up of a poster with a bunch of people',
 'there are two horses standing together in a field under a cloudy sky']

## Description to Caption using GPT-3.5 Turbo

In [8]:
import openai

In [9]:
openai.api_key = '' # Add your key here

In [18]:
# https://platform.openai.com/docs/api-reference/chat/create

# Given an image description, make a caption out of it
def get_caption_for(text, num_captions=1):
  response = 'caption from description'
  response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        # System messages are a way to set the model into a perspective
          {"role": "system", "content": "You are a catchy, exciting, innovative, captivating, creative and engaging social media caption generator."},
        # User messages are the normal ChatGPT chat-like messages or prompts which we as users give
          {"role": "user", "content": f"Generate a caption if the image is described as: {text}"}
      ],
    # Temperature is a way of randomizing the response of the model. Higher temperature makes the output more random and low does the opposite.
    temperature= 1.2,
    # We can specify n and generate a custom number of outputs
    n = num_captions
  )

  return response

In [19]:
'''
EXAMPLE OBJECT:

caption_object = <OpenAIObject chat.completion id=chatcmpl-7FMgxLdn3SOt4bMtE8FNBWJJl8qCm at 0x7f8521ee8540> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "\"Kick it like you mean it! \u26bd\ufe0f\ud83d\udd25 Let's score big in this game.\" #soccer #football #teamspirit",
        "role": "assistant"
      }
    }
  ],
  "created": 1683896503,
  "id": "chatcmpl-7FMgxLdn3SOt4bMtE8FNBWJJl8qCm",
  "model": "gpt-3.5-turbo-0301",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 31,
    "prompt_tokens": 51,
    "total_tokens": 82
  }
}
'''

# Set the number of captions you want to get from the model
num_captions = [3, 1, 2]

# Save all caption objects in a list
caption_objects = []

# Traverse descriptions and generate a list of captions
for i, description in enumerate(descriptions):
  caption_object = get_caption_for(description, num_captions[i])
  caption_objects.append(caption_object)

In [20]:
# What the caption objects look like
caption_objects

[<OpenAIObject chat.completion id=chatcmpl-7FOdrkyG89943JYcwoWs6yjeZn9kz at 0x7f0516c89670> JSON: {
   "choices": [
     {
       "finish_reason": "stop",
       "index": 0,
       "message": {
         "content": "\"Stepping up our skills, one dribble at a time \ud83d\ude4c\u26bd\ufe0f #SoccerFever #BallIsLife #GameOn #ArafedAthlete\"",
         "role": "assistant"
       }
     },
     {
       "finish_reason": "stop",
       "index": 1,
       "message": {
         "content": "\"The thrill of the game fuels every step. \u26bd\ufe0f\ud83d\udd25 #SoccerLife #GameOn #AthleticFeats #FieldOfDreams\"",
         "role": "assistant"
       }
     },
     {
       "finish_reason": "stop",
       "index": 2,
       "message": {
         "content": "\"Watch out world, Arafed is here to score and conquer the field! \ud83c\udf1f\u26bd\ufe0f #SoccerStar #FieldFrenzy #RisingAthlete\"",
         "role": "assistant"
       }
     }
   ],
   "created": 1683903999,
   "id": "chatcmpl-7FOdrkyG89943JYcw

In [25]:
# Print the captions after taking them out of the caption objects' list
for i, caption_object in enumerate(caption_objects):
  print(f'Image {i}')
  for choice in caption_object['choices']:
    print(choice['message']['content'])
  print()

Image 0
"Stepping up our skills, one dribble at a time 🙌⚽️ #SoccerFever #BallIsLife #GameOn #ArafedAthlete"
"The thrill of the game fuels every step. ⚽️🔥 #SoccerLife #GameOn #AthleticFeats #FieldOfDreams"
"Watch out world, Arafed is here to score and conquer the field! 🌟⚽️ #SoccerStar #FieldFrenzy #RisingAthlete"

Image 1
"Gather around, embrace the power of togetherness!" #PeopleUnite #CommunitySpirit #PosterPower

Image 2
"Forever friends, standing tall under the whimsical clouds ☁️🐎 #EquineBonding #NatureWonder"
"Taking a moment to appreciate the beauty of nature with my four-legged friends 🐴☁️ #horselove #farmfield #scenicview"

