In [None]:
import json
import requests
import yaml
import time
import base64
import mimetypes
import glob
import pandas as pd
import numpy as np
from openai import AzureOpenAI

In [None]:
# Load YAML file
def load_config(filename):
    """Read a YAML configuration file and return its parsed contents.

    Args:
        filename: Path to the YAML file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document. The rest of
        this notebook indexes the result by setting name, so a mapping is
        expected.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Pin the encoding: without it the file is decoded with the platform's
    # locale default, which mangles non-ASCII values on non-UTF-8 systems.
    with open(filename, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)

# Parse config.yml (a relative path, so resolved against the current working directory).
config = load_config('config.yml')

In [None]:
# Azure OpenAI connection settings read from the loaded config.
API_KEY = config['API_KEY']
API_VERSION = config['API_VERSION']
RESOURCE_ENDPOINT = config['RESOURCE_ENDPOINT']
# Deployment name passed as `model=` by extractFromLocalImage (hard-coded, not read from config).
deployment = 'gpt-4o-2024-05-13'
# NOTE(review): DEPLOYMENT_MODEL is read here but not referenced by any other visible cell,
# which use `deployment` instead - confirm which of the two is intended.
DEPLOYMENT_MODEL = config['DEPLOYMENT_MODEL']

In [None]:
# this method uses Versa to extract information from an image in local storage

def extractFromLocalImage(prompt, image_path, max_tokens=1000):
    """Send a local image together with a text prompt to the chat deployment.

    The image is read from disk, base64-encoded and embedded in the request
    as a ``data:`` URL. The image path is appended to the prompt text so the
    reply can include the filename.

    Args:
        prompt: Instruction text for the model.
        image_path: Path of the image file on local storage.
        max_tokens: Upper bound on generated tokens. Defaults to 1000, the
            value that used to be hard-coded.

    Returns:
        The object returned by ``client.chat.completions.create``.

    Raises:
        OSError: If ``image_path`` cannot be opened.
    """
    with open(image_path, 'rb') as file:
        image_base64 = base64.b64encode(file.read()).decode('utf-8')

    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None:
        # The payload goes into an ``image_url`` part, so the data URL has to
        # declare an image media type. "application/octet-stream" does not
        # describe an image (and is likely rejected by the service), so fall
        # back to JPEG when the file extension gives no hint.
        mime_type = "image/jpeg"

    full_prompt = prompt + " The filename is the end of the image path: " + image_path

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": full_prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{image_base64}"
                    },
                },
            ],
        }
    ]

    client = AzureOpenAI(
        api_key=API_KEY,
        api_version=API_VERSION,
        azure_endpoint=RESOURCE_ENDPOINT,
    )

    return client.chat.completions.create(
        model=deployment,
        messages=messages,
        max_tokens=max_tokens,
    )



In [None]:
# from local storage

# Prompt sent with every image. It asks for a single tab-delimited line with the fields
# filename, smoking_present, short_description, extended_description (no header row).
# The wording is part of the program's behaviour: the sentinel word requested here
# ("unknown") is what ends up in the output for undetermined fields.
prompt = """
Describe smoking related elements of this image, if present. Format response as a tab delimited file, with fields: filename, smoking_present, short_description, extended_description. For fields with multiple entries, use commas to delimit. Use unknown if field value is
undetermined. Do not include the field names, only the line of data.
"""

# Try the prompt on one frame before processing a batch.
response = extractFromLocalImage(prompt, "./extracted_frames/Camel%20filters__frame_0050__0-00-48.jpg")

In [None]:
# Show the raw response object for inspection.
response

In [None]:
# Print only the message text of the first choice in the response.
print(response.choices[0].message.content)

In [None]:
# this next section lists the .jpg frames found in local storage;
# a later cell sends only the first few of them to the model

# first, extract the local paths into a list
#./Images/images/*.jpg

# glob returns matches in arbitrary (filesystem-dependent) order, so sort them
# to make "the first N files" the same on every run and every machine.
jpg_files = sorted(glob.glob("./extracted_frames/*.jpg"))

for img in jpg_files:
    print(img)

In [None]:
# next, call Versa on the first few images and split each tab-delimited reply
# into its fields

MAX_IMAGES = 3  # how many files, taken from the front of jpg_files, to process

res = []
for img in jpg_files[:MAX_IMAGES]:
    print(img)
    response = extractFromLocalImage(prompt, img)
    # The message content may be None (no text returned); treat that as an
    # empty line instead of crashing on .split.
    content = response.choices[0].message.content or ""
    # Drop surrounding newlines/spaces so a trailing "\n" does not end up inside
    # the last field. Tabs are deliberately kept: they are the field delimiter.
    line = content.strip(" \r\n").split('\t')
    res.append(line)

In [None]:
# alias the collected rows as `data`; the pandas DataFrame itself is built in a later cell
data = res

In [None]:
# Define the headers
headers = ['filename', 'smoking_present', 'short_description', 'extended_description']

# Prepare the cleaned data
cleaned_data = []  # rows with exactly len(headers) fields, ready for the DataFrame
errors = []        # untouched copies of rows that had the wrong number of fields


# note - I print out the number of columns to see if we are producing a consistent dataframe, since Versa can produce one-off errors
# with the prompt I used

n_cols = len(headers)

for row in data:
    print(len(row))
    if len(row) == n_cols:
        cleaned_data.append(row)  # valid row
        continue

    # Record a copy, so the error log shows the row exactly as it was received.
    errors.append(list(row))

    if len(row) < n_cols:
        # Pad missing columns with NaN. Build a new list rather than extending
        # `row` in place: `row` is the same object held in `data`, and mutating
        # it would make re-running this cell see already-padded rows.
        cleaned_data.append(list(row) + [np.nan] * (n_cols - len(row)))
    else:
        # Too many fields: fold the overflow into the last column so the row
        # still has exactly n_cols entries (an extra trailing entry would make
        # the DataFrame constructor below fail on the column-count mismatch).
        cleaned_data.append(list(row[:n_cols - 1]) + [', '.join(row[n_cols - 1:])])

# Create a DataFrame with the correct headers
df = pd.DataFrame(cleaned_data, columns=headers)

In [None]:
# Display the assembled DataFrame.
df

In [None]:
# Write the DataFrame to a CSV file in the working directory, without the index column.
df.to_csv("smoking_detection_visual.csv", index=False)