In [1]:
from google.colab import drive
import glob, os
from PIL import Image

# Mount Google Drive so the chart images stored there are visible under /content/drive.
drive.mount('/content/drive')

# Set the image folder path (adjust if needed)
image_folder = "/content/drive/MyDrive/BA820-Unsupervised ML/BA 820 Team 6/milestone 2/MarketingCharts"

# Extensions treated as chart images. Anything else in the folder (notes, CSVs,
# notebooks, ...) is skipped so that later Image.open() calls only see real images.
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".bmp", ".gif", ".webp", ".tif", ".tiff"}

# Collect image files only, in sorted order so every run processes them in the
# same sequence (glob itself makes no ordering guarantee).
image_paths = sorted(
    path
    for path in glob.glob(os.path.join(image_folder, "*.*"))
    if os.path.isfile(path)
    and os.path.splitext(path)[1].lower() in IMAGE_EXTENSIONS
)
print(f"Found {len(image_paths)} images in the folder.")
for path in image_paths:
    print(os.path.basename(path))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 1 images in the folder.
line-sale.png


In [2]:
from google.colab import userdata
import openai
openai.api_key = userdata.get('openai')  # Read the key from the Colab secret named 'openai' so it never appears in the notebook source

In [3]:
import torch

# Pick the compute device once, together with the matching precision:
# half precision on a CUDA GPU, full precision on CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

In [4]:
chatgpt_model = "gpt-4o-mini"  # OpenAI model name passed to chat.completions.create; adjust if needed

In [5]:
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
# Hugging Face Hub id of the Qwen2-VL checkpoint used for chart analysis.
qwen_model_id = "Qwen/Qwen2-VL-7B-Instruct"

# Load options: precision follows the device chosen earlier, and
# device_map="auto" delegates weight placement to transformers.
qwen_load_kwargs = {
    "torch_dtype": torch_dtype,
    "device_map": "auto",
}
qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(qwen_model_id, **qwen_load_kwargs)

# The processor is loaded from the same checkpoint id as the model.
qwen_processor = AutoProcessor.from_pretrained(qwen_model_id)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`Qwen2VLRotaryEmbedding` can now be fully parameterized by passing the model config through the `config` argument. All other arguments will be removed in v4.46


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

In [6]:
from transformers import AutoProcessor, AutoModelForCausalLM
# Hugging Face Hub id of the Florence-2 checkpoint.
florence_model_id = "microsoft/Florence-2-large"

# Both loads pass trust_remote_code=True, which lets transformers run the
# model/processor code shipped with the checkpoint.
florence_processor = AutoProcessor.from_pretrained(
    florence_model_id,
    trust_remote_code=True,
)
florence_model = AutoModelForCausalLM.from_pretrained(
    florence_model_id,
    torch_dtype=torch_dtype,
    trust_remote_code=True,
)
# Move the loaded weights onto the device selected earlier.
florence_model = florence_model.to(device)



In [7]:
from openai import OpenAI
client = OpenAI(api_key=openai.api_key)  # Reuses the key stored on openai.api_key earlier; that cell must have run first

def get_chatgpt_insight(image_path=None):
    """
    Ask the ChatGPT model for business insights about a chart.

    Args:
        image_path: Optional path to the chart image. When given, the image is
            attached to the user message as a base64 data URL so the model can
            actually read the chart. When None, only the text prompt is sent
            and the model can answer only in general terms.

    Returns:
        The model's reply as a stripped string, or "Error: <details>" if the
        image could not be read or the API call failed.
    """
    import base64
    import mimetypes

    # Simplified prompt for ChatGPT
    persona_note = "Assume the user is a U.S.-based small business owner with at most some college education. Use plain, easy-to-understand English and avoid technical jargon."
    task_prompt = "Describe the business insights and actions based on the chart."
    try:
        if image_path is None:
            user_content = task_prompt
        else:
            # Inline the image as a data URL; fall back to PNG when the type
            # cannot be guessed from the file name.
            mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
            with open(image_path, "rb") as image_file:
                encoded_image = base64.b64encode(image_file.read()).decode("ascii")
            user_content = [
                {"type": "text", "text": task_prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"},
                },
            ]
        chat_messages = [
            {"role": "system", "content": persona_note},
            {"role": "user", "content": user_content}
        ]
        chat_response = client.chat.completions.create(
            model=chatgpt_model,
            messages=chat_messages
        )
        # content can be None (e.g. a refusal); treat that as an empty reply.
        chat_text = (chat_response.choices[0].message.content or "").strip()
    except Exception as e:
        # Best effort: report the failure as text so the caller's loop keeps running.
        chat_text = f"Error: {e}"
    return chat_text

In [8]:
def extract_qwen_insights(image_path, prompt_text):
    """
    Generate a text answer about one image with the Qwen2-VL model.

    Args:
        image_path: Path to the image file to analyse.
        prompt_text: Instruction sent to the model together with the image.

    Returns:
        The generated text (stripped). If decoding yields nothing, a fixed
        "No insights..." message. On any exception the error is printed and
        "Error: unable to process image" is returned.
    """
    try:
        # Load the pixels inside a context manager so the file handle is
        # released; convert() returns an in-memory copy that outlives it.
        with Image.open(image_path) as image_file:
            image = image_file.convert("RGB")
        # Prepare conversation: image placeholder + prompt
        conversation = [{
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt_text}
            ]
        }]
        # Render the conversation with the processor's own chat template.
        # (chat_template expects a Jinja template, not the user prompt; passing
        # the prompt there drops the image placeholder and role markers.)
        text_input = qwen_processor.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True
        )
        # Prepare model inputs
        inputs = qwen_processor(text=[text_input], images=[image], return_tensors="pt", padding=True)
        inputs = inputs.to(qwen_model.device)
        # Generate output tokens
        output_ids = qwen_model.generate(**inputs, max_new_tokens=128)
        # The output starts with the prompt tokens; keep only the newly generated tail.
        prompt_len = inputs.input_ids.shape[1]
        gen_tokens = output_ids[:, prompt_len:]
        generated_texts = qwen_processor.batch_decode(gen_tokens, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        insight_text = generated_texts[0].strip() if generated_texts else "No insights were generated for this image."
        return insight_text
    except Exception as e:
        print(f"Error processing {image_path} in Qwen: {e}")
        return "Error: unable to process image"

In [9]:
def run_florence_example(task_prompt, image, text_input=None):
    """
    Generate insights for the image using the Florence model.

    Args:
        task_prompt: Task string sent to the model; also passed as `task` to
            the processor's post-processing step.
        image: PIL image to analyse (converted to RGB here).
        text_input: Optional extra text appended directly to task_prompt.

    Returns:
        Whatever florence_processor.post_process_generation returns for the
        decoded output.
    """
    if text_input is None:
        prompt = task_prompt
    else:
        prompt = task_prompt + text_input

    image = image.convert('RGB')
    inputs = florence_processor(text=prompt, images=image, return_tensors="pt")

    # Token ids stay integer; pixel values are cast to the dtype the model was
    # loaded with (float16 on GPU, float32 on CPU) to avoid a dtype mismatch.
    input_ids = inputs["input_ids"].to(device)
    pixel_values = inputs["pixel_values"].to(device, dtype=florence_model.dtype)

    generated_ids = florence_model.generate(
        input_ids=input_ids,
        pixel_values=pixel_values,
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )

    # Florence-2 is an encoder-decoder model: generate() returns only decoder
    # tokens, so there is no prompt prefix to strip. Slicing off prompt-length
    # tokens here would throw away the beginning of the answer.
    # Special tokens are kept because post_process_generation parses them.
    raw_generated = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]

    # For debugging: print raw output before post-processing
    print("Florence raw output:", raw_generated)

    parsed_answer = florence_processor.post_process_generation(
        raw_generated,
        task=task_prompt,
        image_size=(image.width, image.height)
    )
    return parsed_answer

In [10]:
results = []  # one dict of model outputs per image, appended in processing order

# Use a simplified prompt for Qwen and Florence
full_prompt_text = "Describe the business insights and actions based on the chart."

for img_path in image_paths:
    image_name = os.path.basename(img_path)
    print(f"\nProcessing image: {image_name}")

    # --- ChatGPT Analysis ---
    # NOTE(review): this call passes no image, so ChatGPT answers without
    # seeing the chart (the recorded output says as much).
    chat_text = get_chatgpt_insight()
    print("ChatGPT Output:\n", chat_text)

    # --- Qwen Analysis ---
    qwen_text = extract_qwen_insights(img_path, full_prompt_text)
    print("Qwen Output:\n", qwen_text)

    # --- Florence Analysis ---
    # Open inside a context manager so the file handle is released after each
    # image instead of accumulating across the loop.
    with Image.open(img_path) as image:
        florence_text = run_florence_example(full_prompt_text, image)
    print("Florence Output:\n", florence_text)

    # Save insights from all models
    results.append({
        "image": image_name,
        "ChatGPT_Insight": chat_text,
        "Qwen_Insight": qwen_text,
        "Florence_Insight": florence_text
    })

    # Manual pause so the outputs for this image can be reviewed before the next one.
    _ = input("Press Enter to continue to evaluation for this image...")


Processing image: line-sale.png
ChatGPT Output:
 Sure! I'll help you with that. However, since I can't see the chart you're referring to, I can guide you on how to analyze it. Here’s what to think about when looking at a chart for business insights:

1. **Trends**: Look for patterns over time. Are there any noticeable increases or decreases? For example, if sales are going up, that’s a good sign. If they’re going down, you may need to investigate why.

2. **Comparisons**: See how different months, products, or categories compare to each other. Which ones are performing better? This can help you know where to focus your efforts.

3. **Outliers**: Check for any data points that are very different from the others. These could be errors or represent unique situations that are worth looking into.

4. **Goals**: Compare your performance to your business goals. Are you where you want to be? If not, you may need to adjust your strategies.

5. **Customer Behavior**: If the chart relates to cus