In [1]:
!pip install --upgrade openai



In [2]:
!pip install python-dotenv



In [3]:
from google.colab import drive
from dotenv import load_dotenv
import os
from openai import OpenAI

In [4]:
# Change to your environment variable location in your Google Drive
load_dotenv('/content/drive/MyDrive/Unsupervised Machine Learning/BA 820 Team 6/.env')

api_key = os.getenv('API_KEY')

OpenAI.api_key = api_key

In [5]:
import requests
import openai
import pandas as pd
import base64
import re
from google.colab import files

In [6]:
# Set your API credentials
openai.api_key = os.getenv('API_KEY')  # OpenAI API key for ChatGPT

HF_API_TOKEN = "BA820team6"  # Token for Hugging Face (if using their API)
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}  # Header for Hugging Face requests

# Define model endpoint URLs (using Hugging Face model names as an example)
QWEN_API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2-VL-7B"        # Qwen multi-modal model
FLORENCE_API_URL = "https://api-inference.huggingface.co/models/microsoft/Florence-2-large"  # Florence-2-large model


In [7]:
# Upload chart image files (e.g., PNG or JPG charts)
uploaded = files.upload()  # This will prompt a file chooser in Colab

# List the uploaded file names for confirmation
for file_name in uploaded.keys():
    print("Uploaded file:", file_name)

Saving bar-liedown.png to bar-liedown (3).png
Uploaded file: bar-liedown (3).png


In [8]:
# Prepare a DataFrame or list to collect results
results = []  # will hold dicts for each insight and scores

batch_size = 5  # number of charts to process before pausing for review
chart_count = 0

# Loop over each uploaded chart image
for file_name, image_data in uploaded.items():
    chart_count += 1
    chart_id = file_name  # use filename as Chart_ID for identification

    print(f"\nProcessing {chart_id}...")

    # --- Generate insights using each AI model ---
    # 1. ChatGPT (GPT-4 Vision) insight
    # Encode image in base64 and include in prompt as a markdown image
    b64_image = base64.b64encode(image_data).decode('utf-8')
    chat_prompt = "Analyze the following chart and provide one key business insight:\n"
    chat_prompt += f"![chart](data:image/png;base64,{b64_image})"
    try:
        chat_response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # GPT-4 Vision model (ensure you have access)
            messages=[{"role": "user", "content": chat_prompt}]
        )
        insight_chatgpt = chat_response["choices"][0]["message"]["content"].strip()
    except Exception as e:
        insight_chatgpt = f"[Error fetching ChatGPT insight: {e}]"
    print(f"ChatGPT Insight: {insight_chatgpt}")

    # 2. Qwen insight via API
    qwen_prompt = "Provide a concise business insight based on the above chart."
    try:
        qwen_response = requests.post(
            QWEN_API_URL, headers=headers,
            files={"image": image_data}, data={"text": qwen_prompt}
        )
        # The response might be JSON with a field like 'generated_text', or plain text
        qwen_out = qwen_response.json() if qwen_response.headers.get("Content-Type","").startswith("application/json") else {}
        insight_qwen = (qwen_out.get("generated_text") or qwen_response.text).strip()
    except Exception as e:
        insight_qwen = f"[Error fetching Qwen insight: {e}]"
    print(f"Qwen Insight: {insight_qwen}")

    # 3. Florence-2-large insight via API
    florence_prompt = "What business insight can we infer from the above chart?"
    try:
        florence_response = requests.post(
            FLORENCE_API_URL, headers=headers,
            files={"image": image_data}, data={"text": florence_prompt}
        )
        florence_out = florence_response.json() if florence_response.headers.get("Content-Type","").startswith("application/json") else {}
        insight_florence = (florence_out.get("generated_text") or florence_response.text).strip()
    except Exception as e:
        insight_florence = f"[Error fetching Florence insight: {e}]"
    print(f"Florence-2 Insight: {insight_florence}")

    # --- Evaluate each insight using the other two models ---
    # We will ask each model to rate the insights from the others.
    # Define a helper function to parse "Relevance: X, Coherence: Y" from model outputs.
    def parse_scores(response_text):
        # Find two numbers (1-5) in the response_text
        match = re.search(r'(?i)Relevance[:\s]*([1-5])[^0-9]*Coherence[:\s]*([1-5])', response_text)
        if match:
            return int(match.group(1)), int(match.group(2))
        # If parsing fails, default to 0
        return 0, 0

    # Prepare evaluation prompts for each insight
    eval_prompt_template = (
        "The chart is provided above. Evaluate the following insight in terms of:\n"
        "- Relevance to the chart data (1-5)\n"
        "- Coherence of the explanation (1-5)\n"
        "Insight: \"{insight}\"\n"
        "Respond with format: Relevance: X, Coherence: Y"
    )


Processing bar-liedown (3).png...
ChatGPT Insight: [Error fetching ChatGPT insight: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742
]
Qwen Insight: <!doctype html>
<html class="">
	<head>
		<meta charset="utf-8" />
		<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no" />
		<meta name="description" content="We’re on a journey to advance and democratize artificial intelligence through open source and open science." />
		<meta property="fb:app_id" content="1321688464574422" />
		<meta name="twitter:card" content="summary_large_image" />
		<me

In [10]:
  # 4.1 ChatGPT evaluates Qwen's insight
    try:
        eval_prompt = eval_prompt_template.format(insight=insight_qwen)
        chat_eval = openai.ChatCompletion.create(
            model="gpt-4-vision",
            messages=[{"role": "user", "content": chat_prompt},  # include chart again
                      {"role": "assistant", "content": insight_chatgpt}],  # (optional) ChatGPT's own insight for context
                      functions=[],
            temperature=0,
            max_tokens=50,
            stop=None
        )  # Actually, including its own insight isn't necessary; we mainly need the chart and the insight to evaluate.
        eval_text = chat_eval["choices"][0]["message"]["content"]
    except Exception as e:
        eval_text = f"Relevance: 0, Coherence: 0 (Error: {e})"
    relevance_c_by_chat, coherence_c_by_chat = parse_scores(eval_text)

    # 4.2 ChatGPT evaluates Florence's insight
    try:
        eval_prompt = eval_prompt_template.format(insight=insight_florence)
        chat_eval = openai.ChatCompletion.create(
            model="gpt-4-vision",
            messages=[{"role": "user", "content": chat_prompt}],  # chart image prompt again
            temperature=0,
            max_tokens=50
        )
        # In practice, we might include the Florence insight in the user prompt or as assistant content for evaluation.
        # For simplicity, assuming ChatGPT remembers the chart from earlier in the conversation.
        eval_text = chat_eval["choices"][0]["message"]["content"]
    except Exception as e:
        eval_text = f"Relevance: 0, Coherence: 0 (Error: {e})"
    relevance_f_by_chat, coherence_f_by_chat = parse_scores(eval_text)

    # 4.3 Qwen evaluates ChatGPT's insight
    try:
        eval_prompt = eval_prompt_template.format(insight=insight_chatgpt)
        qwen_eval_resp = requests.post(
            QWEN_API_URL, headers=headers,
            files={"image": image_data}, data={"text": eval_prompt}
        )
        qwen_eval_out = qwen_eval_resp.json() if qwen_eval_resp.headers.get("Content-Type","").startswith("application/json") else {}
        eval_text = (qwen_eval_out.get("generated_text") or qwen_eval_resp.text)
    except Exception as e:
        eval_text = f"Relevance: 0, Coherence: 0 (Error: {e})"
    relevance_c_by_qwen, coherence_c_by_qwen = parse_scores(eval_text)

    # 4.4 Qwen evaluates Florence's insight
    try:
        eval_prompt = eval_prompt_template.format(insight=insight_florence)
        qwen_eval_resp = requests.post(
            QWEN_API_URL, headers=headers,
            files={"image": image_data}, data={"text": eval_prompt}
        )
        qwen_eval_out = qwen_eval_resp.json() if qwen_eval_resp.headers.get("Content-Type","").startswith("application/json") else {}
        eval_text = (qwen_eval_out.get("generated_text") or qwen_eval_resp.text)
    except Exception as e:
        eval_text = f"Relevance: 0, Coherence: 0 (Error: {e})"
    relevance_f_by_qwen, coherence_f_by_qwen = parse_scores(eval_text)

    # 4.5 Florence evaluates ChatGPT's insight
    try:
        eval_prompt = eval_prompt_template.format(insight=insight_chatgpt)
        florence_eval_resp = requests.post(
            FLORENCE_API_URL, headers=headers,
            files={"image": image_data}, data={"text": eval_prompt}
        )
        florence_eval_out = florence_eval_resp.json() if florence_eval_resp.headers.get("Content-Type","").startswith("application/json") else {}
        eval_text = (florence_eval_out.get("generated_text") or florence_eval_resp.text)
    } except Exception as e:
        eval_text = f"Relevance: 0, Coherence: 0 (Error: {e})"
    relevance_c_by_flo, coherence_c_by_flo = parse_scores(eval_text)

    # 4.6 Florence evaluates Qwen's insight
    try:
        eval_prompt = eval_prompt_template.format(insight=insight_qwen)
        florence_eval_resp = requests.post(
            FLORENCE_API_URL, headers=headers,
            files={"image": image_data}, data={"text": eval_prompt}
        )
        florence_eval_out = florence_eval_resp.json() if florence_eval_resp.headers.get("Content-Type","").startswith("application/json") else {}
        eval_text = (florence_eval_out.get("generated_text") or florence_eval_resp.text)
    except Exception as e:
        eval_text = f"Relevance: 0, Coherence: 0 (Error: {e})"
    relevance_f_by_flo, coherence_f_by_flo = parse_scores(eval_text)

    # --- Compute final scores for each insight ---
    # ChatGPT's insight was evaluated by Qwen and Florence
    relevance_chatgpt_score = (relevance_c_by_qwen + relevance_c_by_flo) / 2
    coherence_chatgpt_score = (coherence_c_by_qwen + coherence_c_by_flo) / 2
    avg_chatgpt_score = round((relevance_chatgpt_score + coherence_chatgpt_score) / 2, 2)

    # Qwen's insight was evaluated by ChatGPT and Florence
    relevance_qwen_score = (relevance_c_by_chat + relevance_f_by_flo) / 2
    coherence_qwen_score = (coherence_c_by_chat + coherence_f_by_flo) / 2
    avg_qwen_score = round((relevance_qwen_score + coherence_qwen_score) / 2, 2)

    # Florence's insight was evaluated by ChatGPT and Qwen
    relevance_florence_score = (relevance_f_by_chat + relevance_f_by_qwen) / 2
    coherence_florence_score = (coherence_f_by_chat + coherence_f_by_qwen) / 2
    avg_florence_score = round((relevance_florence_score + coherence_florence_score) / 2, 2)

    # Append results for each model as a separate row (dict) in results list
    results.append({
        "Chart_ID": chart_id,
        "Model_Name": "ChatGPT",
        "Generated_Insight": insight_chatgpt,
        "Relevance_Score": relevance_chatgpt_score,
        "Coherence_Score": coherence_chatgpt_score,
        "Average_Score": avg_chatgpt_score
    })
    results.append({
        "Chart_ID": chart_id,
        "Model_Name": "Qwen",
        "Generated_Insight": insight_qwen,
        "Relevance_Score": relevance_qwen_score,
        "Coherence_Score": coherence_qwen_score,
        "Average_Score": avg_qwen_score
    })
    results.append({
        "Chart_ID": chart_id,
        "Model_Name": "Florence-2-large",
        "Generated_Insight": insight_florence,
        "Relevance_Score": relevance_florence_score,
        "Coherence_Score": coherence_florence_score,
        "Average_Score": avg_florence_score
    })

    # Print evaluation scores for each insight
    print(f"ChatGPT Insight Scores – Relevance: {relevance_chatgpt_score:.1f}, Coherence: {coherence_chatgpt_score:.1f}, Avg: {avg_chatgpt_score:.2f}")
    print(f"Qwen Insight Scores    – Relevance: {relevance_qwen_score:.1f}, Coherence: {coherence_qwen_score:.1f}, Avg: {avg_qwen_score:.2f}")
    print(f"Florence Insight Scores – Relevance: {relevance_florence_score:.1f}, Coherence: {coherence_florence_score:.1f}, Avg: {avg_florence_score:.2f}")

    # Manual checkpoint: pause after each batch of charts
    if chart_count % batch_size == 0:
        input(f"\nProcessed {chart_count} charts. Review the insights and scores above, then press Enter to continue...")

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 79)