In [3]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment so that later
# cells can read OPENAI_API_KEY via os.getenv.
load_dotenv()

True

In [1]:
import pandas as pd
import os


In [5]:
# Sanity check: True only when OPENAI_API_KEY is present and non-empty.
len(os.environ.get("OPENAI_API_KEY", "")) > 0

True

In [6]:

# Inputs: the annotation table and the directory that holds the spectrogram JPG files.
csv_path = "data/AnnotatedSpectrograms.csv"
spectrogram_jpg_dirpath = "/scratch/cse692w25_class_root/cse692w25_class/jhsansom/spectrograms"

spectrogram_metadata = pd.read_csv(filepath_or_buffer=csv_path)

In [7]:
# Full path of every spectrogram image: <directory>/<file name from the "specs" column>.
spectrogram_metadata["path"] = spectrogram_metadata["specs"].map(
    lambda filename: spectrogram_jpg_dirpath + "/" + filename
)
# Negated count of paths missing on disk; 0 means every file was found.
-sum(not os.path.exists(p) for p in spectrogram_metadata["path"])

0

In [8]:
# Keep only the rows whose n_syllables value is present (not NaN).
spectrogram_metadata_nonan = spectrogram_metadata.dropna(subset=["n_syllables"])
spectrogram_metadata_nonan

Unnamed: 0,wavs_paths,peak_frequency,specs,n_syllables,path
0,210924-071028_uiM_tt_28s.wav,1500.0,210924-071028_uiM_tt_28s.jpg,1.0,/scratch/cse692w25_class_root/cse692w25_class/...
1,210624-094108_1s_uM_tt.wav,10265.625,210624-094108_1s_uM_tt.jpg,3.0,/scratch/cse692w25_class_root/cse692w25_class/...
2,210707-112111_6s_mie_tt.wav,6468.75,210707-112111_6s_mie_tt.jpg,6.0,/scratch/cse692w25_class_root/cse692w25_class/...
3,210625-114034_3s_man_tt.wav,8625.0,210625-114034_3s_man_tt.jpg,5.0,/scratch/cse692w25_class_root/cse692w25_class/...
4,210914-080445_uafP_tt_1s.wav,9703.125,210914-080445_uafP_tt_1s.jpg,7.0,/scratch/cse692w25_class_root/cse692w25_class/...
5,211209-112549_uM_tt_78s.wav,1312.5,211209-112549_uM_tt_78s.jpg,1.0,/scratch/cse692w25_class_root/cse692w25_class/...
6,220223-075957_25s_uT_tt.wav,6046.875,220223-075957_25s_uT_tt.jpg,1.0,/scratch/cse692w25_class_root/cse692w25_class/...
7,211103-082228_TAN_tt_1s.wav,4265.625,211103-082228_TAN_tt_1s.jpg,6.0,/scratch/cse692w25_class_root/cse692w25_class/...
8,220120-134001_61s_TRU_tt.wav,10875.0,220120-134001_61s_TRU_tt.jpg,5.0,/scratch/cse692w25_class_root/cse692w25_class/...
9,210924-071430_MEZ_tt_29s.wav,3468.75,210924-071430_MEZ_tt_29s.jpg,7.0,/scratch/cse692w25_class_root/cse692w25_class/...


In [9]:
import torch
# Report the CUDA version exposed by torch.version.cuda.
print(f"PyTorch CUDA version: {torch.version.cuda}")


PyTorch CUDA version: 11.8


In [11]:
import base64
from PIL import Image
import io

def encode_image_to_base64(image_path):
    """Return the image at ``image_path`` re-encoded as PNG, as a base64 string.

    The image is opened with PIL, saved into an in-memory buffer in PNG
    format, and the buffer's bytes are base64-encoded and decoded to UTF-8 text.
    """
    png_buffer = io.BytesIO()
    with Image.open(image_path) as source_image:
        source_image.save(png_buffer, format="PNG")
    return base64.b64encode(png_buffer.getvalue()).decode("utf-8")


In [12]:
import requests
import os

# The key is read from the environment; keep it out of the notebook source.
api_key = os.environ.get("OPENAI_API_KEY")

# HTTP headers shared by every chat-completions request: JSON body + bearer auth.
headers = {}
headers["Content-Type"] = "application/json"
headers["Authorization"] = f"Bearer {api_key}"

def ask_gpt4_with_image(image_b64, question, model="gpt-4o", max_tokens=100, timeout=60):
    """Ask an OpenAI chat model a question about one base64-encoded PNG image.

    Parameters
    ----------
    image_b64 : str
        Base64 text of a PNG image; embedded in a ``data:image/png;base64,`` URL.
    question : str
        Prompt text sent together with the image in a single user message.
    model : str, optional
        Chat-completions model name (default ``"gpt-4o"``).
    max_tokens : int, optional
        Value sent as ``max_tokens`` in the request (default 100).
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 60).

    Returns
    -------
    str
        The ``content`` of the first choice's message in the response.

    Raises
    ------
    requests.HTTPError
        If the API replies with an error status; the message carries the
        status code and the response body.
    requests.Timeout
        If the request does not complete within ``timeout`` seconds.
    """
    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": [
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {
                    "url": f"data:image/png;base64,{image_b64}"
                }}
            ]}
        ],
        "max_tokens": max_tokens
    }
    # Without an explicit timeout, requests may wait forever on a stalled connection.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers, json=data, timeout=timeout
    )
    # Surface API failures (bad key, rate limit, ...) with their real cause instead
    # of an opaque KeyError on "choices" further down.
    if not response.ok:
        raise requests.HTTPError(
            f"OpenAI API request failed ({response.status_code}): {response.text}",
            response=response,
        )
    return response.json()["choices"][0]["message"]["content"]


In [21]:
results_q1 = []
results_q2 = []

# The prompts are the same for every image, so define them once outside the loop.
query1 = "What is the peak frequency indicated by the red dashed line in this spectrogram? respond with a number only."
query2 = "How many syllables are there visible in this spectrogram? Respond with a number only."

# Iterate over the filtered frame itself (not positions 0..n-1 of the unfiltered
# one) so each answer belongs to the row it is later paired with, and append
# exactly one entry per row so both lists keep the length of
# spectrogram_metadata_nonan.
for row_label, path in spectrogram_metadata_nonan["path"].items():
    if not os.path.exists(path):
        print(f"File {path} does not exist.")
        # .loc writes into the frame directly; chained indexing
        # (frame["path"][i] = ...) may only modify a temporary copy.
        spectrogram_metadata.loc[row_label, "path"] = None
        results_q1.append(None)
        results_q2.append(None)
        continue

    try:
        image_b64 = encode_image_to_base64(path)
        result1 = ask_gpt4_with_image(image_b64, query1)
        result2 = ask_gpt4_with_image(image_b64, query2)
    except Exception as e:
        # Best effort: log the failure, record both answers as missing, keep going.
        print(f"Error processing file {path}: {e}")
        result1 = result2 = None

    results_q1.append(result1)
    results_q2.append(result2)


In [22]:
# Pair each spectrogram file name with the two model answers, then write the table to CSV.
results_df = pd.DataFrame(
    dict(
        filename=spectrogram_metadata_nonan["specs"],
        peak_frequency=results_q1,
        n_syllables=results_q2,
    )
)

results_df.to_csv(
    path_or_buf='/scratch/cse692w25_class_root/cse692w25_class/jhsansom/results/specs/AnnotatedSpectrograms_GPT4.csv',
    index=False,
)

In [24]:
# Display the collected answers (file name, peak_frequency, n_syllables) for inspection.
results_df

Unnamed: 0,filename,peak_frequency,n_syllables
0,210924-071028_uiM_tt_28s.jpg,1000,1
1,210624-094108_1s_uM_tt.jpg,10000,3
2,210707-112111_6s_mie_tt.jpg,7500,3
3,210625-114034_3s_man_tt.jpg,10000,2
4,210914-080445_uafP_tt_1s.jpg,10000,7
5,211209-112549_uM_tt_78s.jpg,1804,0
6,220223-075957_25s_uT_tt.jpg,5000,2
7,211103-082228_TAN_tt_1s.jpg,5000,4
8,220120-134001_61s_TRU_tt.jpg,11000,5
9,210924-071430_MEZ_tt_29s.jpg,2500,1
