## Analyzing the breakdown of subjective vs objective song analysis

In [None]:

from openai import OpenAI

client = OpenAI(api_key='ENTER API KEY')

# Set up your API key

# Chat completion function
def chat_with_gpt(reference_caption):
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content":
                    "You are an expert in music captioning. Your task is to analyze a given caption and categorize its words into two distinct lists:\n\n"
                    "1. **Objective Words**: Words that describe measurable, factual elements such as:\n"
                    "   - Instruments used (e.g., guitar, piano, drums)\n"
                    "   - Genre (e.g., jazz, rock, classical)\n"
                    "   - Tempo (e.g., fast, slow, upbeat)\n"
                    "   - Gender of the artist (e.g., male, female, non-binary)\n"
                    "   - Audio quality (e.g., clear, distorted, lo-fi)\n\n"
                    "2. **Subjective Words**: Words that describe opinions, emotions, or mood, such as:\n"
                    "   - Emotional tone (e.g., sad, joyful, melancholic)\n"
                    "   - Descriptive adjectives (e.g., haunting, energetic, dreamy)\n\n"
                    "Please strictly follow this output format:\n"
                    "**Output Format:** `[[list of objective words], [list of subjective words]]`\n"
                    "- The response must be a valid Python list containing two sublists.\n"
                    "- Do not include explanations or extra text outside the list."
                },
                {"role": "user", "content": f"Caption: {reference_caption}\n\nPlease extract and return the lists accordingly."}
            ]
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"An error occurred: {e}")
        return None




In [None]:
import pandas as pd


In [None]:
the_captioning_table = pd.read_csv("/content/musiccaps-public.csv")

In [None]:
the_captioning_table

Unnamed: 0,ytid,start_s,end_s,audioset_positive_labels,aspect_list,caption,author_id,is_balanced_subset,is_audioset_eval
0,-0Gj8-vB1q4,30,40,"/m/0140xf,/m/02cjck,/m/04rlf","['low quality', 'sustained strings melody', 's...",The low quality recording features a ballad so...,4,False,True
1,-0SdAVK79lg,30,40,"/m/0155w,/m/01lyv,/m/0342h,/m/042v_gx,/m/04rlf...","['guitar song', 'piano backing', 'simple percu...",This song features an electric guitar as the m...,0,False,False
2,-0vPFx-wRRI,30,40,"/m/025_jnm,/m/04rlf","['amateur recording', 'finger snipping', 'male...",a male voice is singing a melody with changing...,6,False,True
3,-0xzrMun0Rs,30,40,"/m/01g90h,/m/04rlf","['backing track', 'jazzy', 'digital drums', 'p...",This song contains digital drums playing a sim...,6,False,True
4,-1LrH01Ei1w,30,40,"/m/02p0sh1,/m/04rlf","['rubab instrument', 'repetitive melody on dif...",This song features a rubber instrument being p...,0,False,False
...,...,...,...,...,...,...,...,...,...
5516,zw5dkiklbhE,15,25,"/m/01sm1g,/m/0l14md","['amateur recording', 'percussion', 'wooden bo...",This audio contains someone playing a wooden b...,6,False,False
5517,zwfo7wnXdjs,30,40,"/m/02p0sh1,/m/04rlf,/m/06j64v","['instrumental music', 'arabic music', 'genera...",The song is an instrumental. The song is mediu...,1,True,True
5518,zx_vcwOsDO4,50,60,"/m/01glhc,/m/02sgy,/m/0342h,/m/03lty,/m/04rlf,...","['instrumental', 'no voice', 'electric guitar'...",The rock music is purely instrumental and feat...,2,True,True
5519,zyXa2tdBTGc,30,40,"/m/04rlf,/t/dd00034","['instrumental music', 'gospel music', 'strong...",The song is an instrumental. The song is slow ...,1,False,False


In [None]:
def convert_string_to_lists(input_string):
    """Converts a string representation of nested lists to two separate lists.

    Args:
        input_string: The string to convert.  It should represent a list containing two lists.

    Returns:
        A tuple containing two lists, or None if the input is invalid.
    """
    try:
        # Remove brackets and split by comma
        cleaned_string = input_string.replace('[', '').replace(']', '').strip()
        list_strings = cleaned_string.split(',')

        # Split the list strings further into two main lists
        objective_words = [item.strip().replace("'", "") for item in list_strings[0].split()]
        subjective_words = [item.strip().replace("'", "") for item in list_strings[1].split()]
        return objective_words, subjective_words

    except Exception as e:
        print(f"An error occurred: {e}")
        return None


In [None]:
def formatting_responses(df):
  new_dataframe = pd.DataFrame(columns=["id","caption", "objective_words", "subjective_words"])
  count = 0
  for index in range(len(df)):
    list_of_words = chat_with_gpt(df.iloc[index]["caption"])
    convert_string_to_lists(list_of_words)
    id = df.iloc[index]["ytid"]
    caption = df.iloc[index]["caption"]
    objective_words = convert_string_to_lists(list_of_words)[0]
    subjective_words= convert_string_to_lists(list_of_words)[1]
    new_dataframe.loc[index] = [id, caption, objective_words, subjective_words]
    count+=1
    print(count)
    if count%100 == 0:
      new_dataframe.to_csv("new_dataframe.csv")
      print(new_dataframe)
  new_dataframe.to_csv("new_dataframe.csv")
  return new_dataframe



In [None]:
import pandas as pd
import concurrent.futures
from tqdm import tqdm  # Progress bar library

def process_caption(row):
    """Function to process a single row from the DataFrame"""
    try:
        list_of_words = chat_with_gpt(row["generated_caption"])
        objective_words, subjective_words = convert_string_to_lists(list_of_words)
        return row["file_id"], row["generated_caption"], objective_words, subjective_words
    except Exception as e:
        print(f"Error processing row {row['file_id']}: {e}")
        return row["file_id"], row["generated_caption"], None, None  # Handle failed API calls gracefully

def formatting_responses(df, num_workers=10):
    """Parallelized function to process DataFrame using ThreadPoolExecutor with a progress bar"""
    results = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
        # Use tqdm for progress visualization
        for result in tqdm(executor.map(process_caption, df.to_dict(orient="records")), total=len(df), desc="Processing Captions"):
            results.append(result)

    # Convert results into a DataFrame
    new_dataframe = pd.DataFrame(results, columns=["id", "caption", "objective_words", "subjective_words"])

    # Save results to CSV
    new_dataframe.to_csv("new_dataframe.csv", index=False)

    return new_dataframe


In [None]:
our_table = formatting_responses(the_captioning_table)

Processing Captions: 100%|██████████| 5521/5521 [14:51<00:00,  6.19it/s]


In [None]:
our_table.to_csv("total_table_analysis.csv")

In [None]:
# prompt: convert [['advertisement', 'jingle', 'didgeridoo', 'percussively', 'drone', 'amateur recording', 'poor audio-quality'], ['scary', 'horror-like']] to two lists, the initial input is a string

def convert_string_to_lists(input_string):
    """Converts a string representation of nested lists to two separate lists.

    Args:
        input_string: The string to convert.  It should represent a list containing two lists.

    Returns:
        A tuple containing two lists, or None if the input is invalid.
    """
    try:
        # Remove brackets and split by comma
        cleaned_string = input_string.replace('[', '').replace(']', '').strip()
        list_strings = cleaned_string.split(',')

        # Split the list strings further into two main lists
        objective_words = [item.strip().replace("'", "") for item in list_strings[0].split()]
        subjective_words = [item.strip().replace("'", "") for item in list_strings[1].split()]
        return objective_words, subjective_words

    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Example usage
string_input = "['advertisement', 'jingle', 'didgeridoo', 'percussively', 'drone', 'amateur recording', 'poor audio-quality'], ['scary', 'horror-like']"
objective_list, subjective_list = convert_string_to_lists(string_input)

if objective_list and subjective_list:
  print("Objective Words:", objective_list)
  print("Subjective Words:", subjective_list)


Objective Words: ['advertisement']
Subjective Words: ['jingle']


In [None]:
inference_table = pd.read_csv("/content/csv_validation_table.csv")

In [None]:
inference_table = formatting_responses(inference_table)

Processing Captions:  92%|█████████▏| 951/1032 [02:55<00:11,  7.21it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row Xus0LI3QV2A: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row mqyeBqaUeN8: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message':

Processing Captions:  94%|█████████▍| 974/1032 [03:00<00:08,  6.86it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row WK-gdfCurCg: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row 8oTTgXIO0-I: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message':

Processing Captions:  95%|█████████▍| 977/1032 [03:00<00:07,  6.95it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row AVogdV8khxc: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row IqGB4nQIAcQ: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message':

Processing Captions:  95%|█████████▍| 980/1032 [03:01<00:07,  7.04it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row ZleHXDirD58: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row 5bn7PPKcqSA: cannot unpack non-iterable NoneType object


Processing Captions:  95%|█████████▌| 982/1032 [03:01<00:07,  7.07it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row QZNrK337wow: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row 6XZGmRuaOfo: cannot unpack non-iterable NoneType object


Processing Captions:  95%|█████████▌| 984/1032 [03:02<00:09,  5.15it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row hoPnrbKOEl8: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row tJWduBZRJkE: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message':

Processing Captions:  96%|█████████▌| 987/1032 [03:02<00:07,  5.98it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row IMnh-TIyFuE: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row JPVRBbdykSw: cannot unpack non-iterable NoneType object


Processing Captions:  96%|█████████▌| 990/1032 [03:03<00:06,  6.01it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row wraN7rWUsfI: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row AzsBSUmhl1M: cannot unpack non-iterable NoneType object


Processing Captions:  96%|█████████▌| 991/1032 [03:03<00:07,  5.37it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row woVby8SBWDI: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row MipnqUXgpOA: cannot unpack non-iterable NoneType object


Processing Captions:  96%|█████████▌| 993/1032 [03:03<00:07,  5.01it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row uAgizG1hYw0: cannot unpack non-iterable NoneType object


Processing Captions:  96%|█████████▋| 994/1032 [03:04<00:09,  3.93it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row 9ZryMX2UtAo: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row tw8-TlQBcBA: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message':

Processing Captions:  97%|█████████▋| 997/1032 [03:04<00:06,  5.45it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row mWuX--EEq2E: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row ua0hgl8fi0I: cannot unpack non-iterable NoneType object


Processing Captions:  97%|█████████▋| 999/1032 [03:05<00:05,  5.67it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row 4GlH0-KhInI: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row ABWE9tjTvuI: cannot unpack non-iterable NoneType object


Processing Captions:  97%|█████████▋| 1001/1032 [03:05<00:04,  6.47it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row O6xMQnKJROc: cannot unpack non-iterable NoneType object


Processing Captions:  97%|█████████▋| 1002/1032 [03:05<00:05,  5.37it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row ViF7A7XODiw: cannot unpack non-iterable NoneType object


Processing Captions:  97%|█████████▋| 1003/1032 [03:05<00:05,  5.13it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row ymOjaaxRDLU: cannot unpack non-iterable NoneType object


Processing Captions:  97%|█████████▋| 1005/1032 [03:06<00:06,  4.17it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row IFumVgqOVaM: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row 5gh5H0QqJl0: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message':

Processing Captions:  98%|█████████▊| 1007/1032 [03:06<00:04,  5.18it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row kR2yBlL6nFU: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row dS8AZdmn8Wk: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message':

Processing Captions:  98%|█████████▊| 1009/1032 [03:08<00:08,  2.60it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row 51bsCRv6kI0: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row DroAzooK4yw: cannot unpack non-iterable NoneType object


Processing Captions:  98%|█████████▊| 1015/1032 [03:08<00:03,  5.26it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row l8P2wU-JyI8: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row VzFpg271sm8: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message':

Processing Captions:  99%|█████████▊| 1019/1032 [03:08<00:01,  6.55it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row I-Z3gB6pfIA: cannot unpack non-iterable NoneType object


Processing Captions:  99%|█████████▉| 1020/1032 [03:09<00:01,  6.02it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row cnmedj0fYTQ: cannot unpack non-iterable NoneType object


Processing Captions:  99%|█████████▉| 1021/1032 [03:09<00:02,  4.76it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row doHRurF8bf8: cannot unpack non-iterable NoneType object


Processing Captions:  99%|█████████▉| 1023/1032 [03:10<00:01,  4.96it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row Pm6vRblouxc: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row dl1ljByerd8: cannot unpack non-iterable NoneType object


Processing Captions:  99%|█████████▉| 1025/1032 [03:10<00:01,  5.30it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row CphwhKgYHaM: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row OjEG808MfF4: cannot unpack non-iterable NoneType object


Processing Captions:  99%|█████████▉| 1026/1032 [03:10<00:01,  5.35it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row 0F-Z0zF1504: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row wj4ukZFNEgs: cannot unpack non-iterable NoneType object
An error occurred: Error code: 429 - {'error': {'message':

Processing Captions: 100%|█████████▉| 1029/1032 [03:10<00:00,  6.16it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row C8VECv8kicU: cannot unpack non-iterable NoneType object


Processing Captions: 100%|█████████▉| 1030/1032 [03:11<00:00,  5.78it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row D-p9s8y2z_U: cannot unpack non-iterable NoneType object


Processing Captions: 100%|█████████▉| 1031/1032 [03:11<00:00,  4.40it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row qtnE1hnCD0M: cannot unpack non-iterable NoneType object


Processing Captions: 100%|██████████| 1032/1032 [03:11<00:00,  5.38it/s]

An error occurred: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
An error occurred: 'NoneType' object has no attribute 'replace'
Error processing row gD6UoStqCsg: cannot unpack non-iterable NoneType object





In [None]:
inference_table = inference_table.dropna()

In [None]:
inference_table.to_csv("inference_analysis.csv")

In [None]:
def subjective_objective(df):
  subjective_lengths = []
  objective_lengths = []
  for index in range(len(df)):
    caption_length = len(df.iloc[index]["caption"].split())
    objective_percentage = len(df.iloc[index]["objective_words"])/caption_length
    subjective_percentage = len(df.iloc[index]["subjective_words"])/caption_length
    subjective_lengths.append(subjective_percentage)
    objective_lengths.append(objective_percentage)

  return objective_lengths, subjective_lengths



In [None]:
baseline = subjective_objective(our_table)

In [None]:
import numpy as np

In [None]:
print({
    "Percent of words describing objective features for ground truth": np.mean(baseline[0]),
    "Percent of words describing subjective features for ground truth": np.mean(baseline[1])


})

{'Percent of words describing objective features for ground truth': 0.030404314187460063, 'Percent of words describing subjective features for ground truth': 0.02638016679681101}


In [None]:
validation = subjective_objective(inference_table)

In [None]:
print({
    "Percent of words describing objective features for generated": np.mean(validation[0]),
    "Percent of words describing subjective features for generated": np.mean(validation[1])


})

{'Percent of words describing objective features for generated': 0.03795286729117617, 'Percent of words describing subjective features for generated': 0.028890143447149994}


In [None]:
inference_table

Unnamed: 0,id,caption,objective_words,subjective_words
0,EUmfsCvmkgo,This is a kids music piece in the style of pop...,[kids],"[pop, punk]"
1,Wu3LKQG1fwU,The low quality recording features a live perf...,"[low, quality]",[live]
2,8jDanS4ZzRc,This is a classical music piece. It is an inst...,[classical],[instrumental]
3,lTAfSpsyTSI,This symphonic song starts off with the melody...,[symphonic],[instrument]
4,LbPRGDwlfqs,The low quality recording features a soft rock...,"[low, quality]","[soft, rock]"
...,...,...,...,...
960,Z-G7nL9tiws,The low quality recording features an uptempo ...,"[low, quality]",[uptempo]
961,1JwoLPCIGhs,The low quality recording features a cover of ...,"[low, quality]",[recording]
962,7Mv4eKPe850,The low quality recording features a live perf...,"[low, quality]",[live]
963,aBXntqgPo6Q,This is an instrument showcase jam. There is a...,[instrument],[male]


## Calculating whether or not the difference is significant

In [None]:
pooled_proportion = ((np.mean(baseline[1]) * len(baseline[1])) + (np.mean(validation[1]) * len(validation[1]))) / (len(baseline[1]) + len(validation[1]))


In [None]:
pooled_proportion

0.02675360612267859

In [None]:
z_score = (np.mean(baseline[1]) - np.mean(validation[1])) / np.sqrt(pooled_proportion * (1 - pooled_proportion) * ((1/len(baseline[1])) + (1/len(validation[1]))))


In [None]:
z_score

-0.4458114036838248

## Not statistically significant, now trying objective

In [None]:
pooled_proportion = ((np.mean(baseline[0]) * len(baseline[0])) + (np.mean(validation[0]) * len(validation[0]))) / (len(baseline[0]) + len(validation[0]))


In [None]:
z_score = (np.mean(baseline[0]) - np.mean(validation[0])) / np.sqrt(pooled_proportion * (1 - pooled_proportion) * ((1/len(baseline[0])) + (1/len(validation[0]))))


In [None]:
z_score

-1.2381122070567319

In [None]:
combined_table = our_table.merge(inference_table, on="id").rename({"caption_x":"caption_truth","objective_words_x":"objective_words_truth","subjective_words_x":"subjective_words_truth","caption_y":"caption_generated","objective_words_y":"objective_words_generated","subjective_words_y":"subjective_words_generated"},axis=1)

In [None]:
combined_table

Unnamed: 0,id,caption_truth,objective_words_truth,subjective_words_truth,caption_generated,objective_words_generated,subjective_words_generated
0,-m9pH0WXQto,The Electro Pop song features a flat female vo...,"[Electro, Pop]",[female],The low quality recording features a pop song ...,[low],[female]
1,05JAmKFVy44,The low quality recording features a kids song...,"[low, quality]",[kids],"The low quality recording features a filtered,...","[low, quality]",[recording]
2,05OJDYeHLMc,This audio contains very strong deep percussiv...,[percussive],[e-guitar],This song is a spirited duet in foreign langua...,[duet],"[foreign, language]"
3,0Gxn9FtaJFc,This is a rock music piece with a male vocal. ...,[rock],[male],This is an indie rock music piece. There is a ...,"[indie, rock]",[male]
4,0JbGxIR8JTk,Afrobeat inspired music with an island flavor....,[Afrobeat],[island],A male vocalist sings this energetic song. The...,[male],[fast]
...,...,...,...,...,...,...,...
960,ynWPvcGXFrM,Violins are playing a lead melody underlined b...,[violins],[lead],"This is an upbeat, soulful and funky song. It'...",[upbeat],[fast]
961,z9hRQiJMnIw,This heavy metal song features a high pitched ...,"[heavy, metal]",[male],This is the live recording of an alternative m...,[live],[alternative]
962,zPhuyMYy9EI,A high pitched long note is playing along with...,[synthesizer],[pad],The low quality recording features an ambient ...,"[low, quality]",[ambient]
963,zaEdWwSamS0,Different melodies are being played by bell so...,[bell],[e-piano],The low quality recording features a cover of ...,[quality],[cover]


In [None]:
# prompt: for the given table, check the number of overlapping words between objective_words_truth and objective_words_generated. do the same for subjective

def overlapping_words(df):
    objective_overlap = []
    subjective_overlap = []

    for index in range(len(df)):
        objective_truth = set(df.iloc[index]["objective_words_truth"])
        objective_generated = set(df.iloc[index]["objective_words_generated"])
        objective_overlap.append(len(objective_truth.intersection(objective_generated)))

        subjective_truth = set(df.iloc[index]["subjective_words_truth"])
        subjective_generated = set(df.iloc[index]["subjective_words_generated"])
        subjective_overlap.append(len(subjective_truth.intersection(subjective_generated)))

    return objective_overlap, subjective_overlap

objective_overlap, subjective_overlap = overlapping_words(combined_table)

print("Average overlapping objective words:", np.mean(objective_overlap))
print("Average overlapping subjective words:", np.mean(subjective_overlap))


Average overlapping objective words: 0.20518134715025907
Average overlapping subjective words: 0.07046632124352331
