In [59]:
import getpass
import logging
import os

import datasets
import pandas as pd
import tenacity
from datasets import DatasetDict, load_from_disk
from langchain_openai import ChatOpenAI, OpenAI

# INFO level makes the per-request HTTP and retry log lines visible in the cell output.
logging.basicConfig(level=logging.INFO, format=' %(asctime)s - %(levelname)s - %(message)s')

# The key has to be in the environment BEFORE the client is constructed, and it must
# never be hard-coded in the notebook. Reuse an existing OPENAI_API_KEY if one is set;
# otherwise prompt for it without echoing it to the output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# temperature=0.0 keeps the labelling as repeatable as the API allows.
model = ChatOpenAI(model="gpt-4o", temperature=0.0)

In [28]:
# Read the dataset previously saved to disk and show its split summary
dataset_path = 'hfdata_music.json'
music = load_from_disk(dataset_path)
music

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'ner_tags'],
        num_rows: 1785
    })
    test: Dataset({
        features: ['id', 'tokens', 'ner_tags'],
        num_rows: 506
    })
    valid: Dataset({
        features: ['id', 'tokens', 'ner_tags'],
        num_rows: 260
    })
})

In [5]:
# Tag inventory: "O" for tokens outside any entity, then B-/I- tags per entity type
labels = [
    "O",
    "B-Artist",
    "I-Artist",
    "B-Work of Art",
    "I-Work of Art",
]

print(labels)

['O', 'B-Artist', 'I-Artist', 'B-Work of Art', 'I-Work of Art']


In [6]:
# Lookup tables and helpers for translating between numeric tag ids and text labels
id2label = dict(
    enumerate(
        (
            "O",
            "B-Artist",
            "I-Artist",
            "B-Work of Art",
            "I-Work of Art",
        )
    )
)


def convert_id2label(input_list):
    """Translate every numeric tag id in ``input_list`` to its text label.

    Items that have no entry in ``id2label`` are passed through unchanged.
    """
    text_labels = []
    for tag in input_list:
        text_labels.append(id2label.get(tag, tag))
    return text_labels


# Inverse table: text label -> numeric id
label2id = dict((name, idx) for idx, name in id2label.items())


def convert_label2id(input_list):
    """Translate every text label in ``input_list`` to its numeric tag id.

    Items that have no entry in ``label2id`` are passed through unchanged.
    """
    return list(map(lambda name: label2id.get(name, name), input_list))

In [54]:
# Number of few-shot examples, taken from the start of the training split
k = 32

# Pair each selected token sequence with its tags rendered as text labels
examples = []

for idx in range(k):
    row = music["train"][idx]
    examples.append(
        {
            "input": row['tokens'],
            "output": convert_id2label(row['ner_tags']),
        }
    )

for example in examples:
    print(example)

{'input': ['suggestions', 'for', 'metal', 'mixed', 'with', 'classical', 'ochestra'], 'output': ['O', 'O', 'O', 'O', 'O', 'O', 'O']}
{'input': ['who', 'are', 'some', 'good', 'artists', 'that', 'have', 'good', 'meaningful', 'lyrics', 'that', 'play', 'acoustic', 'guitar'], 'output': ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']}
{'input': ['help', 'me', 'find', 'songs', 'genre', 'similar', 'to', 'this'], 'output': ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']}
{'input': ['looking', 'for', 'some', 'chill', 'rock', 'jazz', 'blues', 'to', 'listen', 'to', 'as', 'background', 'music', 'to', 'work', 'game', 'etc', 'to'], 'output': ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']}
{'input': ['hardest', 'geometry', 'problem', 'in', 'the', 'world', 'mark', 'mothersbaugh', 'similar', 'songs'], 'output': ['B-Work of Art', 'I-Work of Art', 'I-Work of Art', 'I-Work of Art', 'I-Work of Art', 'I-Work of Art', 'B-Artist', 'I-Artist', 'O', 'O']

In [55]:
from langchain_core.prompts import (ChatPromptTemplate,FewShotChatMessagePromptTemplate)

# Each example is rendered as a human turn (the tokens) followed by an AI turn (the labels).
example_messages = [
    ("human", "{input}"),
    ("ai", "{output}"),
]
example_prompt = ChatPromptTemplate.from_messages(example_messages)

# Expands the whole `examples` list into alternating human/AI messages.
few_shot_prompt = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)

In [56]:
# System instructions: describe the tagging task, the allowed labels and the output format.
system_instructions = """
            You are an expert in Natural Language Processing. 
            Your task is to identify common Named Entities (NER) for each token in a given list.
            Each token can be one of the following labels: ['O', 'B-Artist', 'I-Artist', 'B-Work of Art', 'I-Work of Art'].
            Return your answer in the format of a list of labels, where each label corresponds to a token in the input list.
            The number of labels in output sequence should be equal to the number of tokens in the input list.
            Print output list only"""

# Message order: system instructions, then the few-shot examples, then the sequence to label.
prompt_messages = [
    ('system', system_instructions),
    few_shot_prompt,
    ("human", "{input}"),
]
final_prompt = ChatPromptTemplate.from_messages(prompt_messages)

In [57]:
import ast

# Retrieve list of labels from LLM output
def retrieve_labels(output_result):
    """Parse the list of labels out of a model response.

    Args:
        output_result: Response object whose ``content`` attribute holds the
            model's reply as text.

    Returns:
        list: The parsed Python list of labels.

    Raises:
        ValueError: If no Python list literal can be parsed from the reply.
    """
    res_content = output_result.content

    # The model sometimes wraps the list in a code fence or adds extra text.
    # Keep only the outermost [...] span; fall back to the whole reply if none is found.
    start = res_content.find("[")
    end = res_content.rfind("]")
    if 0 <= start < end:
        candidate = res_content[start:end + 1]
    else:
        candidate = res_content.strip()

    try:
        # literal_eval only evaluates Python literals, so model text is never executed.
        output_list = ast.literal_eval(candidate)
    except (ValueError, SyntaxError) as exc:
        raise ValueError(
            f"Could not parse a list of labels from model output: {res_content!r}"
        ) from exc

    # An unbracketed reply such as "'O', 'O'" parses as a tuple; normalize it to a list.
    if isinstance(output_list, tuple):
        output_list = list(output_list)
    if not isinstance(output_list, list):
        raise ValueError(
            f"Model output is not a list of labels: {res_content!r}"
        )
    return output_list


In [58]:
import time

# Chain prompt and invoke model
chain = final_prompt | model

# Number of test sequences to label in this run
n_test = 100


# A decorator can only be applied to a function or class, not to a `for` statement,
# so the API call is wrapped in a small function and the retry policy is attached to it.
# Up to 5 attempts with randomized exponential backoff (1-10 s) to ride out rate limits;
# reraise=True surfaces the original exception once the attempts are used up.
@tenacity.retry(
    wait=tenacity.wait_random_exponential(min=1, max=10),
    stop=tenacity.stop_after_attempt(5),
    reraise=True,
)
def invoke_with_retry(tokens):
    """Send one token sequence through the chain and return the model response."""
    return chain.invoke({"input": tokens})


test_results = []
test_split = music['test']

# Never index past the end of the split if it has fewer rows than n_test.
for i in range(min(n_test, len(test_split))):
    print("Predicting", i)
    test_sequence = test_split[i]['tokens']
    res = invoke_with_retry(test_sequence)
    pred_labels = retrieve_labels(res)
    # Keep the row index with the prediction so it can be aligned with the gold tags later.
    test_results.append((i, pred_labels))

Predicting 0


 2024-09-20 23:11:48,867 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
 2024-09-20 23:11:48,876 - INFO - Retrying request to /chat/completions in 0.789509 seconds
 2024-09-20 23:11:49,855 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
 2024-09-20 23:11:49,858 - INFO - Retrying request to /chat/completions in 1.699122 seconds
 2024-09-20 23:11:51,749 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [47]:
# Show difference in length between input and output sequences
def view_len_results(results_list):
    """Return, for each result, predicted-label count minus input-token count.

    ``results_list`` holds ``(index, pred_labels)`` pairs, where ``index``
    selects the token sequence from ``music['test']['tokens']``. A value of 0
    means the prediction has exactly one label per token.
    """
    def _length_gap(entry):
        idx, predicted = entry
        n_tokens = len(music['test']['tokens'][idx])
        return len(predicted) - n_tokens

    return list(map(_length_gap, results_list))

length_gaps = view_len_results(test_results)
print("Difference in length of output and input:", length_gaps)

Difference in length of output and input: [0, 0, 1, 0, 0, -1, -1, 0, 0, 1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, 0, -3, 0, -1, 0, 1, -2, 0, -2, 2, 1, -1, -1, 0, -1, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 1, 0, 0, -1, -2, 0, -3, 0, 0, 1, 0, 0, -1, 0, 0, 0, 1, 0, 0, -1, -2, 0, 0, 0, -1, 0, 0, 0, -3, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -1, 0, -2, 0, 0]


In [48]:
# Format list for evaluation
# Match the length of the predicted list with the true list
def match_list(pred_list, true_list):
    """Return a copy of ``pred_list`` whose length equals ``len(true_list)``.

    A prediction that is too short is padded at the end with ``"O"``; one that
    is too long is truncated from the end. The input list is NOT modified, so
    the raw predictions kept by the caller stay intact and this function gives
    the same answer every time it is called.

    Args:
        pred_list: Predicted labels for one sequence.
        true_list: Reference labels for the same sequence; only its length is used.

    Returns:
        list: A new list of exactly ``len(true_list)`` labels.
    """
    target_len = len(true_list)
    matched = list(pred_list)[:target_len]
    matched.extend(["O"] * (target_len - len(matched)))
    return matched

# Flat, token-level label sequences across all evaluated examples
y_true = []
y_pred = []

# `example_id` rather than `id`: a module-level `id` would shadow the builtin
# for every later cell in the kernel session.
for example_id, pred_labels in test_results:
    true_labels = convert_id2label(music['test'][example_id]['ner_tags'])
    # Align the prediction with the gold sequence so both flat lists stay the same length.
    padded_pred_labels = match_list(pred_labels, true_labels)
    y_true.extend(true_labels)
    y_pred.extend(padded_pred_labels)


In [49]:
from sklearn.metrics import classification_report
print("Classification Report\n")
# Passing the labels explicitly fixes the row order of the report.
report_labels = ['O', 'B-Artist', 'I-Artist', 'B-Work of Art', 'I-Work of Art']
report_text = classification_report(y_true, y_pred, labels=report_labels)
print(report_text)

Classification Report

               precision    recall  f1-score   support

            O       0.96      0.98      0.97       734
     B-Artist       0.77      0.77      0.77        39
     I-Artist       0.76      0.74      0.75        35
B-Work of Art       0.71      0.65      0.68        31
I-Work of Art       0.75      0.57      0.65        42

     accuracy                           0.93       881
    macro avg       0.79      0.74      0.76       881
 weighted avg       0.93      0.93      0.93       881



In [50]:
from collections import Counter

# Tally how often each label occurs in the gold and in the predicted sequences
y_true_counted = Counter(y_true)
y_pred_counted = Counter(y_pred)

for caption, tally in (
    ("Count of y_true instances:", y_true_counted),
    ("Count of y_pred instances:", y_pred_counted),
):
    # Sorting by label keeps the two printed lines directly comparable.
    print(caption, sorted(tally.items()))

Count of y_true instances: [('B-Artist', 39), ('B-Work of Art', 31), ('I-Artist', 35), ('I-Work of Art', 42), ('O', 734)]
Count of y_pred instances: [('B-Artist', 39), ('B-Work of Art', 28), ('I-Artist', 34), ('I-Work of Art', 32), ('O', 748)]


In [51]:
def df_from_result(result_tuple):
    """Build a per-token DataFrame for one evaluated test example.

    Args:
        result_tuple: ``(index, pred_labels)`` pair, where ``index`` is the row
            of the example in ``music['test']``.

    Returns:
        pandas.DataFrame: One row per token with the columns ``ID``,
        ``True Label``, ``Predicted Label`` and ``Token``.
    """
    example_id, raw_pred_labels = result_tuple[0], result_tuple[1]

    test_row = music['test'][example_id]
    true_labels = convert_id2label(test_row['ner_tags'])
    pred_labels = match_list(raw_pred_labels, true_labels)
    text_sequence = test_row['tokens']

    # Collect all rows first and build the frame once; adding rows one at a time
    # with df.loc[len(df)] does far more work per row as the frame grows.
    # Assumes tokens and ner_tags have the same length for a given example.
    rows = [
        (example_id, true_label, pred_label, token)
        for true_label, pred_label, token in zip(true_labels, pred_labels, text_sequence)
    ]
    return pd.DataFrame(rows, columns=['ID', 'True Label', 'Predicted Label', 'Token'])

In [52]:
# Convert results to DataFrame format for easy viewing
# Build every per-example frame first and concatenate once; calling pd.concat
# inside the loop re-copies all accumulated rows on every iteration.
result_frames = [df_from_result(result) for result in test_results]

if result_frames:
    y_df = pd.concat(result_frames)
else:
    # pd.concat raises on an empty list; fall back to an empty frame with the
    # same four columns that df_from_result produces.
    y_df = pd.DataFrame(columns=['ID', 'True Label', 'Predicted Label', 'Token'])

# Display all results
pd.set_option('display.max_rows', None)

print(y_df)

    ID     True Label Predicted Label            Token
0    0       B-Artist        B-Artist              rob
1    0       I-Artist        I-Artist           thomas
2    0  B-Work of Art   B-Work of Art           little
3    0  I-Work of Art   I-Work of Art          wonders
4    0              O               O   recomendations
0    1              O               O          looking
1    1              O               O               or
2    1              O               O           spacey
3    1              O               O      psychedelic
4    1              O               O            music
0    2              O               O            songs
1    2              O               O             like
2    2              O               O             this
3    2              O               O              one
0    3              O               O      suggestions
1    3              O               O              for
2    3              O               O             post
3    3    

In [53]:
from datetime import datetime

# Write the per-token results to a CSV whose name starts with a minute-resolution timestamp
save_time = f"{datetime.now():%Y%m%d%H%M}"
filename = save_time + "_gpt_music_results.csv"
y_df.to_csv(filename, index=False)