In [17]:
from dotenv import load_dotenv
import pandas as pd
import openai
import json
import os

# Load variables from a local .env file (python-dotenv) into the process
# environment so the API key does not have to be hardcoded in the notebook.
load_dotenv()
# Indexing os.environ raises KeyError right here if OPENAI_API_KEY is missing.
openai.api_key = os.environ['OPENAI_API_KEY']
# Shared client object; get_response sends its chat-completion requests through it.
client = openai.OpenAI()

In [18]:
# Input/output directories, relative to the notebook's working directory.
# Built with os.path.join instead of hand-written backslashes: the previous
# literals relied on the invalid escape sequences '\.' and '\d' (a warning on
# recent Python versions) and only resolved correctly on Windows.
input_path = os.path.join('..', '..', 'data', 'txt-from-midi')
output_path = os.path.join('..', '..', 'data', 'generations', 'gpt')

In [19]:
def get_prompt(music):
    """Build the sentiment-classification prompt for one piece of music.

    Args:
        music: The piece in symbolic text form; it is inserted verbatim
            under the "### Music" heading.

    Returns:
        The full prompt string: task statement, a legend for the symbolic
        tokens, the music itself, and the required answer format.
    """
    # The end-of-piece token is written as an escaped backslash followed by
    # "n" so the prompt shows the two-character token itself; unescaped, it
    # would be rendered as a blank line and the legend entry would be lost.
    return f'''You will classify music represented in symbolic form as either positive or negative.

### Symbolic Representation
d_[duration]_[dots]: Defines the duration of the upcoming notes. The [duration] specifies the type of note (e.g., breve, whole, half, quarter, eighth, 16th, or 32nd). The [dots] indicates the number of dots extending the note's duration, and can be any integer from 0 to 3.
v_[velocity]: Indicates the velocity (or loudness) of the following notes. Velocity is discretized into bins of size 4, allowing values such as 4, 8, 12, up to 128.
t_[tempo]: Changes the tempo of the piece, measured in beats per minute (bpm). Tempo is discretized into bins of size 4, ranging from 24 to 160 bpm. This controls the speed at which the piece is played.
w_[wait]: Specifies the number of time steps (units of waiting) that pass before the next musical event occurs. The value associated with w, such as in w_2 or w_3, represents the number of time steps with no musical events.
\\n: Marks the end of the piece.

### Music
{music}


Your answer must strictly follow this format:
- answer: A string, either "positive" or "negative"
- justify: A brief explanation justifying your classification
'''

In [20]:
# Read every .txt file in input_path into one record per file.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the processing order (and the order of the saved results) reproducible.
data = []
for filename in sorted(os.listdir(input_path)):
    if not filename.endswith('.txt'):
        continue

    file_path = os.path.join(input_path, filename)
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    data.append({'file_name': filename, 'content': content})

# Explicit columns keep the frame's schema stable even when no .txt file exists.
df = pd.DataFrame(data, columns=['file_name', 'content'])

In [21]:
def get_response(prompt, model="gpt-4o-mini", temperature=1.0, top_p=0.95,
                 max_completion_tokens=500):
    """Send a single-turn chat completion request and return the reply message.

    Args:
        prompt: Text sent as the only user message.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling threshold forwarded to the API.
        max_completion_tokens: Upper bound on generated tokens, forwarded to the API.

    Returns:
        The message object of the first choice in the completion; callers
        read its ``content`` attribute for the generated text.
    """
    # Uses the module-level `client`; the defaults reproduce the settings
    # that were previously hard-coded in this function.
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        top_p=top_p,
        max_completion_tokens=max_completion_tokens,
    )
    return completion.choices[0].message

In [None]:
# Classify every loaded piece and checkpoint the accumulated results to JSON.

# Only this many leading characters of each piece are sent to the model.
MAX_MUSIC_CHARS = 1800

# Loop-invariant setup, done once instead of on every iteration.
output_name = 'gpt-4o-mini'
output_name = output_name.split('/')[-1]  # drop any "provider/" prefix from the name
os.makedirs(output_path, exist_ok=True)
json_output_path = os.path.join(output_path, f"{output_name}_temp1.json")

results = []
for idx, row in df.iterrows():
    file_name = row['file_name']
    music_content = row['content'][:MAX_MUSIC_CHARS]

    response = get_response(get_prompt(music_content))
    generated_texts = response.content

    result = {
        "file_name": file_name,
        "music_passed_to_model": music_content,
        "model_output": generated_texts
    }
    results.append(result)

    # Rewrite the whole file after each piece so the results gathered so far
    # are already on disk if a later request fails.
    with open(json_output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=4)