In [11]:
import pandas as pd
from transformers import GPT2Tokenizer
from transformers import GPT2LMHeadModel
import torch

In [5]:
df = pd.read_csv('data/filter_data.csv')
# Inicializar el tokenizador de GPT-2
tokenizer = GPT2Tokenizer.from_pretrained('gpt2_spa_local')  # O la ruta a tu modelo si ya lo descargaste
# Crear una lista para almacenar los datos preparados
prepared_data = []

# Iterar sobre cada fila del DataFrame
for index, row in df.iterrows():
    # Concatenar letras con sentimientos
    input_text = f"{row['lyrics']} [SENTIMENTS] {row['feelings']}"
    prepared_data.append(input_text)
# Tokenizar los datos
tokenized_data = [tokenizer.encode(text, return_tensors='pt') for text in prepared_data]
# Cargar el modelo
model = GPT2LMHeadModel.from_pretrained('./gpt2_spa_local')
model.train()  # Establecer el modelo en modo de entrenamiento

# Mover el modelo a la GPU (si está disponible)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)


In [13]:
from torch.optim import AdamW

# Configuración del optimizador
optimizer = AdamW(model.parameters(), lr=5e-5)

# Número de épocas
num_epochs = 3  # Ajusta según sea necesario

for epoch in range(num_epochs):
    for tokenized_input in tokenized_data:
        # Mover datos a la GPU
        tokenized_input = tokenized_input.to(device)

        # Adelante (Forward)
        outputs = model(tokenized_input, labels=tokenized_input)
        loss = outputs.loss

        # Retropropagación (Backward)
        loss.backward()

        # Actualizar los pesos
        optimizer.step()
        optimizer.zero_grad()

        print(f"Epoch: {epoch}, Loss: {loss.item()}")


Epoch: 0, Loss: 6.125394821166992
Epoch: 0, Loss: 4.521975517272949
Epoch: 0, Loss: 6.078360080718994
Epoch: 0, Loss: 5.354247093200684
Epoch: 0, Loss: 7.365292549133301
Epoch: 0, Loss: 6.2282328605651855
Epoch: 0, Loss: 6.046369552612305
Epoch: 0, Loss: 5.607619762420654
Epoch: 0, Loss: 4.170190334320068
Epoch: 0, Loss: 5.716723442077637
Epoch: 0, Loss: 7.027527332305908
Epoch: 0, Loss: 6.694766998291016
Epoch: 0, Loss: 5.275996685028076
Epoch: 0, Loss: 4.747635364532471
Epoch: 0, Loss: 4.751110553741455
Epoch: 0, Loss: 4.4834885597229
Epoch: 0, Loss: 5.4618821144104
Epoch: 0, Loss: 5.167383670806885
Epoch: 0, Loss: 4.726057052612305
Epoch: 0, Loss: 4.949235916137695
Epoch: 0, Loss: 3.388848066329956
Epoch: 0, Loss: 5.1946516036987305
Epoch: 0, Loss: 4.779718399047852
Epoch: 0, Loss: 3.6908445358276367
Epoch: 0, Loss: 4.80611515045166
Epoch: 0, Loss: 4.464014053344727
Epoch: 0, Loss: 3.011209011077881
Epoch: 0, Loss: 4.578606128692627
Epoch: 0, Loss: 4.513789176940918
Epoch: 0, Loss: 

KeyboardInterrupt: 

In [None]:
model.save_pretrained('./trained_gpt2_spa_local')
tokenizer.save_pretrained('./trained_gpt2_spa_local')