In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd

# --- Configuration and model loading -------------------------------------
# model_path: fine-tuned weights, read with torch.load and applied to the
#             base architecture through load_state_dict.
# base_model: Hugging Face checkpoint supplying the tokenizer and architecture.
# num_labels: size of the classification head.
# csv_path:   dataset to score.
model_path = "C:\\Users\\migue\\Downloads\\trial_3_epoch_4_step_Best_model.pt"
base_model = "yiyanghkust/finbert-tone"
num_labels = 3
csv_path = "C:\\Users\\migue\\OneDrive\\Escritorio\\dataset_comprobacion_EN.csv"

print(f"Cargando el tokenizador y el modelo base: {base_model}")
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForSequenceClassification.from_pretrained(base_model, num_labels=num_labels)

# Choose the device BEFORE reading the checkpoint so its tensors can be
# remapped onto it; without map_location a checkpoint saved from CUDA fails to
# load on a CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)
model.eval()  # inference mode: disables dropout

print(f"Usando dispositivo: {device}")
model.to(device)

# --- Load the dataset to score --------------------------------------------
# Failures raise instead of calling exit(): inside a notebook exit() shuts the
# kernel down, which discards the model that was just loaded.
try:
    df = pd.read_csv(csv_path)
except FileNotFoundError as err:
    raise FileNotFoundError(
        f"Error: No se encontró el archivo en la ruta: {csv_path}"
    ) from err
print("Dataset cargado exitosamente.")

if 'clean_text' not in df.columns:
    raise KeyError("Error: El dataset no tiene la columna 'clean_text'. Por favor, renómbrala.")

# Missing cells are read as float NaN, which the tokenizer cannot encode.
# Replace them with empty strings (rather than dropping rows) so `texts`
# stays aligned one-to-one with the rows of `df`.
texts = df['clean_text'].fillna("").astype(str).tolist()

# --- Batched inference ------------------------------------------------------
batch_size = 32
predictions = []  # one predicted class id (plain int) per entry of `texts`

# truncation=True only takes effect when a maximum length is known. This
# tokenizer does not define one (it warns and falls back to no truncation), so
# an over-long text would overflow the model's position embeddings. Use the
# model's own limit, falling back to 512 if the config does not expose it.
max_length = getattr(model.config, "max_position_embeddings", 512)

with torch.no_grad():  # no gradients needed for prediction
    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i:i + batch_size]
        encoded = tokenizer(
            batch_texts,
            padding=True,
            truncation=True,
            max_length=max_length,
            return_tensors='pt',
        ).to(device)

        outputs = model(**encoded)
        logits = outputs.logits
        # argmax over the class dimension; tolist() yields plain Python ints.
        batch_preds = torch.argmax(logits, dim=1).cpu().tolist()
        predictions.extend(batch_preds)

# Translate predicted class ids into sentiment names and preview the result.
# NOTE(review): the recorded sample output looks inconsistent with this id
# order (e.g. a headline warning of a crash is labelled 'positive'). Confirm
# the mapping against the label encoding used when the checkpoint was
# fine-tuned — TODO verify.
label_map_reverse = dict(enumerate(('positive', 'negative', 'neutral')))
df['predicted_sentiment'] = list(map(label_map_reverse.__getitem__, predictions))

preview_columns = ['clean_text', 'predicted_sentiment']
print("\n--- Predicciones del modelo ---")
print(df[preview_columns].head())


Cargando el tokenizador y el modelo base: yiyanghkust/finbert-tone


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Usando dispositivo: cuda
Dataset cargado exitosamente.

--- Predicciones del modelo ---
                                                                 clean_text  \
0          Bitcoin climbs above $65,000 as investors eye next ETF decision.   
1      Major bank warns of potential crash in crypto market led by Bitcoin.   
2  Bitcoin adoption in South America accelerates amid economic uncertainty.   
3       El Salvador announces new Bitcoin bonds to boost national treasury.   
4                   Hackers steal $30 million in Bitcoin from top exchange.   

  predicted_sentiment  
0             neutral  
1            positive  
2             neutral  
3            negative  
4            negative  


In [16]:
# Show full cell contents (no column-width truncation), then render the frame.
pd.options.display.max_colwidth = None
df

Unnamed: 0,ID,clean_text,predicted_sentiment
0,1,"Bitcoin climbs above $65,000 as investors eye next ETF decision.",neutral
1,2,Major bank warns of potential crash in crypto market led by Bitcoin.,positive
2,3,Bitcoin adoption in South America accelerates amid economic uncertainty.,neutral
3,4,El Salvador announces new Bitcoin bonds to boost national treasury.,negative
4,5,Hackers steal $30 million in Bitcoin from top exchange.,negative
5,6,Tesla may reconsider accepting Bitcoin for vehicle purchases.,negative
6,7,Bitcoin transaction fees hit lowest point since 2021.,positive
7,8,"Federal Reserve hints at rate hike, Bitcoin reacts with slight drop.",positive
8,9,New legislation could regulate Bitcoin trading in the EU.,negative
9,10,"Bitcoin whale moves 10,000 BTC to unknown wallet, sparking speculation.",positive
