In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [48]:
# Load the CSV data:
#   data       - read from data_moods.csv; its `mood` column is used as the training label
#   truck_data - read from data.csv; these rows are later fed to the model for prediction
data = pd.read_csv("data_moods.csv")
truck_data = pd.read_csv("data.csv")

In [50]:


# Audio features that get rescaled below.
numeric_cols = [
    'danceability',
    'acousticness',
    'energy',
    'instrumentalness',
    'liveness',
    'valence',
    'speechiness',
]

# Multiply each feature by 100 and round to the nearest whole number.
# NOTE: this overwrites the columns in place, so running the cell a second
# time without reloading the CSVs scales the values again.
data[numeric_cols] = (data[numeric_cols] * 100).round(0)
truck_data[numeric_cols] = (truck_data[numeric_cols] * 100).round(0)

In [4]:
# Preview the first rows of `data`.
data.head()

Unnamed: 0,name,album,artist,id,release_date,popularity,length,danceability,acousticness,energy,instrumentalness,liveness,valence,loudness,speechiness,tempo,key,time_signature,mood
0,1999,1999,Prince,2H7PHVdQ3mXqEHXcvclTB0,1982-10-27,68,379266,87.0,14.0,73.0,0.0,8.0,62.0,-8.2,8.0,118.52,5,4,Happy
1,23,23,Blonde Redhead,4HIwL9ii9CcXpTOTzMq0MP,2007-04-16,43,318800,38.0,2.0,83.0,20.0,15.0,17.0,-5.07,5.0,120.26,8,4,Sad
2,9 Crimes,9,Damien Rice,5GZEeowhvSieFDiR8fQ2im,2006-11-06,60,217946,35.0,91.0,14.0,0.0,9.0,12.0,-15.33,3.0,136.17,0,4,Sad
3,99 Luftballons,99 Luftballons,Nena,6HA97v4wEGQ5TUClRM0XLc,1984-08-21,2,233000,47.0,9.0,44.0,0.0,11.0,59.0,-12.86,6.0,193.1,4,4,Happy
4,A Boy Brushed Red Living In Black And White,They're Only Chasing Safety,Underoath,47IWLfIKOKhFnz1FUEUIkE,2004-01-01,60,268000,42.0,0.0,93.0,0.0,14.0,44.0,-3.6,11.0,169.88,1,4,Energetic


In [52]:
# Columns used as model inputs, for both training and prediction.
features = ['danceability', 'acousticness', 'energy', 'instrumentalness', 'liveness', 'valence', 'loudness', 'speechiness', 'tempo', 'key']
X = data[features]

# Encode the string mood labels as integer class ids; the fitted encoder is
# kept so predictions can be mapped back to mood names later.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['mood'])

# Split data into training and validation sets (80/20, fixed seed for reproducibility).
# NOTE(review): the split is not stratified by mood; consider stratify=y if classes are imbalanced.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Same feature columns from the dataset that will receive predictions.
X_pre = truck_data[features]

# Last expression of the cell so the notebook actually displays the shapes
# (as a bare expression in the middle of the cell it was silently discarded).
X_train.shape, X_val.shape, y_train.shape, y_val.shape

In [6]:
# Display the training feature frame.
X_train

Unnamed: 0,danceability,acousticness,energy,instrumentalness,liveness,valence,loudness,speechiness,tempo,key
278,46.0,92.0,18.0,0.0,12.0,18.0,-14.85,4.0,80.75,0
110,69.0,10.0,96.0,1.0,8.0,45.0,-3.66,11.0,123.98,1
82,74.0,95.0,31.0,0.0,19.0,36.0,-7.83,7.0,111.20,4
51,57.0,84.0,16.0,0.0,9.0,9.0,-14.40,3.0,109.92,6
218,28.0,85.0,26.0,0.0,10.0,11.0,-9.87,3.0,98.66,11
...,...,...,...,...,...,...,...,...,...,...
71,60.0,2.0,90.0,68.0,10.0,14.0,-5.98,5.0,127.97,5
106,70.0,0.0,68.0,0.0,6.0,55.0,-4.18,6.0,150.12,2
270,27.0,99.0,0.0,89.0,9.0,15.0,-33.73,5.0,132.58,7
435,30.0,17.0,54.0,80.0,13.0,19.0,-10.64,5.0,142.72,7


In [53]:
def _row_to_text(row):
    """Render one feature row as a "name: value, name: value, ..." string."""
    return ', '.join(f"{name}: {row[name]}" for name in features)


# Turn every row of each feature frame into a sentence-like string for the tokenizer.
text_data_train = X_train.apply(_row_to_text, axis=1).tolist()
text_data_val = X_val.apply(_row_to_text, axis=1).tolist()
text_data_pre = X_pre.apply(_row_to_text, axis=1).tolist()


In [54]:
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2-large")

# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Settings shared by every tokenizer call below: truncate to at most 128
# tokens, pad, and return PyTorch tensors.
_tokenize_kwargs = dict(truncation=True, padding=True, return_tensors="pt", max_length=128)

# Tokenize the training, validation and prediction texts.
train_encodings = tokenizer(text_data_train, **_tokenize_kwargs)
val_encodings = tokenizer(text_data_val, **_tokenize_kwargs)
pred_encoding = tokenizer(text_data_pre, **_tokenize_kwargs)

In [11]:
from transformers import GPT2ForSequenceClassification, Trainer, TrainingArguments

# Other GPT-2 checkpoints that can be swapped in for "gpt2-large" here:
# "distilgpt2", "gpt2", "gpt2-xl".

# Size the classification head from the fitted label encoder instead of a
# hard-coded 4, so it always matches the number of mood classes in the data.
num_moods = len(label_encoder.classes_)
model = GPT2ForSequenceClassification.from_pretrained("gpt2-large", num_labels=num_moods)

# The tokenizer pads with its EOS token, so register that id as the model's
# padding id as well.
model.config.pad_token_id = tokenizer.eos_token_id


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2-large and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
from torch.utils.data import Dataset
import torch
from torch.utils.data import Dataset
import numpy as np

class MoodDataset(Dataset):
    """Pairs tokenizer encodings with integer mood labels, one example per index.

    Args:
        encodings: Mapping from field name (e.g. ``input_ids``) to a tensor or
            nested sequence that can be indexed by example position.
        labels: Integer class ids, one per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    @staticmethod
    def _as_tensor(value, dtype=None):
        """Return `value` as a new tensor, optionally cast to `dtype`.

        Existing tensors are copied with ``clone().detach()``; calling
        ``torch.tensor`` on a tensor works but emits a UserWarning for every
        item fetched.
        """
        if isinstance(value, torch.Tensor):
            copied = value.clone().detach()
            return copied if dtype is None else copied.to(dtype)
        return torch.tensor(value, dtype=dtype)

    def __getitem__(self, idx):
        """Return a dict with every encoding field for example `idx` plus its `labels` entry."""
        item = {key: self._as_tensor(val[idx]) for key, val in self.encodings.items()}
        # The label is cast to torch.long.
        item['labels'] = self._as_tensor(self.labels[idx], dtype=torch.long)
        return item

    def __len__(self):
        """Number of examples, taken from the label sequence."""
        return len(self.labels)

# Wrap the tokenized training and validation texts together with their encoded mood labels.
train_dataset = MoodDataset(train_encodings, y_train)
val_dataset = MoodDataset(val_encodings, y_val)


In [13]:
from sklearn.metrics import accuracy_score

def compute_metrics(p):
    """Compute classification accuracy for an evaluation pass.

    Args:
        p: Object exposing `predictions` (per-class scores, one row per
            example) and `label_ids` (the true class ids).

    Returns:
        A dict with a single "accuracy" entry.
    """
    scores, gold = p.predictions, p.label_ids
    # The predicted class is the index of the highest score in each row.
    predicted = np.argmax(scores, axis=1)
    accuracy = accuracy_score(gold, predicted)
    return {"accuracy": accuracy}

In [14]:
# Training configuration for the Trainer.
training_args = TrainingArguments(
    # Where outputs and logs are written.
    output_dir='./results',
    logging_dir='./logs',
    logging_steps=10,
    # Training length and batch sizes.
    num_train_epochs=10,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Evaluate and save once per epoch, then reload the best checkpoint,
    # ranked by the "accuracy" value returned from compute_metrics.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)

# Bundle the model, the datasets and the metric function for training and evaluation.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)



In [15]:
# Fine-tune the model using the trainer configured above.
trainer.train()
# Evaluate on the trainer's eval dataset; as the cell's last expression the metrics dict is displayed.
trainer.evaluate()

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  2%|▏         | 11/690 [00:03<02:39,  4.25it/s]

{'loss': 1.8957, 'learning_rate': 4.9275362318840584e-05, 'epoch': 0.14}


  3%|▎         | 21/690 [00:05<02:38,  4.23it/s]

{'loss': 1.3993, 'learning_rate': 4.855072463768116e-05, 'epoch': 0.29}


  4%|▍         | 31/690 [00:08<02:38,  4.15it/s]

{'loss': 1.3368, 'learning_rate': 4.782608695652174e-05, 'epoch': 0.43}


  6%|▌         | 41/690 [00:10<02:25,  4.46it/s]

{'loss': 1.1913, 'learning_rate': 4.710144927536232e-05, 'epoch': 0.58}


  7%|▋         | 51/690 [00:12<02:23,  4.46it/s]

{'loss': 0.8772, 'learning_rate': 4.63768115942029e-05, 'epoch': 0.72}


  9%|▉         | 61/690 [00:14<02:11,  4.80it/s]

{'loss': 0.9334, 'learning_rate': 4.565217391304348e-05, 'epoch': 0.87}


                                                
 10%|█         | 69/690 [00:17<02:03,  5.04it/s]

{'eval_loss': 0.7001689672470093, 'eval_accuracy': 0.7246376811594203, 'eval_runtime': 0.7592, 'eval_samples_per_second': 181.773, 'eval_steps_per_second': 23.71, 'epoch': 1.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 10%|█         | 71/690 [00:26<23:41,  2.30s/it]

{'loss': 0.8297, 'learning_rate': 4.492753623188406e-05, 'epoch': 1.01}


 12%|█▏        | 81/690 [00:28<02:42,  3.74it/s]

{'loss': 0.7305, 'learning_rate': 4.4202898550724645e-05, 'epoch': 1.16}


 13%|█▎        | 91/690 [00:30<02:04,  4.83it/s]

{'loss': 0.7583, 'learning_rate': 4.347826086956522e-05, 'epoch': 1.3}


 15%|█▍        | 101/690 [00:32<02:01,  4.84it/s]

{'loss': 0.8702, 'learning_rate': 4.27536231884058e-05, 'epoch': 1.45}


 16%|█▌        | 111/690 [00:35<02:00,  4.80it/s]

{'loss': 0.8109, 'learning_rate': 4.202898550724638e-05, 'epoch': 1.59}


 18%|█▊        | 121/690 [00:37<01:57,  4.85it/s]

{'loss': 0.7228, 'learning_rate': 4.130434782608696e-05, 'epoch': 1.74}


 19%|█▉        | 131/690 [00:39<01:54,  4.87it/s]

{'loss': 0.739, 'learning_rate': 4.057971014492754e-05, 'epoch': 1.88}


                                                 
 20%|██        | 138/690 [00:41<01:47,  5.13it/s]

{'eval_loss': 0.6647589802742004, 'eval_accuracy': 0.7318840579710145, 'eval_runtime': 0.7538, 'eval_samples_per_second': 183.068, 'eval_steps_per_second': 23.878, 'epoch': 2.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 20%|██        | 141/690 [00:51<15:19,  1.67s/it]

{'loss': 0.5433, 'learning_rate': 3.985507246376812e-05, 'epoch': 2.03}


 22%|██▏       | 151/690 [00:53<02:12,  4.06it/s]

{'loss': 0.6403, 'learning_rate': 3.91304347826087e-05, 'epoch': 2.17}


 23%|██▎       | 161/690 [00:55<01:49,  4.82it/s]

{'loss': 1.1279, 'learning_rate': 3.8405797101449274e-05, 'epoch': 2.32}


 25%|██▍       | 171/690 [00:57<01:46,  4.88it/s]

{'loss': 0.5791, 'learning_rate': 3.7681159420289856e-05, 'epoch': 2.46}


 26%|██▌       | 181/690 [00:59<01:45,  4.85it/s]

{'loss': 0.4933, 'learning_rate': 3.695652173913043e-05, 'epoch': 2.61}


 28%|██▊       | 191/690 [01:01<01:42,  4.88it/s]

{'loss': 1.0612, 'learning_rate': 3.6231884057971014e-05, 'epoch': 2.75}


 29%|██▉       | 201/690 [01:03<01:39,  4.89it/s]

{'loss': 0.5227, 'learning_rate': 3.5507246376811596e-05, 'epoch': 2.9}


                                                 
 30%|███       | 207/690 [01:05<01:34,  5.13it/s]

{'eval_loss': 0.6571718454360962, 'eval_accuracy': 0.7391304347826086, 'eval_runtime': 0.744, 'eval_samples_per_second': 185.486, 'eval_steps_per_second': 24.194, 'epoch': 3.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 31%|███       | 211/690 [01:15<09:27,  1.18s/it]

{'loss': 0.4466, 'learning_rate': 3.478260869565218e-05, 'epoch': 3.04}


 32%|███▏      | 221/690 [01:17<01:49,  4.29it/s]

{'loss': 0.4558, 'learning_rate': 3.405797101449276e-05, 'epoch': 3.19}


 33%|███▎      | 231/690 [01:19<01:34,  4.84it/s]

{'loss': 0.5367, 'learning_rate': 3.3333333333333335e-05, 'epoch': 3.33}


 35%|███▍      | 241/690 [01:21<01:31,  4.88it/s]

{'loss': 0.6143, 'learning_rate': 3.260869565217392e-05, 'epoch': 3.48}


 36%|███▋      | 251/690 [01:23<01:30,  4.85it/s]

{'loss': 0.494, 'learning_rate': 3.188405797101449e-05, 'epoch': 3.62}


 38%|███▊      | 261/690 [01:25<01:28,  4.86it/s]

{'loss': 0.4046, 'learning_rate': 3.1159420289855074e-05, 'epoch': 3.77}


 39%|███▉      | 271/690 [01:27<01:26,  4.83it/s]

{'loss': 0.3807, 'learning_rate': 3.0434782608695656e-05, 'epoch': 3.91}


                                                 
 40%|████      | 276/690 [01:29<01:20,  5.15it/s]

{'eval_loss': 1.0575900077819824, 'eval_accuracy': 0.6884057971014492, 'eval_runtime': 0.7421, 'eval_samples_per_second': 185.951, 'eval_steps_per_second': 24.255, 'epoch': 4.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 41%|████      | 281/690 [01:39<06:21,  1.07it/s]

{'loss': 0.3811, 'learning_rate': 2.971014492753623e-05, 'epoch': 4.06}


 42%|████▏     | 291/690 [01:41<01:30,  4.43it/s]

{'loss': 0.3569, 'learning_rate': 2.8985507246376814e-05, 'epoch': 4.2}


 44%|████▎     | 301/690 [01:43<01:20,  4.86it/s]

{'loss': 0.4335, 'learning_rate': 2.826086956521739e-05, 'epoch': 4.35}


 45%|████▌     | 311/690 [01:45<01:17,  4.88it/s]

{'loss': 0.2988, 'learning_rate': 2.753623188405797e-05, 'epoch': 4.49}


 47%|████▋     | 321/690 [01:47<01:15,  4.87it/s]

{'loss': 0.3596, 'learning_rate': 2.6811594202898553e-05, 'epoch': 4.64}


 48%|████▊     | 331/690 [01:49<01:16,  4.71it/s]

{'loss': 0.4133, 'learning_rate': 2.608695652173913e-05, 'epoch': 4.78}


 49%|████▉     | 341/690 [01:51<01:11,  4.90it/s]

{'loss': 0.3977, 'learning_rate': 2.5362318840579714e-05, 'epoch': 4.93}


                                                 
 50%|█████     | 345/690 [01:53<01:06,  5.19it/s]

{'eval_loss': 1.0303715467453003, 'eval_accuracy': 0.7101449275362319, 'eval_runtime': 0.7741, 'eval_samples_per_second': 178.275, 'eval_steps_per_second': 23.253, 'epoch': 5.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 51%|█████     | 351/690 [02:04<04:08,  1.36it/s]

{'loss': 0.4846, 'learning_rate': 2.4637681159420292e-05, 'epoch': 5.07}


 52%|█████▏    | 361/690 [02:06<01:18,  4.17it/s]

{'loss': 0.2299, 'learning_rate': 2.391304347826087e-05, 'epoch': 5.22}


 54%|█████▍    | 371/690 [02:08<01:12,  4.39it/s]

{'loss': 0.2263, 'learning_rate': 2.318840579710145e-05, 'epoch': 5.36}


 55%|█████▌    | 381/690 [02:10<01:09,  4.43it/s]

{'loss': 0.1623, 'learning_rate': 2.246376811594203e-05, 'epoch': 5.51}


 57%|█████▋    | 391/690 [02:13<01:04,  4.65it/s]

{'loss': 0.1895, 'learning_rate': 2.173913043478261e-05, 'epoch': 5.65}


 58%|█████▊    | 401/690 [02:15<00:59,  4.83it/s]

{'loss': 0.3415, 'learning_rate': 2.101449275362319e-05, 'epoch': 5.8}


 60%|█████▉    | 411/690 [02:17<00:58,  4.80it/s]

{'loss': 0.3918, 'learning_rate': 2.028985507246377e-05, 'epoch': 5.94}


                                                 
 60%|██████    | 414/690 [02:18<00:54,  5.10it/s]

{'eval_loss': 1.0860475301742554, 'eval_accuracy': 0.7463768115942029, 'eval_runtime': 0.748, 'eval_samples_per_second': 184.492, 'eval_steps_per_second': 24.064, 'epoch': 6.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 61%|██████    | 421/690 [02:28<02:25,  1.85it/s]

{'loss': 0.2412, 'learning_rate': 1.956521739130435e-05, 'epoch': 6.09}


 62%|██████▏   | 431/690 [02:30<00:55,  4.65it/s]

{'loss': 0.0431, 'learning_rate': 1.8840579710144928e-05, 'epoch': 6.23}


 64%|██████▍   | 441/690 [02:32<00:51,  4.85it/s]

{'loss': 0.1092, 'learning_rate': 1.8115942028985507e-05, 'epoch': 6.38}


 65%|██████▌   | 451/690 [02:34<00:49,  4.86it/s]

{'loss': 0.1188, 'learning_rate': 1.739130434782609e-05, 'epoch': 6.52}


 67%|██████▋   | 461/690 [02:37<00:46,  4.88it/s]

{'loss': 0.3314, 'learning_rate': 1.6666666666666667e-05, 'epoch': 6.67}


 68%|██████▊   | 471/690 [02:39<00:45,  4.85it/s]

{'loss': 0.14, 'learning_rate': 1.5942028985507246e-05, 'epoch': 6.81}


 70%|██████▉   | 481/690 [02:41<00:42,  4.87it/s]

{'loss': 0.1276, 'learning_rate': 1.5217391304347828e-05, 'epoch': 6.96}


                                                 
 70%|███████   | 483/690 [02:42<00:40,  5.14it/s]

{'eval_loss': 1.1864184141159058, 'eval_accuracy': 0.7681159420289855, 'eval_runtime': 0.744, 'eval_samples_per_second': 185.485, 'eval_steps_per_second': 24.194, 'epoch': 7.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 71%|███████   | 491/690 [02:53<01:29,  2.22it/s]

{'loss': 0.0449, 'learning_rate': 1.4492753623188407e-05, 'epoch': 7.1}


 73%|███████▎  | 501/690 [02:55<00:40,  4.69it/s]

{'loss': 0.0593, 'learning_rate': 1.3768115942028985e-05, 'epoch': 7.25}


 74%|███████▍  | 511/690 [02:57<00:36,  4.87it/s]

{'loss': 0.0454, 'learning_rate': 1.3043478260869566e-05, 'epoch': 7.39}


 76%|███████▌  | 521/690 [02:59<00:34,  4.89it/s]

{'loss': 0.0304, 'learning_rate': 1.2318840579710146e-05, 'epoch': 7.54}


 77%|███████▋  | 531/690 [03:01<00:33,  4.81it/s]

{'loss': 0.1014, 'learning_rate': 1.1594202898550725e-05, 'epoch': 7.68}


 78%|███████▊  | 541/690 [03:03<00:31,  4.75it/s]

{'loss': 0.0805, 'learning_rate': 1.0869565217391305e-05, 'epoch': 7.83}


 80%|███████▉  | 551/690 [03:05<00:28,  4.86it/s]

{'loss': 0.0378, 'learning_rate': 1.0144927536231885e-05, 'epoch': 7.97}


                                                 
 80%|████████  | 552/690 [03:06<00:26,  5.11it/s]

{'eval_loss': 1.5577775239944458, 'eval_accuracy': 0.782608695652174, 'eval_runtime': 0.7484, 'eval_samples_per_second': 184.395, 'eval_steps_per_second': 24.052, 'epoch': 8.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 81%|████████▏ | 561/690 [03:17<00:48,  2.64it/s]

{'loss': 0.019, 'learning_rate': 9.420289855072464e-06, 'epoch': 8.12}


 83%|████████▎ | 571/690 [03:19<00:25,  4.73it/s]

{'loss': 0.011, 'learning_rate': 8.695652173913044e-06, 'epoch': 8.26}


 84%|████████▍ | 581/690 [03:21<00:22,  4.82it/s]

{'loss': 0.0031, 'learning_rate': 7.971014492753623e-06, 'epoch': 8.41}


 86%|████████▌ | 591/690 [03:23<00:20,  4.83it/s]

{'loss': 0.021, 'learning_rate': 7.246376811594203e-06, 'epoch': 8.55}


 87%|████████▋ | 601/690 [03:25<00:18,  4.81it/s]

{'loss': 0.0182, 'learning_rate': 6.521739130434783e-06, 'epoch': 8.7}


 89%|████████▊ | 611/690 [03:27<00:16,  4.82it/s]

{'loss': 0.0366, 'learning_rate': 5.797101449275362e-06, 'epoch': 8.84}


 90%|█████████ | 621/690 [03:29<00:13,  5.12it/s]

{'loss': 0.0018, 'learning_rate': 5.072463768115943e-06, 'epoch': 8.99}


                                                 
 90%|█████████ | 621/690 [03:30<00:13,  5.12it/s]

{'eval_loss': 1.750072956085205, 'eval_accuracy': 0.7753623188405797, 'eval_runtime': 0.7481, 'eval_samples_per_second': 184.464, 'eval_steps_per_second': 24.061, 'epoch': 9.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 91%|█████████▏| 631/690 [03:41<00:19,  3.07it/s]

{'loss': 0.0002, 'learning_rate': 4.347826086956522e-06, 'epoch': 9.13}


 93%|█████████▎| 641/690 [03:43<00:10,  4.73it/s]

{'loss': 0.0007, 'learning_rate': 3.6231884057971017e-06, 'epoch': 9.28}


 94%|█████████▍| 651/690 [03:45<00:08,  4.86it/s]

{'loss': 0.0006, 'learning_rate': 2.898550724637681e-06, 'epoch': 9.42}


 96%|█████████▌| 661/690 [03:48<00:05,  4.86it/s]

{'loss': 0.0025, 'learning_rate': 2.173913043478261e-06, 'epoch': 9.57}


 97%|█████████▋| 671/690 [03:50<00:03,  4.84it/s]

{'loss': 0.0088, 'learning_rate': 1.4492753623188406e-06, 'epoch': 9.71}


 99%|█████████▊| 681/690 [03:52<00:01,  4.84it/s]

{'loss': 0.0, 'learning_rate': 7.246376811594203e-07, 'epoch': 9.86}


100%|██████████| 690/690 [03:54<00:00,  5.13it/s]

{'loss': 0.0002, 'learning_rate': 0.0, 'epoch': 10.0}


                                                 
100%|██████████| 690/690 [03:54<00:00,  5.13it/s]

{'eval_loss': 1.7938377857208252, 'eval_accuracy': 0.7753623188405797, 'eval_runtime': 0.6624, 'eval_samples_per_second': 208.319, 'eval_steps_per_second': 27.172, 'epoch': 10.0}


100%|██████████| 690/690 [04:06<00:00,  2.80it/s]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'train_runtime': 246.8062, 'train_samples_per_second': 22.204, 'train_steps_per_second': 2.796, 'train_loss': 0.40140211117045577, 'epoch': 10.0}


100%|██████████| 18/18 [00:00<00:00, 27.65it/s]


{'eval_loss': 1.5577775239944458,
 'eval_accuracy': 0.782608695652174,
 'eval_runtime': 0.693,
 'eval_samples_per_second': 199.14,
 'eval_steps_per_second': 25.975,
 'epoch': 10.0}

In [55]:
from torch.nn.functional import softmax

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Put the model in evaluation mode explicitly so dropout is disabled and the
# predictions do not depend on whichever mode an earlier cell left it in.
model.eval()

# Rows per forward pass; lower this if GPU memory is limited.
batch_size = 5

num_rows = len(pred_encoding['input_ids'])

# Logits of each batch, collected in order.
all_logits = []

# No gradients are needed for prediction; disabling them saves memory.
with torch.no_grad():
    for start in range(0, num_rows, batch_size):
        # Slice every encoding field to the current batch and move it to the model's device.
        batch = {key: val[start:start + batch_size].to(device) for key, val in pred_encoding.items()}
        # Move the result to the CPU right away so device memory only ever holds one batch.
        all_logits.append(model(**batch).logits.cpu())

# Stack the per-batch logits into one (rows x classes) tensor.
all_logits = torch.cat(all_logits, dim=0)

# Convert logits to per-class probabilities.
probabilities = softmax(all_logits, dim=1)


In [56]:
# Bring the probability tensor to the CPU and view it as a NumPy array.
probabilities_cpu = probabilities.cpu().numpy()

# Take the highest-probability class for each row, then map the integer
# class ids back to the original mood labels with the fitted label encoder.
predicted_class_ids = probabilities_cpu.argmax(axis=1)
mood_predictions = label_encoder.inverse_transform(predicted_class_ids)


In [57]:
# Store the decoded mood predictions as a new column on the prediction dataset.
truck_data['predicted_mood'] = mood_predictions

In [60]:
# Display the predicted mood column.
truck_data['predicted_mood']

0          Calm
1           Sad
2          Calm
3           Sad
4           Sad
          ...  
170648    Happy
170649    Happy
170650    Happy
170651    Happy
170652    Happy
Name: predicted_mood, Length: 170653, dtype: object

In [61]:
# Write truck_data (including the predicted_mood column) to CSV, omitting the index.
truck_data.to_csv("data_music_with_predictions.csv", index=False)