### Checking GPU availability

In [None]:
import torch

# Report which CUDA devices PyTorch can see. Both outcomes are printed so an
# empty cell output can never be mistaken for "no GPU".
if torch.cuda.is_available():
    print("CUDA is available. Here are the CUDA devices:")
    for device_index in range(torch.cuda.device_count()):
        # One line per visible device: its index and its reported name.
        print(f"Device {device_index}: {torch.cuda.get_device_name(device_index)}")
else:
    print("CUDA is not available. No GPU devices were detected by PyTorch.")

In [2]:
import os

# Move the working directory one level up from where the kernel started, so
# the "./notebook/..." paths used later in this notebook resolve.
#
# The target is computed only once per kernel session. Recomputing it on
# every execution would climb one more directory each time this cell is
# re-run and silently break every relative path below.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.dirname(os.path.abspath("."))
os.chdir(PROJECT_ROOT)

### Importing libraries

In [3]:
from erasmo import Erasmo
import pandas as pd
import metrics

### Loading Train and Test Datasets

In [4]:
# Read the train and test splits from CSV. The paths are relative to the
# current working directory.
df_train = pd.read_csv(filepath_or_buffer="./notebook/data/train.csv")
df_test = pd.read_csv(filepath_or_buffer="./notebook/data/test.csv")


In [None]:
# Show only the first training row as a quick look at the columns.
df_train.head(n=1)

# Erasmo Framework

### Hyperparameters

In [None]:
# Language model handed to Erasmo as its first positional argument.
model_name = "gpt2-medium"

# Training schedule.
batch_size = 8
n_epochs = 2

# Save model weights every `save_steps` steps and log every `logging_steps`
# steps; intermediate results are written under `experiment_dir`.
save_steps, logging_steps = 2000, 300
experiment_dir = "./yelp"

# Whether to convert text to numbers.
# NOTE(review): the two Erasmo constructors in this notebook pass literal
# True/False for text_to_num rather than reading this variable.
text_to_num = False

### Erasmo base and NV

In [None]:
# Baseline Erasmo configuration: text is NOT converted to numbers.
#   model_name     - name of the large language model (see HuggingFace for options)
#   batch_size     - batch size
#   epochs         - number of training epochs
#   logging_steps  - log the loss and learning rate every x steps
#   save_steps     - save model weights every x steps
#   experiment_dir - directory where all intermediate steps are saved
#   text_to_num    - convert text to numbers (disabled for the baseline)
erasmo_base = Erasmo(
    model_name,
    batch_size=batch_size,
    epochs=n_epochs,
    logging_steps=logging_steps,
    save_steps=save_steps,
    experiment_dir=experiment_dir,
    text_to_num=False,
)


In [None]:
# "NV" Erasmo configuration: identical to the baseline except that text is
# converted to numbers (text_to_num=True).
#
# This variant gets its own experiment directory. It previously shared
# `experiment_dir` with the baseline model, so the intermediate steps of the
# two training runs were written to the same directory.
# NOTE(review): assumes nothing outside this notebook expects the NV
# checkpoints under `experiment_dir` itself - confirm.
erasmo_nv = Erasmo(
    model_name,                             # Name of the large language model used (see HuggingFace for more options)
    epochs=n_epochs,                        # Number of epochs to train
    save_steps=save_steps,                  # Save model weights every x steps
    logging_steps=logging_steps,            # Log the loss and learning rate every x steps
    experiment_dir=f"{experiment_dir}_nv",  # Separate directory for this variant's intermediate steps
    text_to_num=True,                       # Convert text to numbers
    batch_size=batch_size,                  # Set the batch size
)

### Training

In [None]:
# Fit the baseline model on the training frame and keep whatever fit() returns.
# NOTE(review): presumably a trainer object, judging by the variable name - confirm.
trainer_base = erasmo_base.fit(df_train)

In [None]:
# Fit the text_to_num=True variant on the same training frame as the baseline.
# NOTE(review): presumably returns a trainer object, judging by the variable name - confirm.
trainer_nv = erasmo_nv.fit(df_train)

### Generating Embeddings from last layer

In [None]:
# Ask the baseline model for embeddings of the test frame.
# NOTE(review): shape and type of the returned embeddings are not visible here - confirm.
embedding_base = erasmo_base.generate_embeddings_from_last_layer(df_test)

In [None]:
# Ask the text_to_num=True variant for embeddings of the same test frame.
# NOTE(review): shape and type of the returned embeddings are not visible here - confirm.
embedding_nv = erasmo_nv.generate_embeddings_from_last_layer(df_test)

### Clustering Quality Assessment

In [34]:
# Run the clustering quality assessment on the baseline embeddings with two
# clusters, labelled 'yelp_base'. The bare name on the last line displays it.
results_base = metrics.clustering_quality_assessment(
    data=embedding_base,
    dataset_name='yelp_base',
    n_clusters=2,
)
results_base

In [None]:
# Run the same clustering quality assessment on the text_to_num=True
# embeddings, labelled 'yelp_nv'. The bare name on the last line displays it.
results_nv = metrics.clustering_quality_assessment(
    data=embedding_nv,
    dataset_name='yelp_nv',
    n_clusters=2,
)
results_nv