# In [None]:
# %% [markdown]
# # Fine-tuning DistilBERT for Multilingual Sentiment Analysis

# %% [code]
# !pip install transformers datasets torch numpy pandas

# %% [code]
# from google.colab import drive
# drive.mount('/content/drive')

# %% [code]
import os

# Root folder for this project's artifacts, located under the Google Drive path.
# NOTE(review): assumes Drive is already mounted at /content/drive — confirm.
project_name = "FineTuned-DistilBERT-Multilingual-Sentiment-Analysis"
base_path = f"/content/drive/MyDrive/{project_name}"
os.makedirs(base_path, exist_ok=True)

# One subfolder each for training checkpoints, logs, and the final saved model.
checkpoints_dir, logs_dir, best_model_dir = (
    os.path.join(base_path, leaf)
    for leaf in ("checkpoints", "logs", "best_model")
)
for dir_path in (checkpoints_dir, logs_dir, best_model_dir):
    # exist_ok=True keeps re-running this cell harmless.
    os.makedirs(dir_path, exist_ok=True)

print(f"Project directory created at: {base_path}")

# %% [code]
# !git clone https://github.com/your-username/FineTuned-DistilBERT-Multilingual-Sentiment-Analysis.git
# %cd FineTuned-DistilBERT-Multilingual-Sentiment-Analysis

# %% [code]
import torch
from src.train import train_model, get_training_args
from src.evaluate import run_evaluation

# %% [code]
# Pick the compute device: CUDA when PyTorch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# %% [code]
# Modify training arguments: redirect checkpoints and logs into the project
# directories created earlier in this notebook.
training_args = get_training_args()
training_args.output_dir = checkpoints_dir
training_args.logging_dir = logs_dir

# Train the model. train_model returns the trainer together with the dataset
# that is later handed to run_evaluation.
print("Starting training...")
trainer, test_dataset = train_model(training_args)

# Save the model currently held by the trainer.
# NOTE(review): this is the *best* checkpoint only if get_training_args()
# enables load_best_model_at_end — confirm in src/train.py.
print("Saving the best model...")
trainer.save_model(best_model_dir)

# Resolve the tokenizer defensively: newer Transformers releases expose it as
# `processing_class` (with `tokenizer` deprecated), older ones only as
# `tokenizer`, and either may be None when no tokenizer was given to the
# Trainer. The original unconditional `trainer.tokenizer.save_pretrained(...)`
# raised AttributeError in that case.
tokenizer = getattr(trainer, "processing_class", None)
if tokenizer is None:
    tokenizer = getattr(trainer, "tokenizer", None)

if tokenizer is not None:
    tokenizer.save_pretrained(best_model_dir)
else:
    print("Warning: trainer has no tokenizer attached; tokenizer files were not saved.")

# %% [code]
# Run evaluation: pass the trainer and the `test_dataset` returned by
# `train_model` to the project's `run_evaluation` helper (imported from
# src.evaluate). Its return value, if any, is not captured here.
print("Running evaluation...")
run_evaluation(trainer, test_dataset)