This notebook runs the scripts that process the data and then train the BERT-tiny model with a hyperparameter sweep.

# Dependencies

In [None]:
# This file is intended to be executed in a Google Colab environment.

# Import the necessary library to mount Google Drive to this Colab instance.
from google.colab import drive

# Mount the Google Drive to access files stored there.
drive.mount('/content/drive')

# Change the working directory to a specific folder in the mounted Google Drive.
# Modify the path according to the actual location of your files.
%cd drive/MyDrive/WandB/week_3

# Import the sys library to manipulate the Python path.
import sys

# Append the current directory to the Python path.
sys.path.append('./')

# Load the autoreload extension for Jupyter Notebook.
%load_ext autoreload

# Set the autoreload mode to automatically reload modules when changes are detected.
%autoreload 2

# Install Python packages listed in the requirements.txt file.
!pip install -r requirements.txt

# Install the Kaggle package, likely for data access or Kaggle-specific functionality.
!pip install kaggle

In [None]:
# Create a directory named '.kaggle' in the '/root/' directory.
!mkdir /root/.kaggle

# Move the 'kaggle.json' file from its original location to the '/root/.kaggle/' directory.
!mv ../../../../kaggle.json /root/.kaggle/kaggle.json

# Set appropriate permissions (read and write for owner only) for the 'kaggle.json' file.
!chmod 600 /root/.kaggle/kaggle.json

# Process Data

In [None]:
# Run the data-processing script (process.py in the working directory) as a separate Python process.
# Per the original author's note it downloads, logs and processes the data; the script itself is not shown here.

!python process.py

# Train Model

In [None]:
# Log in to the Hugging Face Hub.
# NOTE(review): notebook_login() presumably prompts for an access token interactively — confirm against the huggingface_hub docs.

from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Set the environment variable 'WANDB_LOG_MODEL' to 'true'.
# NOTE(review): presumably read by the training code's W&B integration to log the model during the experiment — confirm.
%env WANDB_LOG_MODEL=true

# Create a Weights & Biases (wandb) sweep from the configuration defined in 'sweep.yaml'.
# This only registers the sweep and prints its ID; the runs themselves are started with `wandb agent`.
!wandb sweep sweep.yaml

env: WANDB_LOG_MODEL=true
[34m[1mwandb[0m: Creating sweep from: sweep.yaml
[34m[1mwandb[0m: Created sweep with ID: [33mugks89d5[0m
[34m[1mwandb[0m: View sweep at: [34m[4mhttps://wandb.ai/dmeltzer/mlops-course-assgn3/sweeps/ugks89d5[0m
[34m[1mwandb[0m: Run sweep agent with: [33mwandb agent dmeltzer/mlops-course-assgn3/ugks89d5[0m


In [None]:
# The following code is using the Weights & Biases (wandb) command-line interface (CLI) to start 20 runs of a specified experiment.

# Command to start the agent.
!wandb agent dmeltzer/mlops-course-assgn3/ugks89d5 --count 20

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 88% 943/1071 [00:15<00:01, 73.35it/s][A
 89% 951/1071 [00:15<00:01, 74.43it/s][A
 90% 959/1071 [00:15<00:01, 73.77it/s][A
 90% 967/1071 [00:15<00:01, 74.18it/s][A
 91% 975/1071 [00:15<00:01, 74.97it/s][A
 92% 983/1071 [00:15<00:01, 73.50it/s][A
 93% 991/1071 [00:15<00:01, 73.34it/s][A
 93% 999/1071 [00:15<00:00, 74.62it/s][A
 94% 1007/1071 [00:15<00:00, 73.77it/s][A
 95% 1015/1071 [00:16<00:00, 74.08it/s][A
 96% 1023/1071 [00:16<00:00, 74.92it/s][A
 96% 1031/1071 [00:16<00:00, 75.04it/s][A
 97% 1039/1071 [00:16<00:00, 74.52it/s][A
 98% 1047/1071 [00:16<00:00, 75.54it/s][A
 99% 1055/1071 [00:16<00:00, 74.76it/s][A
 99% 1063/1071 [00:16<00:00, 73.50it/s][A
                                        
  4% 1250/32130 [02:35<23:54, 21.53it/s]
100% 1071/1071 [00:19<00:00, 75.27it/s][A
                                       [A{'eval_loss': 1.4138805866241455, 'eval_accuracy': 0.43575973381588934, 'eval_f1': 0.435