# Install and Import packages

In [4]:
! pip install wandb



In [7]:
import os.path
import random
from datetime import time

import numpy as np
import torch
import pickle
from tqdm import tqdm
from torch.utils.data import DataLoader
import wandb
from transformers import AutoTokenizer


# Check environment

In [8]:
# check if the system is running on colab or macbook m1 pro
import platform
import os
op_system = platform.system()


# Import Data

In [9]:
if op_system == 'Darwin':
    # Macbook
    data_path = '../../data/translation/wmt14-en-de/'
elif op_system == 'Linux':
    # Colab
    from google.colab import drive
    drive.mount('/content/drive')

    data_path = '/content/drive/MyDrive/Projects/nlp_emotion/data/'

# Config

In [10]:

# REPORT_WANDB = True
REPORT_WANDB = False
run_name = "self_implemented_transformer_not_converging"

check_point_folder_path = data_path + "/check_point"

device_type = "cpu"
if not torch.cuda.is_available():
    device_type = "mps"
elif op_system == 'Darwin':
    device_type = "cuda"
device = torch.device(device_type)




BATCH_SIZE = 32 if device_type == "mps" else 12
SEQ_LEN = 64 if device_type == "mps" else 512
ENCODER_LAYER_NUM = 6
DECODER_LAYER_NUM = 6
D_MODEL = 256 if device_type == "mps" else 512
HIDDEN_DIM = 512 if device_type == "mps" else 2048
NUM_HEADS = 8
DROPOUT = 0.1
tokenizer = AutoTokenizer.from_pretrained("gpt2",pad_token="<pad>",bos_token="<sos>",eos_token="<eos>",
                                                       add_bos_token=True, add_eos_token=True,max_length=SEQ_LEN, padding="max_length")
VOCAB_SIZE = tokenizer.vocab_size
EPOCHS = 3
STEPS = 1000000
BETA1 = 0.9
BETA2 = 0.98
EPSILON = 1e-9
LEARNING_RATE = 0.00001
WARMUP_STEPS = 4000

seed_value = 42
torch.manual_seed(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)

In [11]:
print(VOCAB_SIZE)

50257
