In [2]:
# Configuration
import torch 
from datetime import datetime
import logging 
from pathlib import Path 
import os

In [3]:
# Configure Directory 
project_dir = Path(os.getcwd()).parent
data_dir = project_dir / "data"
model_dir = project_dir / "models"
log_dir = project_dir / "logs"

data_dir.mkdir(parents=True, exist_ok=True)
model_dir.mkdir(parents=True, exist_ok=True)
log_dir.mkdir(parents=True, exist_ok=True)
print(f'project_dir: {project_dir}')
print(f'data_dir: {data_dir}')
print(f'model_dir: {model_dir}')
print(f'log_dir: {log_dir}')

project_dir: /Users/ball/Documents/workspace/latex-generator
data_dir: /Users/ball/Documents/workspace/latex-generator/data
model_dir: /Users/ball/Documents/workspace/latex-generator/models
log_dir: /Users/ball/Documents/workspace/latex-generator/logs


In [4]:
# Configure logger 
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
log_file = log_dir / f'log_{timestamp}.log'

logger = logging.getLogger('Handwriting2LaTeX')
logger.setLevel(logging.INFO)

file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.INFO)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)

logger.addHandler(file_handler)
logger.addHandler(logging.StreamHandler())

In [5]:
# log the directory information 
logger.info(f'PROJECT_DIR: {project_dir}')
logger.info(f'MODEL_DIR: {model_dir}')
logger.info(f'LOG_DIR: {log_dir}')

PROJECT_DIR: /Users/ball/Documents/workspace/latex-generator
MODEL_DIR: /Users/ball/Documents/workspace/latex-generator/models
LOG_DIR: /Users/ball/Documents/workspace/latex-generator/logs


In [6]:
# Define Parameter for InkML parsing 
TIME_SAMPLING_DELTA = 30
SEQ_MAX = 500
SEQ_MIN = -500
PADDING = 4

logger.info(f'TIME_SAMPLING_DELTA: {TIME_SAMPLING_DELTA}')
logger.info(f'SEQ_MAX: {SEQ_MAX}')
logger.info(f'SEQ_MIN: {SEQ_MIN}')
logger.info(f'PADDING: {PADDING}')

# dataset, dataloader parameter
NUM_WORKERS=2
logger.info(f'NUM_WORKERS: {NUM_WORKERS}')

# Load the model?
LOAD_MODEL=False
LOADING_VIT_MODEL_NAME=""
LOADING_UL2_MODEL_NAME=""

# Define Training parameters for ViT
IMG_SIZE = 224
PATCH_SIZE=16
IMG_IN_CHANNELS=3
D_MODEL=512 # also used in mT5
SIGLIP_N_LAYERS=6
SIGLIP_N_HEADS=8
SIGLIP_FFN_HIDDEN=1024
SIGLIP_DROPOUT=0.1


# Define Training parameters 
EPOCHS = 100
BATCH_SIZE = 16

GEMMA_N_LAYERS=3
GEMMA_N_HEADS=8
GEMMA_FFN_HIDDEN=1024
GEMMA_DROPOUT=0.1

GEMMA_MAX_SEQ_LEN=1024

logger.info(f'EPOCHS: {EPOCHS}')
logger.info(f'BATCH_SIZE: {BATCH_SIZE}')
logger.info(f'IMG_SIZE: {IMG_SIZE}')

TIME_SAMPLING_DELTA: 30
SEQ_MAX: 500
SEQ_MIN: -500
PADDING: 4
NUM_WORKERS: 2
EPOCHS: 100
BATCH_SIZE: 16
IMG_SIZE: 224


In [7]:
# Configure device: CUDA, MPS, CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA as device")
else:
    # Check that MPS is available
    if not torch.backends.mps.is_available():
        if not torch.backends.mps.is_built():
            print("MPS not available because the current PyTorch install was not "
                  "built with MPS enabled.")
        else:
            print("MPS not available because the current MacOS version is not 12.3+ "
                  "and/or you do not have an MPS-enabled device on this machine.")
        device = torch.device("cpu")
        print("Using CPU as device")
    else:
        device = torch.device("mps")
        print("Using MPS as device")

# for mps, we just use cpu
device = torch.device("cpu")

    
torch.set_default_device(device)
logger.info(f'device: {device}')

device: cpu


Using MPS as device
