In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import transformers
import pandas as pd
from typing import Dict, List
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
import numpy as np

In [6]:
class TextDataset(Dataset):
    '''Dataset class for text data from tabular data for Classification'''
    def __init__(self, df: pd.DataFrame, tokenizer: str, pretrained_weights: str) -> None:
        '''Constructor for TextDatasetForClassification
        
        Args:
        ----
        - df: pd.DataFrame
            - The dataframe containing the required columns.
        - tokenizer: str
            - The name of the tokenizer you want to use
        - pretrained_weights: str
            - The pretrained weights you want to use
        '''
        super(TextDataset, self).__init__()
        self.df = df
        self.tokenizer = getattr(transformers, tokenizer)
        self.tokenizer = self.tokenizer.from_pretrained(pretrained_weights)
        # Write extra logic if needed
        
    def __len__(self) -> int:
        return len(self.df)
    
    def __getitem__(self, ix: int) -> Dict[str, torch.Tensor]:
        # Implement the __getitem__ logic here
        pass

In [11]:
class Image2ImageDataset(Dataset):
    '''Dataset class for image relate tasks'''
    def __init__(self, input_paths: List[str], output_paths: List[str], mode: str) -> None:
        super(Image2ImageDataset, self).__init__()
        self.input_paths = input_paths
        self.output_paths = output_paths
        
        if self.mode == "train":
            self.transforms = A.Compose([
                A.Resize(height=config.image_size[0], width=config.image_size[1], interpolation=0),
                A.Rotate(limit=90, p=0.5),
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.ColorJitter(p=0.5),
                A.Normalize(),
                ToTensorV2()
            ], p=1.0)
        elif self.mode == "valid":
            self.transforms = A.Compose([
                A.Resize(height=config.image_size[0], width=config.image_size[1], interpolation=0),
                A.Normalize(),
                ToTensorV2()
            ], p=1.0)
        else:
            raise NotImplementedError(f"The case where mode={self.mode} has not been implemented try from ['train', 'valid']")

    def __len__(self) -> int:
        return len(self.input_paths)
    
    def __getitem__(self, ix: int) -> Dict[str, torch.Tensor]:
        input_path = self.input_paths[ix]
        output_path = self.output_paths[ix]
        
        input_img = np.array(Image.open(input_path))
        output_img = np.array(Image.open(output_path))
        
        transformed = self.transforms(image=input_img, mask=output_img)
        
        image = torch.tensor(transformed['image'], dtype=torch.float32, device=config.device)
        mask = torch.tensor(transformed['mask'], dtype=torch.float32, device=config.device)
        mask = mask.permute(2, 0, 1)
        return {
            "image": image / 255.0,
            "mask": mask / 255.0
        }

In [12]:
class ModelName(nn.Module):
    def __init__(self) -> None:
        super(ModelName, self).__init__()
        # Write Model information here...
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Implement forward 
        return x

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import StratifiedKFold

In [14]:
class TabularDataset(Dataset):
    def __init__(self, X: np.ndarray, y: np.ndarray) -> None:
        super(TabularDataset, self).__init__()
        self.X = X
        self.y = y
    
    def __len__(self) -> int:
        return self.X.shape[0]
    
    def __getitem__(self, ix: int) -> Dict[str, torch.Tensor]:
        X = self.X[ix]
        y = self.y[ix]
        
        return {
            'X': torch.Tensor(X, dtype=torch.float32),
            'y': torch.Tensor(y, dtype=torch.float32)
        }

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
import wandb


def train_one_step(
    model: nn.Module,
    optimizer: optim,
    scheduler: lr_scheduler, 
    loader: DataLoader, 
    epoch: int,
    criterion
) -> float:
    model.train()
    running_loss = 0.0
    dataset_size = 0.0
    total_size   = len(loader)
    
    pbar = tqdm(enumerate(loader), total=len(loader), desc=f'Epoch [{epoch}] (train) ')
    
    for step, batch in pbar:
        X, y = batch['X'], batch['y'] # change as per need
        bs = X.shape[0]
        
        yHat = model.forward(X)
        loss = criterion(yHat, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()
            
        running_loss += (loss.item() * bs)
        dataset_size += bs
        
        epoch_loss = running_loss / dataset_size
        current_lr = optimizer.param_groups[0]['lr']
        
        pbar.set_postfix(
            epoch_loss=f'{epoch_loss:.5f}',
            current_lr=f'{current_lr:.5f}'
        )
        
        if batch % 5 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        
    return epoch_loss

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
import wandb


@torch.no_grad()
def valid_one_step(
    model: nn.Module,
    optimizer: optim,
    scheduler: lr_scheduler, 
    loader: DataLoader, 
    epoch: int,
    criterion
) -> float:
    model.eval()
    running_loss = 0.0
    dataset_size = 0.0
    total_size   = len(loader)
    
    pbar = tqdm(enumerate(loader), total=len(loader), desc=f'Epoch [{epoch}] (valid) ')
    
    for step, batch in pbar:
        X, y = batch['X'], batch['y'] # change as per need
        bs = X.shape[0]
        
        yHat = model.forward(X)
        loss = criterion(yHat, y)
            
        running_loss += (loss.item() * bs)
        dataset_size += bs
        
        epoch_loss = running_loss / dataset_size
        
        pbar.set_postfix(
            epoch_loss=f'{epoch_loss:.5f}'
        )
        
        if batch % 5 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        
    return epoch_loss

In [21]:
def run_training(fold: int):
    # Write Code...

SyntaxError: incomplete input (107942307.py, line 2)

In [None]:
import torch


class config:
    seed          = 42
    exp_name      = None
    model_name    = None
    base_model    = None
    train_bs      = 32
    valid_bs      = 2 * train_bs
    image_size    = [224, 224]
    in_channels   = 3
    latent_size   = 128
    hidden_size   = 512
    num_layers    = 2
    bidirectional = 0 # could be 0 or 1
    comment       = None
    epochs        = 10
    learning_rate = 3e-4
    scheduler     = 'CosineAnnealingLR'
    min_lr        = 1e-6
    T_max         = int(30000/train_bs*epochs)+50
    T_0           = 25
    warmup_epochs = 0
    wd            = 1e-6
    n_accumulate  = max(1, 32//train_bs)
    n_fold        = 5
    num_classes   = 3
    device        = torch.device("cuda:0" if torch.cuda.is_available() else "mps" if torch.has_mps else "cpu")
    train_num     = 1