In [1]:
import os
from typing import Callable, Optional, Tuple, TypeAlias
from torch import Tensor
from torch.utils.data import Dataset
import torch
from torchvision.io import read_image
from torchvision.transforms.functional import convert_image_dtype

ImageTransform: TypeAlias = Callable[[Tensor], Tensor]

class ImageDataset(Dataset[Tuple[Tensor, Tensor]]):
    def __init__(self,
                 image_root: str,
                 transform: Optional[ImageTransform] = None) -> None:
        self.labels = {}
        self.images = []
        for i, subdir in enumerate(os.listdir(image_root)):
            self.labels[i] = subdir
            subdir_path = os.path.join(image_root, subdir)
            for filename in os.listdir(subdir_path):
                path = os.path.join(subdir_path, filename)
                self.images.append((path, i))
        self.transform = transform

    def __len__(self) -> int:
        return len(self.images)

    def __getitem__(self, idx) -> Tuple[Tensor, Tensor]:
       path, label_idx = self.images[idx]
       img = read_image(path)
       img = convert_image_dtype(img, torch.float32)
       if self.transform:
           img = self.transform(img)
       label = torch.tensor(label_idx, dtype=torch.long)
       return img, label
        


In [2]:
dir = "/tmp/data/archive/seg_train/seg_train"
x = ImageDataset(dir)
print(f"Number of images: {len(x)}")
print(f"First image shape: {x[0][0].shape}")

Number of images: 14034
First image shape: torch.Size([3, 150, 150])


In [3]:
from torch.utils.data import DataLoader

loader = DataLoader(x, batch_size=64, shuffle=True)
for inputs, labels in loader:
    print(f"Input shape: {inputs.shape}")
    print(f"Label shape: {labels.shape}")

Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])


RuntimeError: stack expects each tensor to be equal size, but got [3, 150, 150] at entry 0 and [3, 113, 150] at entry 16

In [4]:
from torchvision.transforms.v2 import Pad

def pad_to_150(
    img: Tensor,
    fill: int | float = 0,
    padding_mode: str = "constant",
) -> Tensor:
    target = (150, 150)
    _, h, w = img.shape
    dh = max(0, target[0] - h)
    dw = max(0, target[1] - w)
    padding = (dw // 2, dh // 2, dw - dw // 2, dh - dh // 2)
    return Pad(padding, fill=fill, padding_mode=padding_mode)(img)

dir = "/tmp/data/archive/seg_train/seg_train"
x = ImageDataset(dir, transform=pad_to_150)


In [5]:
from torch.utils.data import DataLoader

loader = DataLoader(x, batch_size=64, shuffle=True)
for inputs, labels in loader:
    print(f"Input shape: {inputs.shape}")
    print(f"Label shape: {labels.shape}")

Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label shape: torch.Size([64])
Input shape: torch.Size([64, 3, 150, 150])
Label sh

In [6]:
import pandas as pd
from aconai.pipelines import RowAccessor

data = {
    "feature1": [1.0, 2.0, 3.0],
    "feature2": [4.0, 5.0, 6.0],
    "label": [0, 1, 0],
}
df = pd.DataFrame(data)
dataset = RowAccessor(df, labels="label")
features, labels = dataset[1]
print(f"Features: {features}")
print(f"Labels: {labels}")


Features: tensor([2., 5.])
Labels: tensor([1.])


In [21]:
from datetime import date
import numpy as np

data = {
    "date": [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)],
    "x1": [4.0, 5.0, 6.0],
    "x2": [3.0, 3.0, 4.0],
    "label": [0, 1, 0],
}
df = pd.DataFrame(data)

df["date"] = pd.to_datetime(df["date"])
one_hot = (pd.get_dummies(df["date"].dt.dayofweek, dtype=np.float32)
             .reindex(columns=range(7), fill_value=0.0))
one_hot.columns = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"]
transformed = pd.concat([one_hot, df[["x1", "label"]]], axis=1)
print(transformed)



   mon  tue  wed  thu  fri  sat  sun   x1  label
0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  4.0      0
1  0.0  0.0  0.0  1.0  0.0  0.0  0.0  5.0      1
2  0.0  0.0  0.0  0.0  1.0  0.0  0.0  6.0      0


In [None]:
dataset = RowAccessor(transformed, labels="label")
features, labels = dataset[1]
print(f"Features: {features}")
print(f"Labels: {labels}")


Features: tensor([0., 0., 0., 1., 0., 0., 0., 5.])
Labels: tensor([1.])
