In [1]:
import sys
import os

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader

In [3]:
# Add the parent directory to the module search path so the project-local
# `utils` module can be imported (presumably it lives one level up — confirm).
sys.path.append(os.path.abspath(".."))
from utils import read_features, read_targets, print_info_features, print_info_targets

## Read Features and Targets

In [4]:
# Absolute location of the chronology-prediction data. The relative path is
# resolved against the current working directory by os.path.abspath itself.
path = os.path.abspath("../../data/chronology_prediction")

In [15]:
# Target names to load; they are passed to `read_targets` and later used as the
# top-level keys of the `datasets` and `loaders` dictionaries.
# NOTE(review): the loading cell calls `read_targets(path, targets, ...)`, so
# this cell must be executed before it on a fresh kernel.
targets = ["StartYear", "YearRange"]

In [5]:
# Load every feature set and the selected targets as tensors. Judging by the
# summaries printed further down, both results are nested mappings keyed by
# subset ("train"/"test") and then by feature-set name (X) or target name (y)
# — confirm against `utils.read_features` / `utils.read_targets`.
X = read_features(path, f_type="tensors")
y = read_targets(path, targets, f_type="tensors")

Loaded X_train_tfidf
Loaded X_train_bert
Loaded X_train_cannyhog
Loaded X_train_resnet
Loaded X_train_vit
Loaded X_test_tfidf
Loaded X_test_bert
Loaded X_test_cannyhog
Loaded X_test_resnet
Loaded X_test_vit
Loaded y_train
Loaded y_test


In [6]:
# Print a per-subset, per-feature-set summary (type and shape) of the loaded features.
print_info_features(X)

{
	train: {
		tfidf: 
			<class 'torch.Tensor'>
			shape = torch.Size([1719, 300]), 
		bert: 
			<class 'torch.Tensor'>
			shape = torch.Size([1719, 768]), 
		cannyhog: 
			<class 'torch.Tensor'>
			shape = torch.Size([1719, 2917]), 
		resnet: 
			<class 'torch.Tensor'>
			shape = torch.Size([1719, 2048]), 
		vit: 
			<class 'torch.Tensor'>
			shape = torch.Size([1719, 768]), 
	},
	test: {
		tfidf: 
			<class 'torch.Tensor'>
			shape = torch.Size([191, 300]), 
		bert: 
			<class 'torch.Tensor'>
			shape = torch.Size([191, 768]), 
		cannyhog: 
			<class 'torch.Tensor'>
			shape = torch.Size([191, 2917]), 
		resnet: 
			<class 'torch.Tensor'>
			shape = torch.Size([191, 2048]), 
		vit: 
			<class 'torch.Tensor'>
			shape = torch.Size([191, 768]), 
	},
}


In [7]:
# Print a per-subset, per-target summary (type and shape) of the loaded targets.
print_info_targets(y)

{
	train: {
		StartYear: 
			<class 'torch.Tensor'>
			shape = torch.Size([1719])
		YearRange: 
			<class 'torch.Tensor'>
			shape = torch.Size([1719])
	},
	test: {
		StartYear: 
			<class 'torch.Tensor'>
			shape = torch.Size([191])
		YearRange: 
			<class 'torch.Tensor'>
			shape = torch.Size([191])
	},
}


In [8]:
class PotteryDataset(Dataset):
    """Map-style dataset pairing one feature matrix with one target vector.

    Args:
        X: Features, indexable by sample along the first axis. Anything that
            is not already a ``torch.Tensor`` is converted to a ``float32``
            tensor; an existing tensor is stored as-is (no copy, dtype
            unchanged).
        y: Targets, indexable by the same sample positions as ``X``. Stored
            unchanged.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        # Save inputs (features) and outputs (targets).
        # Only convert X when it is not already a tensor.
        self.X = X if torch.is_tensor(X) else torch.tensor(X, dtype=torch.float32)
        self.y = y

        # Fail fast on misaligned data: __len__ is derived from X alone, so a
        # shorter y would otherwise raise IndexError in the middle of an epoch
        # and a longer y would be silently truncated.
        n_features, n_targets = len(self.X), len(self.y)
        if n_features != n_targets:
            raise ValueError(
                f"X and y must contain the same number of samples, "
                f"got {n_features} and {n_targets}"
            )

    def __len__(self):
        # Number of samples in the dataset (first dimension of X).
        return len(self.X)

    def __getitem__(self, idx):
        # One sample at position idx as a (features, target) pair.
        return self.X[idx], self.y[idx]

In [16]:
# One PotteryDataset per (target, subset, feature set) combination.
# Resulting layout: datasets[target][subset][feature_set].
datasets = {
    tgt: {
        split: {
            # Pair each feature tensor with the target vector of the same subset.
            feat_name: PotteryDataset(feat_tensor, y[split][tgt])
            for feat_name, feat_tensor in X[split].items()
        }
        for split in X.keys()
    }
    for tgt in targets
}

In [19]:
# Mini-batch size shared by every loader.
BATCH_SIZE = 64

# Mirror the layout of `datasets` (target -> subset -> feature set), wrapping
# each dataset in a DataLoader. Iterating `datasets` directly keeps the two
# structures in sync and avoids re-deriving keys from `X` and `targets`.
# Only the "train" subset is shuffled; every other subset keeps its order.
loaders = {
    target: {
        subset: {
            feature_set: DataLoader(
                dataset,
                batch_size=BATCH_SIZE,
                shuffle=(subset == "train"),
            )
            for feature_set, dataset in per_feature_set.items()
        }
        for subset, per_feature_set in per_subset.items()
    }
    for target, per_subset in datasets.items()
}