In [6]:
import torch
import torchvision
# import torchvision.transforms as transforms
import numpy as np

class CIFARDataset:
    def __init__(self, iid=True, batch_size=128):
        self.train_x = None
        self.train_y = None
        self.test_x = None
        self.test_y = None
        self.batch_size = batch_size
        self.iid = iid

    def load_data(self):
        # Define the transformations
        # transform = transforms.Compose([
        #     transforms.ToTensor(),
        #     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        # ])

        # Load the CIFAR-10 dataset
        cifar_location = r'D:\MS_Thesis\Hierarchical_quantization\FedPAQ-MNIST-implemenation-main\cifar-10\cifar-10-python'
        train_data = torchvision.datasets.CIFAR10(root=cifar_location, train=True, download=False)
        test_data = torchvision.datasets.CIFAR10(root=cifar_location, train=False, download=False)

        # Extract data and labels from the dataset objects
        train_images = np.array([np.transpose(img.numpy(), (0, 1, 2)) for img, _ in train_data])
        train_labels = np.array([label for _, label in train_data])
        test_images = np.array([np.transpose(img.numpy(), (0, 1, 2)) for img, _ in test_data])
        test_labels = np.array([label for _, label in test_data])

        # If iid is False, sort data by label to simulate non-iid
        if not self.iid:
            sorted_indices = np.argsort(train_labels)
            train_images, train_labels = train_images[sorted_indices], train_labels[sorted_indices]

        # Normalize and reshape for PyTorch compatibility
        self.train_x = np.array([train_images[n:n + self.batch_size] for n in range(0, len(train_images) - self.batch_size, self.batch_size)], dtype=np.float32)
        self.train_y = np.array([train_labels[n:n + self.batch_size] for n in range(0, len(train_labels) - self.batch_size, self.batch_size)])
        self.test_x = np.array([test_images[n:n + self.batch_size] for n in range(0, len(test_images) - self.batch_size, self.batch_size)], dtype=np.float32)
        self.test_y = np.array([test_labels[n:n + self.batch_size] for n in range(0, len(test_labels) - self.batch_size, self.batch_size)])

        # Convert numpy arrays to torch tensors
        self.train_x, self.train_y = torch.tensor(self.train_x), torch.tensor(self.train_y)
        self.test_x, self.test_y = torch.tensor(self.test_x), torch.tensor(self.test_y)

        return self.train_x, self.train_y, self.test_x, self.test_y

# Usage example
dataset = CIFARDataset(iid=True, batch_size=128)
train_x, train_y, test_x, test_y = dataset.load_data()
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)


AttributeError: numpy

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import numpy as np

cifar_location = r'D:\MS_Thesis\Hierarchical_quantization\FedPAQ-MNIST-implemenation-main\cifar-10\cifar-10-python'

class CIFAR10Dataset:
    def __init__(self, iid=True, batch_size=128):
        self.batch_size = batch_size
        self.iid = iid
        self.train_loader = None
        self.test_loader = None

    def load_data(self):
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

        # Download and load the training data
        trainset = torchvision.datasets.CIFAR10(root=cifar_location, train=True, download=False, transform=transform)
        
        # Download and load the test data
        testset = torchvision.datasets.CIFAR10(root=cifar_location, train=False, download=False, transform=transform)

        # Calculate number of batches
        num_batches = len(trainset.data) // self.batch_size

        # Truncate the dataset to a multiple of the batch size
        train_x = trainset.data[:num_batches * self.batch_size]
        train_y = np.array(trainset.targets[:num_batches * self.batch_size])

        # Reshape into batches
        train_x_batches = train_x.reshape(num_batches, self.batch_size, *train_x.shape[1:])
        train_y_batches = train_y.reshape(num_batches, self.batch_size)
        
        return trainset.targets, trainset.data, testset.targets, testset.data

    def load_data_2(self):
        batch_size = self.batch_size
        trainset = torchvision.datasets.CIFAR10(root=cifar_location, train=True, download=False)
        testset = torchvision.datasets.CIFAR10(root=cifar_location, train=False, download=False)

        self.train_x, self.test_x, self.train_y, self.test_y = trainset.data, testset.data, trainset.targets, testset.targets

        self.train_x = np.array([self.train_x[n:n+batch_size] for n in range(0, len(self.train_x)-batch_size, batch_size)])/255.0
        self.test_x = np.array([self.test_x[n:n+batch_size] for n in range(0, len(self.test_x)-batch_size, batch_size)])/255.0
        self.train_y = np.array([self.train_y[n:n+batch_size] for n in range(0, len(self.train_y)-batch_size, batch_size)])
        self.test_y = np.array([self.test_y[n:n+batch_size] for n in range(0, len(self.test_y)-batch_size, batch_size)])

        self.train_x, self.train_y, self.test_x, self.test_y = torch.from_numpy(self.train_x), torch.from_numpy(self.train_y), torch.from_numpy(self.test_x), torch.from_numpy(self.test_y)

        return self.train_x, self.train_y, self.test_x, self.test_y

In [3]:
dataset = CIFAR10Dataset(iid=True, batch_size=128)
train_x_c, train_y_c, test_x_c, test_y_c = dataset.load_data_2()

In [9]:
train_x_c.shape

torch.Size([390, 128, 32, 32, 3])

In [5]:
reshaped_tensor = train_x_c.permute(0, 1, 4, 2, 3)


In [7]:
reshaped_tensor.shape

torch.Size([390, 128, 3, 32, 32])