In [20]:
import torch,torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np

In [23]:
def minstIID(dataset, num_users):
    """Split ``dataset`` into equally sized, disjoint, randomly drawn index sets.

    Args:
        dataset: Any object supporting ``len()``; only its length is used.
        num_users: Number of users to create index sets for.

    Returns:
        dict mapping each user id in ``range(num_users)`` to a set of
        ``int(len(dataset) / num_users)`` dataset indices. No index is handed
        to more than one user.

    Note:
        NumPy's global random state is re-seeded with the user id before each
        draw, so repeated calls return the same split.
    """
    per_user = int(len(dataset) / num_users)
    remaining = list(range(len(dataset)))
    assignment = {}

    for user in range(num_users):
        # Re-seed per user so that every run produces the same split.
        np.random.seed(user)
        picked = set(np.random.choice(remaining, per_user, replace=False))
        assignment[user] = picked
        # Drop what was just handed out so later users cannot receive it again.
        remaining = list(set(remaining) - picked)

    return assignment
                            

In [29]:
def minstNonIID(dataset, num_users):
    """Partition a labelled dataset into non-IID, equally sized user groups.

    The sample indices are ordered by label and cut into ``classes``
    contiguous shards. Every user then receives two randomly chosen shards,
    so each user only sees samples from a few labels.

    Args:
        dataset: Object exposing its labels as ``targets`` (or the legacy
            ``train_labels`` attribute), as a tensor or any array-like.
        num_users: Number of users to create groups for.

    Returns:
        dict mapping each user id in ``range(num_users)`` to a 1-D integer
        NumPy array holding that user's dataset indices.

    Raises:
        ValueError: If the dataset has fewer samples than shards, or if there
            are not enough shards to give every user two of them.

    Note:
        ``np.random.seed(i)`` is called for every user ``i``. This makes the
        split reproducible but also resets NumPy's global random state.
    """
    # Number of shards the sorted data is cut into, and shards given per user.
    classes, classes_per_user = 100, 2

    # Prefer the current attribute name; fall back to the legacy one.
    labels = getattr(dataset, "targets", None)
    if labels is None:
        labels = dataset.train_labels
    labels = labels.numpy() if hasattr(labels, "numpy") else np.asarray(labels)

    # Images per shard, derived from the data instead of hard-coding 60K
    # samples (a 60,000-sample set still yields 600 images per shard).
    # Samples beyond classes * images are left unassigned.
    images = len(labels) // classes
    if images == 0:
        raise ValueError(
            "dataset has %d samples, fewer than the %d shards required"
            % (len(labels), classes)
        )
    if num_users * classes_per_user > classes:
        raise ValueError(
            "%d users need %d shards but only %d are available"
            % (num_users, num_users * classes_per_user, classes)
        )

    # Shard ids that have not been handed out yet.
    classes_indx = list(range(classes))

    # Dataset indices ordered by their label.
    indeces = np.argsort(labels)

    # Integer arrays so the result can be used for indexing directly.
    users_dict = {i: np.array([], dtype=indeces.dtype) for i in range(num_users)}

    for i in range(num_users):
        np.random.seed(i)  # same shard choice on every run
        temp = set(np.random.choice(classes_indx, classes_per_user, replace=False))
        classes_indx = list(set(classes_indx) - temp)
        for t in temp:
            users_dict[i] = np.concatenate(
                (users_dict[i], indeces[t * images:(t + 1) * images]), axis=0
            )

    return users_dict

In [31]:
def minstNonIIDUnequal(dataset, num_users):
    """Partition a labelled dataset into non-IID user groups of unequal size.

    The sample indices are ordered by label and cut into ``classes``
    contiguous shards. Each user is given a random number of shards (at least
    one), scaled so that together the users consume every shard.

    Args:
        dataset: Object exposing its labels as ``targets`` (or the legacy
            ``train_labels`` attribute), as a tensor or any array-like.
        num_users: Number of users to create groups for.

    Returns:
        dict mapping each user id in ``range(num_users)`` to a 1-D integer
        NumPy array holding that user's dataset indices. An empty dict is
        returned when ``num_users`` is not positive.

    Raises:
        ValueError: If the dataset has fewer samples than shards, or if there
            are more users than shards.

    Note:
        The per-user shard counts are drawn from NumPy's global random state
        as the caller left it; seed it beforehand for a reproducible split.
        The function also calls ``np.random.seed(i)`` per user, which resets
        that global state.
    """
    # Number of shards the sorted data is cut into.
    classes = 1200

    # Bounds for the number of shards initially drawn for each user.
    min_cls_per_client = 1
    max_cls_per_client = 30

    # Prefer the current attribute name; fall back to the legacy one.
    labels = getattr(dataset, "targets", None)
    if labels is None:
        labels = dataset.train_labels
    labels = labels.numpy() if hasattr(labels, "numpy") else np.asarray(labels)

    # Images per shard, derived from the data instead of hard-coding 60K
    # samples (a 60,000-sample set still yields 50 images per shard).
    # Samples beyond classes * images are left unassigned.
    images = len(labels) // classes
    if images == 0:
        raise ValueError(
            "dataset has %d samples, fewer than the %d shards required"
            % (len(labels), classes)
        )
    if num_users > classes:
        raise ValueError(
            "%d users cannot each receive one of only %d shards"
            % (num_users, classes)
        )

    # Shard ids that have not been handed out yet.
    classes_indx = list(range(classes))

    # Dataset indices ordered by their label.
    indeces = np.argsort(labels)

    # Integer arrays so the result can be used for indexing directly.
    users_dict = {i: np.array([], dtype=indeces.dtype) for i in range(num_users)}
    if num_users <= 0:
        return users_dict

    def _draw(count):
        """Randomly remove ``count`` shards from the pool and return them."""
        nonlocal classes_indx
        chosen = set(np.random.choice(classes_indx, count, replace=False))
        classes_indx = list(set(classes_indx) - chosen)
        return chosen

    def _give(user, chosen):
        """Append the image indices of every shard in ``chosen`` to ``user``."""
        parts = [users_dict[user]]
        parts.extend(indeces[t * images:(t + 1) * images] for t in chosen)
        users_dict[user] = np.concatenate(parts, axis=0)

    # Draw one random shard count per user.
    random_selected_classes = np.random.randint(
        min_cls_per_client, max_cls_per_client + 1, size=num_users
    )

    # Fraction of all shards requested so far (0.5 means only half are used).
    ratio_selected = random_selected_classes.sum() / classes

    # Scale every request by the same factor so the total is about 100%.
    random_selected_classes = np.around(
        random_selected_classes / ratio_selected
    ).astype(int)

    # With many users the scaling can round a request down to 0; keep the
    # promise that every user owns at least one shard.
    random_selected_classes = np.maximum(random_selected_classes, min_cls_per_client)

    # Rounding leaves two cases: the requests exceed the number of shards, or
    # some shards are still unrequested.
    if random_selected_classes.sum() > classes:
        # First guarantee one shard per user ...
        for i in range(num_users):
            _give(i, _draw(1))

        # ... and account for it in every user's remaining request.
        random_selected_classes = random_selected_classes - 1

        # Serve the remaining requests in user order until the pool is empty;
        # the last users served only get what is left.
        for i in range(num_users):
            if not classes_indx:
                break
            class_size = min(random_selected_classes[i], len(classes_indx))
            np.random.seed(i)  # same shard choice for this user on every run
            _give(i, _draw(class_size))
    else:
        # Every request can be served in full.
        for i in range(num_users):
            np.random.seed(i)  # same shard choice for this user on every run
            _give(i, _draw(random_selected_classes[i]))

        # Hand any unrequested shards to the user that currently owns the
        # fewest images.
        if classes_indx:
            j = min(users_dict, key=lambda x: len(users_dict[x]))
            _give(j, _draw(len(classes_indx)))

    return users_dict

In [30]:
# Load Data
def load_dataset(num_users, iidtype):
    """Download MNIST and split its train and test sets between users.

    Args:
        num_users: Number of users to partition each split for.
        iidtype: ``'iid'`` selects ``minstIID``, ``'noniid'`` selects
            ``minstNonIID``; any other value selects ``minstNonIIDUnequal``.

    Returns:
        Tuple ``(train_dataset, test_dataset, train_group, test_group)`` where
        the two groups are the per-user index mappings returned by the
        selected partitioning function for the train and test sets.
    """
    # Convert images to tensors, then normalise with mean 0.1307 / std 0.3081.
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )
    train_dataset = torchvision.datasets.MNIST(
        root="./data", train=True, transform=transform,
        target_transform=None, download=True,
    )
    test_dataset = torchvision.datasets.MNIST(
        root="./data", train=False, transform=transform,
        target_transform=None, download=True,
    )

    if iidtype == 'iid':
        partition = minstIID
    elif iidtype == 'noniid':
        partition = minstNonIID
    else:
        # The test split previously called the undefined ``minstIIDUnequal``;
        # both splits now use the unequal non-IID partitioner.
        partition = minstNonIIDUnequal

    train_group = partition(train_dataset, num_users)
    test_group = partition(test_dataset, num_users)
    return train_dataset, test_dataset, train_group, test_group


In [10]:
#train_set

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
           )

In [11]:
#test_set

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
           )

In [28]:
class FedDataset(Dataset):
    """View of ``dataset`` restricted to the sample positions in ``indx``.

    Args:
        dataset: Indexable dataset whose items are ``(image, label)`` pairs.
        indx: Iterable of positions into ``dataset``; each entry is converted
            with ``int()``, so floats and NumPy scalars are accepted.
    """

    def __init__(self, dataset, indx):
        self.dataset = dataset
        self.indx = [int(i) for i in indx]

    def __len__(self):
        """Return the number of samples selected for this view."""
        return len(self.indx)

    def __getitem__(self, item):
        """Return detached tensor copies of the ``item``-th selected sample."""
        images, label = self.dataset[self.indx[item]]
        return self._as_tensor(images), self._as_tensor(label)

    @staticmethod
    def _as_tensor(value):
        """Return ``value`` as a detached tensor that shares no storage."""
        if isinstance(value, torch.Tensor):
            # Calling torch.tensor() on an existing tensor emits a UserWarning
            # and copies twice; clone().detach() is the recommended form.
            return value.clone().detach()
        return torch.tensor(value)
        

In [25]:
def getActualImgs(dataset, indeces, batch_size):
    """Build a shuffling ``DataLoader`` over the ``indeces`` subset of ``dataset``.

    Args:
        dataset: Dataset the samples are read from.
        indeces: Positions into ``dataset`` that make up the subset.
        batch_size: Batch size passed to the ``DataLoader``.

    Returns:
        A ``DataLoader`` wrapping ``FedDataset(dataset, indeces)`` with
        ``shuffle=True``.
    """
    user_subset = FedDataset(dataset, indeces)
    loader = DataLoader(user_subset, batch_size=batch_size, shuffle=True)
    return loader