In [1]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms

from numpy.random import default_rng
from pathlib import Path

## Downloading Data

In [3]:
# # Define a transform to normalize the data
# data_transforms = transforms.Compose([transforms.ToTensor(),
#                                        transforms.Normalize([0.5, 0.5, 0.5],[0.5, 0.5, 0.5])])

# testset = datasets.CIFAR10('~/.pytorch/CIFAR10_data/', train=False, download=True, transform=data_transforms)
# # Download and load the training data
# trainset = datasets.CIFAR10('~/.pytorch/CIFAR10_data/', download=True, train=True, transform=data_transforms)

# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,)),
                              ])
# Download and load the training data
trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

In [4]:
trainset

Dataset MNIST
    Number of datapoints: 60000
    Root location: /home/fnx11/.pytorch/MNIST_data/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )

In [5]:
testset

NameError: name 'testset' is not defined

In [6]:
trainset[0]

(tensor([[[-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
           -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
           -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
           -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
           -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
           -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
           -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
           -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
           -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
           -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
           -1.0000, -1.0000, -1.000

In [11]:
trainloader = torch.utils.data.DataLoader(trainset)
print(len(trainloader))

60000


## Getting indices of Different Classes
### will be used by the sampler to pick images from specific classes. 

In [9]:
from collections import defaultdict
class_ids = defaultdict(list)
for i in range(60000):
    class_ids[trainset[i][1]].append(i)

In [10]:
for i in range(10):
    print(len(class_ids[i]))

5923
6742
5958
6131
5842
5421
5918
6265
5851
5949


## Distributing Images to Clients.

In [13]:

client_img_tensors = [ [] for i in range(100)]
client_lbl_tensors = [ [] for i in range(100)]
minor_share = 28 # so we will put 26 images from each minority class => 26*9 = 234
major_share = 10*minor_share # 260 Images from major class

# Here we are assuming first 10 clients will have class 0 as majority and next 10 will have class 1 as majority
# and so on.
for i in range(10):
    major_loader = torch.utils.data.DataLoader(trainset, batch_size=major_share,
                                                   sampler=torch.utils.data.SubsetRandomSampler(class_ids[i][:2800]))
    major_iter = iter(major_loader)
    
    minor_loader = torch.utils.data.DataLoader(trainset, batch_size=minor_share,
                                                   sampler=torch.utils.data.SubsetRandomSampler(class_ids[i][2800:]))
    minor_iter = iter(minor_loader)
    
    for j in range(100):
        if j//10 == i:
#             put major_share
            data = next(major_iter)
            client_img_tensors[j].extend(data[0])
            client_lbl_tensors[j].extend(data[1])
        else:
#             put minor share
            data = next(minor_iter)
            client_img_tensors[j].extend(data[0])
            client_lbl_tensors[j].extend(data[1])

In [14]:
# Converting list of tensor images into 1 big tensor of all images and same for labels
client_data = []
for i in range(100):
    img_data = torch.stack(client_img_tensors[i])
    lbl_data = torch.stack(client_lbl_tensors[i])
    client_data.append((img_data, lbl_data)) # If we want to save as tuple or tensor we can decide

In [15]:
# TO check if we have desired label ratios
for i in range(100):
    print(f"client:{i}")
    print(client_data[i][1])

client:0
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1,

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,

In [14]:
# Save the tensor of images and labels for clients
username = 'fnx11'
save_folder = f'/home/{username}/thesis/codes/Playground/data/fed_data/clean_data/'

for i in range(100):
#     print(f'Saving data for client:{i}')
    img_tensor_file = save_folder + f'client_{i}_img.pt'
    lbl_tensor_file = save_folder + f'client_{i}_lbl.pt'
    torch.save(client_data[i][0], img_tensor_file)
    torch.save(client_data[i][1], lbl_tensor_file)



## Created Poisoned data for Randomly Selected 10, 20, 40 Clients

In [32]:
def get_poisoned_labels(orig_labels, label_flip_scheme):
    poisoned_labels = orig_labels.detach().clone()
    poisoned_labels[poisoned_labels==label_flip_scheme[0]] = label_flip_scheme[1]
    return poisoned_labels
    

poison_params = [0, 10, 20, 40] # no of clients poisoned out of 100
label_flip = (2,9)
total_clients = 100
seed = 42
rng = default_rng(seed)
base_path = f'/home/{username}/thesis/codes/Playground/'
root_save_folder = base_path + 'data/fed_data/'
Path(root_save_folder).mkdir(parents=True, exist_ok=True)
for poison_param in poison_params:
    save_folder = root_save_folder + f'poisoned_{poison_param}CLs/'
    Path(save_folder).mkdir(parents=True, exist_ok=True)
    clients_selected = rng.choice(total_clients, size=poison_param, replace=False)
    print(clients_selected)
    for i in range(total_clients):
        img_tensor_file = save_folder + f'client_{i}_img.pt'
        lbl_tensor_file = save_folder + f'client_{i}_lbl.pt'
        torch.save(client_data[i][0], img_tensor_file)
        # Performing label flipping
        poisoned_labels = client_data[i][1]
        if i in clients_selected:
            poisoned_labels = get_poisoned_labels(client_data[i][1], label_flip)
        torch.save(poisoned_labels, lbl_tensor_file)

[]
[96 71  8 60 41 94 68  9 19 82]
[78 81 49 36 27 73 30 52 56  6 35 40 85 84 41 67 91  8 15 21]
[36 42  9 27 26 78 47 18 25 98 62 86 40 52 37  5 97 75 95 23 58 79 14 72
 33 28 54 87 69 34 80 13 68 61 45  8 22 76 63 71]


### Let's check some of these client Data

In [30]:
# from 40 poisoned clients 61
data_folder = root_save_folder + 'poisoned_40CLs/'

lbl_tensor_file61 = data_folder + 'client_61_lbl.pt'
lbl_tensors61 = torch.load(lbl_tensor_file61)
print(lbl_tensors61)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,