In [1]:
# first, load the embeddings
# stored in torch tensors

import torch
import numpy as np
import os
import sys
import json
import argparse
import time
import random
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import scipy.stats 

embeddings_folder = '../data/cifar-10-embedding-3/embeddings/' 
embeddings_files = sorted(os.listdir(embeddings_folder))
test_embeddings = torch.load(embeddings_folder + embeddings_files[0]) # load the first file
print(test_embeddings.size()) # 128, 32, 32, 32

# calculate the entropy of the embeddings, select 1%, 5%, 10%, 20%, 50% of the embeddings
def calculate_entropy(embs, percentage):
    # embs are torch tensors
    # percentage shows the number of embeddings to select
    embs_entropy = []
    for i in range(embs.shape[len(embs.shape)-3]):
        embs_entropy.append(scipy.stats.entropy(embs[:, i].reshape(-1))) # b, c
    # get the top percentage of the channels
    embs_entropy = np.array(embs_entropy)
    indices = np.argsort(embs_entropy)
    num_selected = int(embs.shape[1] * percentage)
    selected_indices = indices[:num_selected]
    return selected_indices
    # out b, c*p, h, w or c*p, h, w

def ranker_entropy(embs, percentage):
    # calculate the entropy of the embeddings
    # if the data is on the cuda, move it to cpu
    if embs.is_cuda:
        embs = embs.detach().cpu()
    if type(embs) == torch.Tensor:
        embs = embs.detach().numpy()
    selected_indices = calculate_entropy(embs, percentage)
    # select indice from test embeddings
    # set the selected embeddings to 0
    selected_embeddings = embs[:, selected_indices]
    return selected_embeddings, selected_indices

# get the selected embeddings
selected_embeddings, selected_indices = ranker_entropy(test_embeddings, 0.1)
print(selected_embeddings.shape)
print(selected_indices.shape)


torch.Size([128, 32, 32, 32])
(128, 3, 32, 32)
(3,)


In [15]:
"""
# test the entropy function
from scipy.stats import entropy
a = torch.tensor([0.1, 0.2, 0.3, 0.4])
print(entropy(a))
a = torch.tensor([0.1, 0.2, 0.3, 0.5])
print(entropy(a))
a = torch.tensor([0.1, 0.4, 0.3, 0.5])
print(entropy(a))
"""

'\n# test the entropy function\nfrom scipy.stats import entropy\na = torch.tensor([0.1, 0.2, 0.3, 0.4])\nprint(entropy(a))\na = torch.tensor([0.1, 0.2, 0.3, 0.5])\nprint(entropy(a))\na = torch.tensor([0.1, 0.4, 0.3, 0.5])\nprint(entropy(a))\n'

In [13]:
# embeddings_folder = '../data/cifar-10-embedding-3/embeddings/' 
selected_embeddings.shape

torch.Size([128, 3, 32, 32])

In [14]:
embeddings_folder = '../data/cifar-10-embedding-3/embeddings/' 
embeddings_files = sorted(os.listdir(embeddings_folder))
test_embeddings = torch.load(embeddings_folder + embeddings_files[0]) # load the first file

In [11]:
"""
import matplotlib.pyplot as plt
# load a picture
figure_folder = '/home/tonypeng/Workspace1/adaptfilter/data/cifar-10-batches-py/test_batch'
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict
cifar_data = unpickle(figure_folder)
figure0 = cifar_data[b'data'][1].reshape(3,32,32)
label0 = cifar_data[b'labels'][1]
# print figure0
plt.figure()
plt.imshow(figure0.transpose(1, 2, 0))
print(label0)

# plot figure0
import torchvision.transforms as transforms
import torchvision
transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
testset = torchvision.datasets.CIFAR10(
        root='/home/tonypeng/Workspace1/adaptfilter/data/', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
        testset, batch_size=100, shuffle=False, num_workers=4)
for i, data in enumerate(testloader):
    figure0, label0 = data
    break

# plot figure0
plt.figure()
plt.imshow(figure0[1].numpy().transpose(1, 2, 0))
print(label0[1])
# figure0 = figure0[1].unsqueeze(0)
# label0 = label0[1]
"""


"\nimport matplotlib.pyplot as plt\n# load a picture\nfigure_folder = '/home/tonypeng/Workspace1/adaptfilter/data/cifar-10-batches-py/test_batch'\ndef unpickle(file):\n    import pickle\n    with open(file, 'rb') as fo:\n        dict = pickle.load(fo, encoding='bytes')\n    return dict\ncifar_data = unpickle(figure_folder)\nfigure0 = cifar_data[b'data'][1].reshape(3,32,32)\nlabel0 = cifar_data[b'labels'][1]\n# print figure0\nplt.figure()\nplt.imshow(figure0.transpose(1, 2, 0))\nprint(label0)\n\n# plot figure0\nimport torchvision.transforms as transforms\nimport torchvision\ntransform_test = transforms.Compose([\n        transforms.ToTensor(),\n        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n    ])\ntestset = torchvision.datasets.CIFAR10(\n        root='/home/tonypeng/Workspace1/adaptfilter/data/', train=False, download=True, transform=transform_test)\ntestloader = torch.utils.data.DataLoader(\n        testset, batch_size=100, shuffle=False, num_worker

In [9]:
"""
# import model
from Models import mobilenetv2

client_model, server_model = mobilenetv2.stupid_model_splitter(weight_path='./Weights/cifar-10/MobileNetV2.pth')
client_model.eval()
server_model.eval()
features0 = client_model(figure0)
result = server_model(features0)
# get the index
result = torch.argmax(result, dim=1)
print(result)
print(label0)
print(result-label0)

# test split performance
result2 = []
for i in range(100):
    features0 = client_model(figure0[i].unsqueeze(0))
    result = server_model(features0)
    result = torch.argmax(result, dim=1)
    result2.append(result)
result2 = torch.tensor(result2)
print(result2)
print(result2-label0)
"""

"\n# import model\nfrom Models import mobilenetv2\n\nclient_model, server_model = mobilenetv2.stupid_model_splitter(weight_path='./Weights/cifar-10/MobileNetV2.pth')\nclient_model.eval()\nserver_model.eval()\nfeatures0 = client_model(figure0)\nresult = server_model(features0)\n# get the index\nresult = torch.argmax(result, dim=1)\nprint(result)\nprint(label0)\nprint(result-label0)\n\n# test split performance\nresult2 = []\nfor i in range(100):\n    features0 = client_model(figure0[i].unsqueeze(0))\n    result = server_model(features0)\n    result = torch.argmax(result, dim=1)\n    result2.append(result)\nresult2 = torch.tensor(result2)\nprint(result2)\nprint(result2-label0)\n"

In [2]:
# make some features to zero and test the performance
# load the client and server model
from Models import mobilenetv2
import torch
client_model, server_model = mobilenetv2.stupid_model_splitter(weight_path='./Weights/cifar-10/MobileNetV2.pth')
# set the client model to eval mode
client_model.eval()
server_model.eval()
client_model.cuda()
server_model.cuda()

# load the test data
import torchvision.transforms as transforms
import torchvision
from itertools import combinations
from tqdm import tqdm
transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
testset = torchvision.datasets.CIFAR10(
        root='/home/tonypeng/Workspace1/adaptfilter/data/', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
        testset, batch_size=1, shuffle=False, num_workers=4)
o_count = 0
r_count = 0
a_count = {}
# try the first 5 images
for i, data in enumerate(testloader):
        figure0, label0 = data
        figure0 = figure0.cuda()
        label0 = label0.cuda()
        # use the ranker
        t_emb = client_model(figure0) # gpu
        s_emb, s_ind = ranker_entropy(t_emb, 0.125) # cpu
        print('s_ind: ', s_ind)
        # create a new feature
        n_emb = torch.zeros(1,32,32,32) # cpu
        n_emb[:, s_ind] = torch.tensor(s_emb) # cpu
        # get the result
        n_emb = n_emb.cuda() # gpu
        result = server_model(n_emb)
        # print(n_emb)
        result = torch.argmax(result, dim=1)
        # print(result, label0, result-label0)
        if result == label0:
            o_count += 1
        
        # get some random features
        r_ind = np.random.choice(32, int(32*0.125))
        print('r_ind: ', r_ind) 
        n_emb = torch.zeros(1,32,32,32).cuda() # gpu
        n_emb[:, r_ind] = t_emb[:,r_ind] # gpu
        result = server_model(n_emb)
        # print(n_emb)
        result = torch.argmax(result, dim=1)
        # print('r: ' ,result, label0, result-label0)
        if result == label0:
            r_count += 1
        
        # check it we can improve the result with any conbination of the features
        total_ind = int(32*0.125)
        total_comb = list(combinations(range(32), total_ind))
        # make a large batch
        for j in tqdm(range(len(total_comb)//310)):
            n_emb = torch.zeros(310,32,32,32).cuda() # gpu
            for k in range(310):
                n_emb[k, total_comb[j*310+k]] = t_emb[0, total_comb[j*310+k]]
            n_emb = n_emb.cuda()
            result = server_model(n_emb)
            result = torch.argmax(result, dim=1)
            for k in range(310):
                count_key = str(total_comb[j*310+k])
                if result[k] == label0:
                    if count_key in a_count:
                        a_count[count_key] += 1
                    else:
                        a_count[count_key] = 0
        if i == 10:
            break
print('o: ', o_count)
print('r: ', r_count)
print('a: ', a_count)

# compare the performance of the original model and the ranker model
higher_count = 0
highest_count = 0
for a in a_count:
        if a_count[a] > o_count:
                higher_count += 1
        if a_count[a] > highest_count:
                highest_count = a_count[a]
print('higher_count: ', higher_count)
print('highest_count: ', highest_count)
# get the average performance of the ranker model
total = 0
for a in a_count:
        total += a_count[a]
print(total/len(a_count))

# we can see that p = 0.1
# o:  3
# r:  3
# higher_count:  803/4960
# highest_count:  10 (5, 13, 17)

# p = 0.125
# o:  4 [24  7  9 27]
# r:  3
# higher_count:  2752/35960
# highest_count:  10 (4, 17, 18, 26)
# comment: here, the combination is fixed, so if we set the best, it can use 0.1% of the features to get the best performance
# problem: how to search the most efficient features
    

Files already downloaded and verified
s_ind:  [24  7  9 27]
r_ind:  [16 24 20 19]


100%|██████████| 116/116 [00:05<00:00, 21.38it/s]


s_ind:  [ 7  9 31 27]
r_ind:  [22 26 11 13]


100%|██████████| 116/116 [00:05<00:00, 21.67it/s]


s_ind:  [ 7  9 31 27]
r_ind:  [11  4 10 27]


100%|██████████| 116/116 [00:05<00:00, 21.54it/s]


s_ind:  [ 9  7 27 31]
r_ind:  [ 3 22  4  1]


100%|██████████| 116/116 [00:05<00:00, 21.54it/s]


s_ind:  [24  9  7 27]
r_ind:  [22  5 25 27]


100%|██████████| 116/116 [00:05<00:00, 21.50it/s]


s_ind:  [24  9 28 27]
r_ind:  [22  6 20 11]


100%|██████████| 116/116 [00:05<00:00, 21.50it/s]


s_ind:  [24  9  4 28]
r_ind:  [ 7 25 30  2]


100%|██████████| 116/116 [00:05<00:00, 21.59it/s]


s_ind:  [ 9 24 27  7]
r_ind:  [10 23  3 29]


100%|██████████| 116/116 [00:05<00:00, 21.66it/s]


s_ind:  [ 9  7 27 31]
r_ind:  [ 8 25 22  0]


100%|██████████| 116/116 [00:05<00:00, 21.82it/s]


s_ind:  [ 7  9 31 24]
r_ind:  [ 4 22 24  4]


100%|██████████| 116/116 [00:05<00:00, 21.92it/s]


s_ind:  [ 7  9 31 27]
r_ind:  [28 28  4 13]


100%|██████████| 116/116 [00:05<00:00, 21.63it/s]

o:  4
r:  3
a:  {'(0, 1, 2, 4)': 2, '(0, 1, 2, 5)': 1, '(0, 1, 2, 10)': 1, '(0, 1, 2, 13)': 1, '(0, 1, 2, 14)': 2, '(0, 1, 2, 18)': 1, '(0, 1, 2, 23)': 2, '(0, 1, 2, 25)': 1, '(0, 1, 2, 26)': 2, '(0, 1, 2, 27)': 4, '(0, 1, 3, 4)': 2, '(0, 1, 3, 5)': 1, '(0, 1, 3, 10)': 1, '(0, 1, 3, 13)': 1, '(0, 1, 3, 14)': 2, '(0, 1, 3, 18)': 1, '(0, 1, 3, 23)': 2, '(0, 1, 3, 25)': 1, '(0, 1, 3, 26)': 2, '(0, 1, 3, 27)': 4, '(0, 1, 4, 6)': 2, '(0, 1, 4, 7)': 2, '(0, 1, 4, 8)': 2, '(0, 1, 4, 9)': 2, '(0, 1, 4, 10)': 2, '(0, 1, 4, 11)': 2, '(0, 1, 4, 12)': 2, '(0, 1, 4, 13)': 3, '(0, 1, 4, 14)': 2, '(0, 1, 4, 15)': 2, '(0, 1, 4, 16)': 2, '(0, 1, 4, 17)': 8, '(0, 1, 4, 18)': 5, '(0, 1, 4, 19)': 2, '(0, 1, 4, 20)': 2, '(0, 1, 4, 21)': 2, '(0, 1, 4, 22)': 2, '(0, 1, 4, 23)': 2, '(0, 1, 4, 24)': 2, '(0, 1, 4, 27)': 2, '(0, 1, 4, 28)': 2, '(0, 1, 4, 29)': 2, '(0, 1, 4, 30)': 2, '(0, 1, 4, 31)': 2, '(0, 1, 5, 6)': 1, '(0, 1, 5, 7)': 1, '(0, 1, 5, 8)': 1, '(0, 1, 5, 9)': 1, '(0, 1, 5, 11)': 1, '(0, 1, 5, 12)'




In [4]:
for a in a_count:
    if a_count[a] == 10:
        print(a, a_count[a])

(4, 17, 18, 26) 10


In [3]:
# Use Kmeans to select the features
import numpy as np
import torch
import numpy as np
import os
import sys
import json
import argparse
import time
import random
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import scipy.stats 

embeddings_folder = '../data/cifar-10-embedding-3/embeddings/' 
embeddings_files = sorted(os.listdir(embeddings_folder))
test_embeddings = torch.load(embeddings_folder + embeddings_files[0]) # load the first file
print(test_embeddings.size()) # 128, 32, 32, 32

# calculate the PCA of the embeddings, select 1%, 5%, 10%, 20%, 50% of the embeddings
def calculate_Kmeans(embs, percentage):
    
    

def ranker_Kmeans(embs, percentage):
    # calculate the Kmeans of the embeddings
    # if the data is on the cuda, move it to cpu
    if embs.is_cuda:
        embs = embs.detach().cpu()
    if type(embs) == torch.Tensor:
        embs = embs.detach().numpy()
    selected_indices = calculate_Kmeans(embs, percentage)
    # select indice from test embeddings
    # set the selected embeddings to 0
    selected_embeddings = embs[:, selected_indices]
    return selected_embeddings, selected_indices

# get the selected embeddings
selected_embeddings, selected_indices = ranker_PCA(test_embeddings, 0.1)
print(selected_embeddings.shape)
print(selected_indices)

IndentationError: expected an indented block after function definition on line 22 (2646349708.py, line 26)