In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

import torch
import torch.nn as nn
import torch.nn.functional as F 

import os
import shutil

In [6]:
def make_labels(df, group_by_centers, data_name='eyepacs'):
    """Build one soft label per cluster from the grade counts inside it.

    For every cluster id ``i`` in ``range(len(group_by_centers))`` the rows with
    ``df['centers'] == i`` are selected, the occurrences of each grade 0-4 are
    counted, and the count vector is passed through a softmax so that every
    row sums to 1.

    Args:
        df: DataFrame holding a ``'centers'`` column and a grade column
            (``'level'`` when ``data_name == 'eyepacs'``, ``'diagnosis'`` for
            any other value).
        group_by_centers: Sized object whose length is the number of clusters
            (the notebook passes ``df.groupby(['centers'])``). Only ``len()``
            is used, so cluster ids are assumed to be ``0 .. len-1``; an id
            with no rows simply yields an all-zero count row.
        data_name: Dataset name; selects the grade column.

    Returns:
        ``numpy.ndarray`` of shape ``(len(group_by_centers), 5)`` and dtype
        float64, one softmax-normalised row per cluster.
    """
    num_classes = 5
    grade_column = 'level' if data_name == 'eyepacs' else 'diagnosis'
    n_clusters = len(group_by_centers)

    # Preallocating keeps the result 2-D even when there are no clusters.
    counts = np.zeros([n_clusters, num_classes])
    if n_clusters == 0:
        return counts

    centers = df['centers']
    for i in range(n_clusters):
        cluster_label = Counter(np.asarray(df.loc[centers == i, grade_column]))
        # Grades outside 0..4 (or NaN) are ignored; missing grades count as 0.
        for j in range(num_classes):
            counts[i, j] = cluster_label[j]

    # NOTE(review): this is a softmax over raw counts, not counts / total
    # (five images of one grade give ~0.974 for that grade). Kept as is
    # because previously saved label files were produced this way.
    # ``dim=1`` is what the deprecated implicit-dim call resolved to for a 2-D
    # tensor; spelling it out removes the deprecation warning.
    normalized_labels = F.softmax(torch.from_numpy(counts), dim=1)
    return normalized_labels.numpy()

In [7]:
def make_centroid_folder(df, group_by_centers, data_path, label_path, data_name='eyepacs'):
    """Pick one representative image per cluster and move it to ``k5_images``.

    The representative of cluster ``i`` is the first row of ``df`` whose
    ``'centers'`` value equals ``i``. The (id_code, centers) table of the
    representatives is written to ``label_path``; then every file in
    ``<data_path>/4`` whose name is ``<id_code><extension>`` for one of the
    representatives is moved into ``<data_path>/k5_images``.

    Args:
        df: DataFrame with ``'centers'`` and ``'id_code'`` columns.
        group_by_centers: Sized object whose length is the number of clusters
            (the notebook passes ``df.groupby(['centers'])``); cluster ids are
            assumed to be ``0 .. len-1``.
        data_path: Folder containing the ``4`` source sub-folder. A trailing
            separator is accepted but not required.
        label_path: Destination CSV for the representative table.
        data_name: ``'eyepacs'`` (``.jpeg`` files) or ``'aptos'`` (``.png``).

    Raises:
        ValueError: ``data_name`` is not a supported dataset.
        IndexError: some cluster id in ``0 .. len-1`` has no rows in ``df``.
    """
    extensions = {'eyepacs': '.jpeg', 'aptos': '.png'}
    if data_name not in extensions:
        # Previously this surfaced as a NameError (or silently reused a stale
        # match) inside the file loop.
        raise ValueError(
            "unsupported data_name {!r}; expected one of {}".format(
                data_name, sorted(extensions)))
    extension = extensions[data_name]

    n_clusters = len(group_by_centers)
    centers = df['centers']
    lst_for_id = []
    for i in range(n_clusters):
        members = df.loc[centers == i, 'id_code']
        if members.empty:
            raise IndexError(
                "cluster {} has no rows in df; cannot pick a representative".format(i))
        lst_for_id.append(members.iloc[0])

    new_data = pd.DataFrame({'id_code': lst_for_id, 'centers': list(range(n_clusters))})
    new_data.to_csv(label_path, index=False)

    source = os.path.join(data_path, '4')
    dest = os.path.join(data_path, 'k5_images')
    # shutil.move needs an existing directory to move *into*.
    os.makedirs(dest, exist_ok=True)

    total_data_lst = os.listdir(source)
    print(len(total_data_lst))

    # Exact file-name lookup replaces the per-file DataFrame scan and the
    # comparison against pandas' empty-Series string representation.
    wanted = {str(code) + extension for code in lst_for_id}
    for file_name in total_data_lst:
        if file_name in wanted:
            shutil.move(os.path.join(source, file_name), dest)

# EyePACS

In [4]:
# Load the EyePACS validation split and rename its 'image' column to 'id_code'
# so it can be merged with the cluster tables on that key; the renamed table
# is saved (without the index) for the later cells that re-read it.
eyepacs_csv = pd.read_csv(
    '/home/minkyu/privacy/data/splited_val/eyepacs/eyepacs_val_10000.csv'
).rename(columns={"image": "id_code"})
eyepacs_csv.to_csv('./labels/new_eyepacs_csv.csv', index=False)

In [5]:
# Sanity check: number of rows in the loaded EyePACS split.
len(eyepacs_csv)

10000

### W[16,512], K=5

### Make Labels

In [6]:
# IPython automagic: show the working directory that the relative
# './labels/...' paths in this notebook resolve against.
pwd

'/home/minkyu/privacy/ICML2022/classification2'

In [7]:
# Join the per-image cluster assignments (W16 features) with the EyePACS
# labels on 'id_code'. Later cells rely on the merged table having the
# 'centers', 'id_code' and 'level' columns.
k_5_eyepacs = pd.read_csv(
    '/home/minkyu/privacy/ICML2022/classification2/labels/train_aptos_infer_eyepacs_k5.csv'
)
new_eyepacs_csv = pd.read_csv('./labels/new_eyepacs_csv.csv')
df_k5_eyepacs_W16 = k_5_eyepacs.merge(new_eyepacs_csv, on='id_code')
# The row index is written as the first (unnamed) column of this file.
df_k5_eyepacs_W16.to_csv('./labels/df_k5_eyepacs_w16.csv')

In [8]:
# Group the merged rows by cluster id. make_labels / make_centroid_folder only
# take len() of this object, i.e. the number of distinct clusters.
group_by_centers_k5_eyepacs_W16 = df_k5_eyepacs_W16.groupby(['centers'])

In [9]:
# Per-cluster soft labels for EyePACS / W16 (data_name defaults to 'eyepacs',
# so grades come from the 'level' column), saved as a .npy file.
k5_eyepacs_W16_normalized_labels_npy = make_labels(
    df=df_k5_eyepacs_W16,
    group_by_centers=group_by_centers_k5_eyepacs_W16,
)
labels_name = './labels/k5_eyepacs_W16_normalized_labels.npy'
np.save(labels_name, k5_eyepacs_W16_normalized_labels_npy)



In [32]:
# y = torch.from_numpy(k5_eyepacs_W16_normalized_labels_npy[0])
# y = torch.max(y,1)[1]

tensor([0.9738, 0.0066, 0.0066, 0.0066, 0.0066], dtype=torch.float64)

In [31]:
# Largest class probability in the soft label of cluster 0.
torch.from_numpy(k5_eyepacs_W16_normalized_labels_npy[0]).max()

tensor(0.9738, dtype=torch.float64)

### Make Centroid data folder

In [14]:
# Root folder of the EyePACS / W16 images: make_centroid_folder reads the
# source images from its '4' sub-folder and moves the selected ones to
# 'k5_images'. label_path receives the (id_code, centers) table.
data_path = '/home/minkyu/privacy/ICML2022/classification2/centroids/eyepacs/W16/'
label_path = './labels/centroid_eyepacs_k5_W16.csv'

In [15]:
# Writes the representative table and MOVES (not copies) the matching images,
# so this cell changes the folders on disk. The printed number is the file
# count of the source folder.
make_centroid_folder(df=df_k5_eyepacs_W16, group_by_centers=group_by_centers_k5_eyepacs_W16, data_path=data_path, label_path=label_path, data_name='eyepacs')

10000


### W[1,512], K=5

### Make Labels

In [8]:
# Join the per-image cluster assignments (W1 features) with the EyePACS
# labels on 'id_code'.
k_5_eyepacs_W1 = pd.read_csv(
    '/home/minkyu/privacy/ICML2022/same_kmeans_W1/labels/train_aptos_infer_eyepacs_k5_1map2style.csv'
)
new_eyepacs_csv = pd.read_csv('./labels/new_eyepacs_csv.csv')
df_k5_eyepacs_W1 = k_5_eyepacs_W1.merge(new_eyepacs_csv, on='id_code')
# The row index is written as the first (unnamed) column of this file.
df_k5_eyepacs_W1.to_csv('./labels/df_k5_eyepacs_w1.csv')

In [9]:
# Group the merged W1 rows by cluster id; only the number of groups is used
# by the helper functions.
group_by_centers_k5_eyepacs_W1 = df_k5_eyepacs_W1.groupby(['centers'])

In [16]:
# Per-cluster soft labels for EyePACS / W1 (grades from the 'level' column),
# saved as a .npy file.
k5_eyepacs_W1_normalized_labels_npy = make_labels(
    df=df_k5_eyepacs_W1,
    group_by_centers=group_by_centers_k5_eyepacs_W1,
)
labels_name = './labels/k5_eyepacs_W1_normalized_labels.npy'
np.save(labels_name, k5_eyepacs_W1_normalized_labels_npy)

  from ipykernel import kernelapp as app


### Make Centroid data folder

In [21]:
# Root folder of the EyePACS / W1 images and destination of the
# representative table.
# NOTE(review): this path is under '.../classification/' while the W16 cell
# and the notebook's working directory use '.../classification2/' - confirm
# which one is intended.
data_path = '/home/minkyu/privacy/ICML2022/classification/centroids/eyepacs/W1/'
label_path = './labels/centroid_eyepacs_k5_W1.csv'

In [22]:
# Writes the W1 representative table and moves the matching images on disk;
# prints the file count of the source folder.
make_centroid_folder(df=df_k5_eyepacs_W1, group_by_centers=group_by_centers_k5_eyepacs_W1, data_path=data_path, label_path=label_path, data_name='eyepacs')

10000


# Aptos

### W[16,512], K=5

### Make Labels

In [14]:
# Join the APTOS cluster assignments (W16 features) with the APTOS labels on
# 'id_code'; the merged table carries 'centers' and 'diagnosis'.
k_5_aptos = pd.read_csv(
    '/home/minkyu/privacy/ICML2022/same_kmeans_W16/labels/train_eyepacs_val_aptos_k5.csv'
)
new_aptos_csv = pd.read_csv('/home/minkyu/privacy/data/splited_val/aptos/aptos_val_3000.csv')
df_k5_aptos_W16 = k_5_aptos.merge(new_aptos_csv, on='id_code')
# The row index is written as the first (unnamed) column of this file.
df_k5_aptos_W16.to_csv('./labels/df_k5_aptos_w16.csv')
df_k5_aptos_W16.head()

Unnamed: 0,id_code,centers,count_centers,diagnosis
0,000c1434d8d7,295,5,2
1,001639a390f0,75,5,4
2,0024cdab0c1e,296,5,1
3,005b95c28852,513,5,0
4,0083ee8054ee,527,5,4


In [15]:
# Group the merged APTOS / W16 rows by cluster id; only the number of groups
# is used by the helper functions.
group_by_centers_k5_aptos_W16 = df_k5_aptos_W16.groupby(['centers'])

In [16]:
# Per-cluster soft labels for APTOS / W16 (data_name='aptos' selects the
# 'diagnosis' column), saved as a .npy file.
k5_aptos_W16_normalized_labels_npy = make_labels(
    df=df_k5_aptos_W16,
    group_by_centers=group_by_centers_k5_aptos_W16,
    data_name='aptos',
)
labels_name = './labels/k5_aptos_W16_normalized_labels.npy'
np.save(labels_name, k5_aptos_W16_normalized_labels_npy)



### Make Centroid data folder

In [16]:
# Root folder of the APTOS / W16 images and destination of the representative
# table.
# NOTE(review): this path lives under '/home/unix/mjeon/' whereas every other
# cell uses '/home/minkyu/' - confirm it is valid on the machine running the
# notebook.
data_path = '/home/unix/mjeon/privacy/ICML2022/classification/centroids/aptos/W16/'
label_path = './labels/centroid_aptos_k5_W16.csv'

In [17]:
# Writes the APTOS / W16 representative table and moves the matching '.png'
# images on disk; prints the file count of the source folder.
make_centroid_folder(df=df_k5_aptos_W16, group_by_centers=group_by_centers_k5_aptos_W16, data_path=data_path, label_path=label_path, data_name='aptos')

3000


### W[1,512], K=5

### Make Labels

In [17]:
# Join the APTOS cluster assignments (W1 features) with the APTOS labels on
# 'id_code'. Note that `k_5_aptos` and `new_aptos_csv` are re-bound here and
# no longer refer to the W16 tables.
# NOTE(review): the label csv is read from './labels/' here but from
# '/home/minkyu/privacy/data/splited_val/aptos/' in the W16 cell - confirm
# both copies are identical.
k_5_aptos = pd.read_csv(
    '/home/minkyu/privacy/ICML2022/same_kmeans_W1/labels/train_eyepacs_infer_aptos_k5_1map2style.csv'
)
new_aptos_csv = pd.read_csv('./labels/aptos_val_3000.csv')
df_k5_aptos_W1 = k_5_aptos.merge(new_aptos_csv, on='id_code')
# The row index is written as the first (unnamed) column of this file.
df_k5_aptos_W1.to_csv('./labels/df_k5_aptos_w1.csv')
df_k5_aptos_W1.head()

Unnamed: 0,id_code,centers,count_centers,diagnosis
0,000c1434d8d7,285,5,2
1,001639a390f0,489,5,4
2,0024cdab0c1e,416,5,1
3,005b95c28852,44,5,0
4,0083ee8054ee,243,5,4


In [18]:
# Group the merged APTOS / W1 rows by cluster id; only the number of groups
# is used by the helper functions.
group_by_centers_k5_aptos_W1 = df_k5_aptos_W1.groupby(['centers'])

In [19]:
# Per-cluster soft labels for APTOS / W1 (data_name='aptos' selects the
# 'diagnosis' column), saved as a .npy file.
k5_aptos_W1_normalized_labels_npy = make_labels(
    df=df_k5_aptos_W1,
    group_by_centers=group_by_centers_k5_aptos_W1,
    data_name='aptos',
)
labels_name = './labels/k5_aptos_W1_normalized_labels.npy'
np.save(labels_name, k5_aptos_W1_normalized_labels_npy)



### Make Centroid data folder

In [11]:
# Root folder of the APTOS / W1 images (source images in its '4' sub-folder,
# selected ones moved to 'k5_images') and destination of the representative
# table.
data_path = '/home/minkyu/privacy/ICML2022/classification/centroids/aptos/W1/'
label_path = './labels/centroid_aptos_k5_W1.csv'

In [12]:
# Writes the APTOS / W1 representative table and moves the matching '.png'
# images on disk; prints the file count of the source folder.
make_centroid_folder(df=df_k5_aptos_W1, group_by_centers=group_by_centers_k5_aptos_W1, data_path=data_path, label_path=label_path, data_name='aptos')

3000
