In [1]:
import warnings

warnings.filterwarnings('ignore')

import os
import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from tensorflow import keras
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.layers import Input, Dense, Lambda, Reshape, Flatten, Dropout
from tensorflow.keras.layers import Reshape, Conv2D, Conv2DTranspose, LeakyReLU
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from process_attr import *

## Implement Class

In [2]:
class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.losses = {'batch':[], 'epoch':[]}
        self.accuracy = {'batch':[], 'epoch':[]}
        self.val_loss = {'batch':[], 'epoch':[]}
        self.val_acc = {'batch':[], 'epoch':[]}
    
    def on_batch_end(self, batch, logs={}):
        self.losses['batch'].append(logs.get('loss'))
        self.accuracy['batch'].append(logs.get('acc'))
        self.val_loss['batch'].append(logs.get('val_loss'))
        self.val_acc['batch'].append(logs.get('val_acc'))
    
    def on_epoch_end(self, batch, logs={}):
        self.losses['epoch'].append(logs.get('loss'))
        self.accuracy['epoch'].append(logs.get('acc'))
        self.val_loss['epoch'].append(logs.get('val_loss'))
        self.val_acc['epoch'].append(logs.get('val_acc'))
        
    def loss_plot(self, loss_type):
        iters = range(len(self.losses[loss_type]))
        plt.figure()
        #acc
        plt.plot(iters, self.accuracy[loss_type], 'r', label='train acc')
        
        #loss
        plt.plot(iters, self.losses[loss_type], 'g', label='train loss')
        
        if loss_type == 'epoch':
            #val_acc
            plt.plot(iters, self.val_acc[loss_type], 'b', label='val acc')
            
            #val_loss
            plt.plot(iters, self.val_loss[loss_type], 'k', label='val loss')
        
        plt.grid(True)
        plt.xlabel(loss_type)
        plt.ylabel('acc-loss')
        plt.legend(loc="upper right")
        plt.show()

class Scaler(keras.layers.Layer):
    def __init__(self, tau=0.5, **kwargs):
        super(Scaler, self).__init__(**kwargs)
        self.tau = tau
    
    def build(self, input_shape):
        super(Scaler, self).build(input_shape)
        self.scale = self.add_weight(
            name='scale', shape=(input_shape[-1],), initializer='zeros'
        )
    
    def call(self, inputs, mode='positive'):
        if mode == 'positive':
            scale = self.tau + (1 - self.tau) * K.sigmoid(self.scale)
        else:
            scale = (1 - self.tau) * K.sigmoid(-self.scale)
        return inputs * K.sqrt(scale)
    
    def get_config(self):
        config = {'tau': self.tau}
        base_config = super(Scaler, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

class Sampling(keras.layers.Layer):
    def __init__(self, latent_dim=128, **kwargs):
        super(Sampling, self).__init__(**kwargs)
        self.latent_dim = latent_dim

    def build(self, input_shape):
        super(Sampling, self).build(input_shape)

    def call(self, inputs):
        z_mean, z_log_var = inputs
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], self.latent_dim), seed=42)
        return z_mean + K.exp(z_log_var / 2) * epsilon

    def get_config(self):
        base_config = super(Sampling, self).get_config()
        config = {'latent_dim': self.latent_dim}
        return dict(list(base_config.items()) + list(config.items()))

## Parameters

In [3]:
########### change dataset here ##################
dataset = 'CUB' # 'AWA2', 'CUB', 'SUN'
##################################################

batch_size = 128
epochs = 100
attr_type = 'cms' # 'b', 'c', 'cmm', 'cms'

file_path = f'./data/{dataset}/'
mapping = {'b': b, 'c': c, 'cmm': cmm, 'cms': cms}
process_attr = mapping[attr_type](file_path)
process_attr, mat_file = np.array(process_attr)

seen_class_num = sum(1 for line in open(f'{file_path}/trainvalclasses.txt') if line.rstrip())
unseen_class_num = sum(1 for line in open(f'{file_path}/testclasses.txt') if line.rstrip())
class_num = seen_class_num + unseen_class_num
class_attr_dim = np.array(process_attr).shape[1]
class_attr_shape = (class_attr_dim, )

In [4]:
mat_path = f'./data/{dataset}/mat'
if not os.path.isdir(mat_path):
    os.mkdir(mat_path)
    os.mkdir(f'{mat_path}/{attr_type}')
    os.mkdir(f'{mat_path}/{attr_type}/seen')
    os.mkdir(f'{mat_path}/{attr_type}/unseen')

## Load data

In [5]:
features = sio.loadmat(f'{file_path}/res101.mat')
attr = sio.loadmat(f'{file_path}/att_splits.mat')
train_loc = attr['train_loc']
val_loc = attr['val_loc']
transpose_ft = features['features'].transpose()

data_train = np.array([transpose_ft[i-1].flatten() for i in train_loc])
label_train = np.array([int(features['labels'][i-1].flatten()) for i in train_loc])
attr_train = np.array([process_attr[i-1] for i in label_train])
print(data_train.shape)
print(attr_train.shape)
print(label_train.shape)
print(max(label_train))

print('-' * 25)

data_val = np.array([transpose_ft[i-1].flatten() for i in val_loc])
label_val = np.array([int(features['labels'][i-1].flatten()) for i in val_loc])
attr_val = np.array([process_attr[i-1] for i in label_val])
print(data_val.shape)
print(label_val.shape)
print(label_val.shape)
print(max(label_val))

(5875, 2048)
(5875, 312)
(5875,)
200
-------------------------
(2946, 2048)
(2946,)
(2946,)
186


## Model

### Encoder

In [6]:
x_inputs = Input(shape=(2048, ))
x = x_inputs
x = Dense(2048, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(1024, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
# z_mean = Dense(class_attr_dim, activation='relu')(x)
# z_var = Dense(class_attr_dim, activation='relu')(x)
z_mean = Dense(class_attr_dim)(x)
z_var = Dense(class_attr_dim)(x)

2022-07-13 11:14:52.213954: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-13 11:14:52.218956: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-13 11:14:52.219254: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-13 11:14:52.220284: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [7]:
y_inputs = Input(shape=class_attr_shape)  # expert-defined

### Sampling

In [8]:
scaler = Scaler()
z_mean = scaler(z_mean, mode='positive')
z_var = scaler(z_var, mode='negative')
sampling = Sampling(class_attr_dim)
z = sampling([z_mean, z_var])

### Decoder 

In [9]:
ce_inputs = Input(shape=class_attr_shape)
x = ce_inputs
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(1024, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(2048, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
outputs= Dense(2048, activation='relu')(x)

### Modeling

In [10]:
encoder = Model(x_inputs, z)
decoder = Model(ce_inputs, outputs)
x_out = decoder(z)
vae = Model(inputs=[x_inputs, y_inputs], outputs=[x_out])

### Model Loss Function

In [11]:
# xent_loss是重構loss
xent_loss = 0.5 * K.sum(K.mean((x_inputs - x_out)**2))

# K.square(z_mean - y) 為latent v ector 向每個 class 的均值看齊 
kl_loss = - 0.5 * K.sum(1 + z_var - K.square(z_mean - y_inputs) - K.exp(z_var), axis=-1)

vae_loss = K.mean(xent_loss + kl_loss)

### Start train

In [12]:
vae.add_loss(vae_loss)

vae.compile(optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False))

# vae.compile(optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True))

# vae.summary()

history = LossHistory()
early_stopping = EarlyStopping(monitor='val_loss', patience=30, verbose=1)

learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', 
                                            patience=3, 
                                            verbose=1, 
                                            factor=0.5)


vae.fit(
    [data_train, attr_train],
    shuffle=True,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=([data_val, attr_val], None),
    callbacks=[history, early_stopping, learning_rate_reduction]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 13: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 16: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 19: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 22: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 25: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 28: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 31: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 34: ReduceLROnPlateau 

<keras.callbacks.History at 0x7f6c0c43b150>

In [13]:
encoder.save(f'./model/{dataset}/encoder_{attr_type}.h5')



## Calculate the averge attr

## Seen

In [14]:
# load encoder
encoder = load_model(
    f'./model/{dataset}/encoder_{attr_type}.h5', 
    custom_objects={'Scaler': Scaler, 'Sampling': Sampling}
)



In [26]:
train_set = set(label_train)
val_set = set(label_val)
trainval = list(train_set.union(val_set))

all_classes = [classname[0] for classname in attr['allclasses_names'].flatten()]
seen_classes = [line.rstrip() for line in open(f'./{file_path}/trainvalclasses.txt')]
unseen_classes = [line.rstrip() for line in open(f'./{file_path}/testclasses.txt')]

seen_label_map = {}
unseen_label_map = {}

for i in range(seen_class_num):
    seen_label_map[all_classes.index(seen_classes[i])] = i

for i in range(unseen_class_num):
    unseen_label_map[all_classes.index(unseen_classes[i])] = i
    
print(f'seen({len(seen_label_map)}):', seen_label_map)
print(f'unseen({len(unseen_label_map)}):', unseen_label_map)

seen(150): {0: 0, 1: 1, 8: 2, 9: 3, 13: 4, 15: 5, 30: 6, 31: 7, 46: 8, 47: 9, 48: 10, 51: 11, 52: 12, 53: 13, 54: 14, 63: 15, 64: 16, 70: 17, 108: 18, 109: 19, 150: 20, 155: 21, 160: 22, 161: 23, 179: 24, 84: 25, 77: 26, 60: 27, 153: 28, 34: 29, 126: 30, 25: 31, 36: 32, 76: 33, 198: 34, 130: 35, 21: 36, 145: 37, 167: 38, 74: 39, 82: 40, 187: 41, 96: 42, 29: 43, 189: 44, 163: 45, 118: 46, 89: 47, 98: 48, 88: 49, 19: 50, 139: 51, 199: 52, 197: 53, 116: 54, 44: 55, 27: 56, 3: 57, 195: 58, 102: 59, 174: 60, 117: 61, 58: 62, 91: 63, 5: 64, 105: 65, 157: 66, 193: 67, 39: 68, 176: 69, 114: 70, 4: 71, 101: 72, 180: 73, 171: 74, 73: 75, 142: 76, 2: 77, 172: 78, 133: 79, 120: 80, 144: 81, 146: 82, 32: 83, 23: 84, 72: 85, 162: 86, 22: 87, 106: 88, 59: 89, 40: 90, 104: 91, 148: 92, 66: 93, 147: 94, 169: 95, 69: 96, 168: 97, 110: 98, 11: 99, 111: 100, 81: 101, 41: 102, 17: 103, 16: 104, 56: 105, 10: 106, 131: 107, 38: 108, 14: 109, 143: 110, 37: 111, 24: 112, 112: 113, 62: 114, 7: 115, 83: 116, 185

In [16]:
data_seen = np.vstack([data_train, data_val])
label_seen = np.hstack([label_train, label_val])
attr_seen = np.vstack([attr_train, attr_val])

predict_attr = encoder.predict(data_seen)

# sum 40 class attributes
sum_attr = [[] for i in range(seen_class_num)]
real_attr = [[] for i in range(seen_class_num)]
count_class = [0 for i in range(seen_class_num)]

for idx in range(len(predict_attr)):
    original_label = label_seen[idx] - 1
    map_label = seen_label_map[original_label]
    if sum_attr[map_label] == []:
        sum_attr[map_label] = predict_attr[idx].copy()
        real_attr[map_label] = attr_seen[idx].copy()
    else:    
        sum_attr[map_label] += predict_attr[idx]
    count_class[map_label] += 1

# averge
for i in range(seen_class_num):
    sum_attr[i] = sum_attr[i] / count_class[i]
sum_attr = np.array(sum_attr)
print(sum_attr.shape)

(150, 312)


## Plot seen

In [17]:
for i in range(seen_class_num):
    diff = round(np.sum(np.abs(sum_attr[i] - real_attr[i])) / len(real_attr[i]), 6)

    if dataset == 'AWA2':
        attributes_name = pd.read_csv(f'./{file_path}/predicates.txt', header=None, sep='\t')
        plt.figure(figsize=(40, 10))
        plt.bar(attributes_name[1], height=sum_attr[i], align='edge', label='Learned CE', width=0.25)
        plt.bar(attributes_name[1], height=real_attr[i], align='edge', label='Real CE', width=-0.25) # uncomment if you want show realCE
        plt.legend(fontsize=15) # show label
        plt.xlabel('Attributes', fontsize=30)
        plt.xticks(fontsize=20, rotation='vertical')
        plt.yticks(fontsize=20)
        plt.title(f'Class : {seen_classes[i]}, diff : {diff}', fontsize=40)
        plt.savefig(f'./data/{dataset}/mat/{attr_type}/seen/{seen_classes[i]}.jpg')
        plt.show()
    else:
        print(f'Class : {seen_classes[i]}, diff : {diff}')

Class : 002.Laysan_Albatross, diff : 0.155502
Class : 003.Sooty_Albatross, diff : 0.170338
Class : 015.Lazuli_Bunting, diff : 0.15451
Class : 016.Painted_Bunting, diff : 0.130965
Class : 020.Yellow_breasted_Chat, diff : 0.202655
Class : 022.Chuck_will_Widow, diff : 0.171639
Class : 047.American_Goldfinch, diff : 0.136273
Class : 048.European_Goldfinch, diff : 0.458967
Class : 067.Anna_Hummingbird, diff : 0.194516
Class : 068.Ruby_throated_Hummingbird, diff : 0.188331
Class : 069.Rufous_Hummingbird, diff : 0.424068
Class : 073.Blue_Jay, diff : 0.33249
Class : 074.Florida_Jay, diff : 0.327598
Class : 075.Green_Jay, diff : 0.135007
Class : 076.Dark_eyed_Junco, diff : 0.321294
Class : 089.Hooded_Merganser, diff : 0.324411
Class : 090.Red_breasted_Merganser, diff : 0.13592
Class : 100.Brown_Pelican, diff : 0.512492
Class : 149.Brown_Thrasher, diff : 0.186056
Class : 150.Sage_Thrasher, diff : 0.316805
Class : 001.Black_footed_Albatross, diff : 0.149936
Class : 014.Indigo_Bunting, diff : 0.14

## save the seen attr

In [18]:
seen_attr = sum_attr.copy()

In [30]:
test_loc = attr['test_unseen_loc']

data_unseen = np.array([transpose_ft[i-1].flatten() for i in test_loc])
label_unseen = np.array([int(features['labels'][i-1].flatten()) for i in test_loc])
attr_unseen = np.array([process_attr[i-1] for i in label_unseen])

print(data_unseen.shape)
print(label_unseen.shape)
print(attr_unseen.shape)
print(max(label_unseen))

[239]
(2967, 2048)
(2967,)
(2967, 312)
195


## Test unseen

In [20]:
# calculate attr avg 
predict_attr = encoder.predict(data_unseen)

# sum 10 class attributes
sum_attr = [[] for i in range(unseen_class_num)]
real_attr = [[] for i in range(unseen_class_num)]
count_class = [0 for i in range(unseen_class_num)]

for idx in range(len(predict_attr)):
    original_label = label_unseen[idx] - 1
    map_label = unseen_label_map[original_label]
    if sum_attr[map_label] == []:
        sum_attr[map_label] = predict_attr[idx].copy()
        real_attr[map_label] = attr_unseen[idx].copy()
    else:    
        sum_attr[map_label] += predict_attr[idx]
    count_class[map_label] += 1

# averge
for i in range(unseen_class_num):
    sum_attr[i] = sum_attr[i] / count_class[i]
sum_attr = np.array(sum_attr)
print(sum_attr.shape)

(50, 312)


## Plot unseen

In [21]:
for i in range(unseen_class_num):
    diff = round(np.sum(np.abs(sum_attr[i] - real_attr[i])) / len(real_attr[i]), 6)

    if dataset == 'AWA2':
        attributes_name = pd.read_csv(f'./{file_path}/predicates.txt', header=None, sep='\t')
        plt.figure(figsize=(40, 10))
        plt.bar(attributes_name[1], height=sum_attr[i], align='edge', label='Learned CE', width=0.25)
        plt.bar(attributes_name[1], height=real_attr[i], align='edge', label='Real CE', width=-0.25)
        plt.legend(fontsize=15) # show label
        plt.xlabel('Attributes', fontsize=30)
        plt.xticks(fontsize=20, rotation='vertical')
        plt.yticks(fontsize=20)
        plt.title(f'Class : {unseen_classes[i]}, diff : {diff}', fontsize=40)
        plt.savefig(f'./data/{dataset}/mat/{attr_type}/unseen/{unseen_classes[i]}.jpg')
        plt.show()
    else:
        print(f'Class : {unseen_classes[i]}, diff : {diff}')

Class : 043.Yellow_bellied_Flycatcher, diff : 0.255684
Class : 111.Loggerhead_Shrike, diff : 0.206962
Class : 023.Brandt_Cormorant, diff : 0.228082
Class : 098.Scott_Oriole, diff : 0.282456
Class : 055.Evening_Grosbeak, diff : 0.401304
Class : 130.Tree_Sparrow, diff : 0.287108
Class : 139.Scarlet_Tanager, diff : 0.283686
Class : 123.Henslow_Sparrow, diff : 0.257288
Class : 156.White_eyed_Vireo, diff : 0.258643
Class : 124.Le_Conte_Sparrow, diff : 0.276093
Class : 200.Common_Yellowthroat, diff : 0.366267
Class : 072.Pomarine_Jaeger, diff : 0.256098
Class : 173.Orange_crowned_Warbler, diff : 0.324896
Class : 028.Brown_Creeper, diff : 0.417175
Class : 119.Field_Sparrow, diff : 0.270681
Class : 165.Chestnut_sided_Warbler, diff : 0.297096
Class : 103.Sayornis, diff : 0.2857
Class : 180.Wilson_Warbler, diff : 0.233478
Class : 077.Tropical_Kingbird, diff : 0.248146
Class : 012.Yellow_headed_Blackbird, diff : 0.347648
Class : 045.Northern_Fulmar, diff : 0.269432
Class : 190.Red_cockaded_Woodpe

## save the unseen attr

In [22]:
unseen_attr = sum_attr.copy()

In [23]:
all_calculated_ce = np.vstack([seen_attr, unseen_attr])
new_order_attr = [np.array([]) for i in range(class_num)]

for k,v in unseen_label_map.items():
    unseen_label_map[k] += seen_class_num

all_label_map = {**seen_label_map, **unseen_label_map} # merge dict
# print(all_label_map)

In [24]:
for k,v in all_label_map.items():
    new_order_attr[k] = all_calculated_ce[v]

In [25]:
attr['att'] = np.array(new_order_attr).transpose()
sio.savemat(f'{file_path}/{mat_file}', attr)