In [1]:
import math
import matplotlib.pyplot as plt
import tensorflow as tf
import os

from tensorflow import keras
from tensorflow.keras import layers

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)

2022-11-06 07:22:31.687217: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-06 07:22:31.750064: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-11-06 07:22:31.767017: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-06 07:22:32.095628: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: li

In [2]:
# Dataset hyperparameters
unlabeled_dataset_size = 7294
labeled_dataset_size = 400
image_size = 96
image_channels = 3

# Algorithm hyperparameters
num_epochs = 10
batch_size = 256  # Corresponds to 200 steps per epoch
width = 128
temperature = 0.1
# Stronger augmentations for contrastive, weaker ones for supervised training
contrastive_augmentation = {"min_area": 0.25, "brightness": 0.6, "jitter": 0.2}
classification_augmentation = {"min_area": 0.75, "brightness": 0.3, "jitter": 0.1}

In [3]:
import random 
from sklearn.model_selection import train_test_split


unlabel_path = "./unlabeled"
unlabel_files = os.listdir(unlabel_path)
unlabel_files = [os.path.join(unlabel_path, i) for i in unlabel_files]

label_path = "./test"
label_fold_files = os.listdir(label_path)
label_fold_files = [os.path.join(label_path, i) for i in label_fold_files]
label_train_files = list()
label_test_files = list()
for fold in label_fold_files:
    fold_files = os.listdir(fold)
    train, test = train_test_split(fold_files, shuffle=True, test_size=0.2)
    label_train_files.extend([(os.path.join(fold, i), fold.split('/')[-1]) for i in train])
    label_test_files.extend([(os.path.join(fold, i), fold.split('/')[-1]) for i in test])

print(len(unlabel_files))
print(len(label_train_files))
print(len(label_test_files))

7294
400
100


In [4]:
print(label_train_files[0])
print(label_test_files[0])


('./test/3/0417.jpg', '3')
('./test/3/0150.jpg', '3')


In [5]:
import cv2
import numpy as np


def read_mri(mri_path):
    mri_data = cv2.imread(mri_path)
    return mri_data

def unlabel_data_generator(filename_list, batch_size=1):
    idx = 0
    while idx<len(filename_list):
        pixel_data_list = list()
        ground_true_list = list()
        for j in range(batch_size):
            if idx>=len(filename_list):
                break
            mri_filename = filename_list[idx]
            mri_filename = mri_filename.decode('ascii')
            pixel_data = read_mri(mri_filename)
            pixel_data_list.append(pixel_data)
            idx += 1
        yield (np.array(pixel_data_list), np.array([None]*len(pixel_data_list)))


def label_data_generator(filename_list, batch_size=1):
    idx = 0
    while idx<len(filename_list):
        mri_data_list = list()
        ground_true_list = list()
        for j in range(batch_size):
            if idx>=len(filename_list):
                break
            mri_filename, y_true = filename_list[idx]
            mri_filename = mri_filename.decode('ascii')
            pixel_data = read_mri(mri_filename)
            mri_data_list.append(pixel_data)
            ground_true_list.append(y_true)
            idx += 1
        yield (np.array(mri_data_list), np.array(ground_true_list))

In [25]:
print(unlabel_files[:5])

['./unlabeled/3362.jpg', './unlabeled/3370.jpg', './unlabeled/5102.jpg', './unlabeled/5279.jpg', './unlabeled/1144.jpg']


In [6]:
# generate Dataset
unlabel_dataset = tf.data.Dataset.from_generator(
    unlabel_data_generator,
    args=(unlabel_files, batch_size),
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 96, 96, 3], [None])
)

label_train_dataset = tf.data.Dataset.from_generator(
    label_data_generator,
    args=(label_train_files, batch_size),
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 96, 96, 3], [None])
)

label_test_dataset = tf.data.Dataset.from_generator(
    label_data_generator,
    args=(label_test_files, batch_size),
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 96, 96, 3], [None])
)




2022-11-06 07:22:33.982727: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-06 07:22:33.983067: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-06 07:22:33.983230: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-06 07:22:33.983777: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-06 07:22:33.983957: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from S

In [7]:
# Distorts the color distibutions of images
class RandomColorAffine(layers.Layer):
    def __init__(self, brightness=0, jitter=0, **kwargs):
        super().__init__(**kwargs)

        self.brightness = brightness
        self.jitter = jitter

    def get_config(self):
        config = super().get_config()
        config.update({"brightness": self.brightness, "jitter": self.jitter})
        return config

    def call(self, images, training=True):
        if training:
            batch_size = tf.shape(images)[0]

            # Same for all colors
            brightness_scales = 1 + tf.random.uniform(
                (batch_size, 1, 1, 1), minval=-self.brightness, maxval=self.brightness
            )
            # Different for all colors
            jitter_matrices = tf.random.uniform(
                (batch_size, 1, 3, 3), minval=-self.jitter, maxval=self.jitter
            )

            color_transforms = (
                tf.eye(3, batch_shape=[batch_size, 1]) * brightness_scales
                + jitter_matrices
            )
            images = tf.clip_by_value(tf.matmul(images, color_transforms), 0, 1)
        return images

# Image augmentation module
def get_augmenter(min_area, brightness, jitter):
    zoom_factor = 1.0 - math.sqrt(min_area)
    return keras.Sequential(
        [
            keras.Input(shape=(image_size, image_size, image_channels)),
            layers.Rescaling(1 / 255),
            layers.RandomFlip("horizontal"),
            layers.RandomTranslation(zoom_factor / 2, zoom_factor / 2),
            layers.RandomZoom((-zoom_factor, 0.0), (-zoom_factor, 0.0)),
            RandomColorAffine(brightness, jitter),
        ]
    )
    
def get_encoder():
    return keras.Sequential(
        [
            keras.Input(shape=(image_size, image_size, image_channels)),
            layers.Conv2D(width, kernel_size=3, strides=2, activation="relu"),
            layers.Conv2D(width, kernel_size=3, strides=2, activation="relu"),
            layers.Conv2D(width, kernel_size=3, strides=2, activation="relu"),
            layers.Conv2D(width, kernel_size=3, strides=2, activation="relu"),
            layers.Flatten(),
            layers.Dense(width, activation="relu"),
        ],
        name="encoder",
    )

In [8]:
# Define the contrastive model with model-subclassing
class ContrastiveModel(keras.Model):
    def __init__(self):
        super().__init__()

        self.temperature = temperature
        self.contrastive_augmenter = get_augmenter(**contrastive_augmentation)
        self.classification_augmenter = get_augmenter(**classification_augmentation)
        self.encoder = get_encoder()
        # Non-linear MLP as projection head
        self.projection_head = keras.Sequential(
            [
                keras.Input(shape=(width,)),
                layers.Dense(width, activation="relu"),
                layers.Dense(width),
            ],
            name="projection_head",
        )
        # Single dense layer for linear probing
        self.linear_probe = keras.Sequential(
            [layers.Input(shape=(width,)), layers.Dense(4)], name="linear_probe"
        )

        self.encoder.summary()
        self.projection_head.summary()
        self.linear_probe.summary()

    def compile(self, contrastive_optimizer, probe_optimizer, **kwargs):
        super().compile(**kwargs)

        self.contrastive_optimizer = contrastive_optimizer
        self.probe_optimizer = probe_optimizer

        # self.contrastive_loss will be defined as a method
        self.probe_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

        self.contrastive_loss_tracker = keras.metrics.Mean(name="c_loss")
        self.contrastive_accuracy = keras.metrics.SparseCategoricalAccuracy(
            name="c_acc"
        )
        self.probe_loss_tracker = keras.metrics.Mean(name="p_loss")
        self.probe_accuracy = keras.metrics.SparseCategoricalAccuracy(name="p_acc")

    @property
    def metrics(self):
        return [
            self.contrastive_loss_tracker,
            self.contrastive_accuracy,
            self.probe_loss_tracker,
            self.probe_accuracy,
        ]

    def contrastive_loss(self, projections_1, projections_2):
        # InfoNCE loss (information noise-contrastive estimation)
        # NT-Xent loss (normalized temperature-scaled cross entropy)

        # Cosine similarity: the dot product of the l2-normalized feature vectors
        projections_1 = tf.math.l2_normalize(projections_1, axis=1)
        projections_2 = tf.math.l2_normalize(projections_2, axis=1)
        similarities = (
            tf.matmul(projections_1, projections_2, transpose_b=True) / self.temperature
        )

        # The similarity between the representations of two augmented views of the
        # same image should be higher than their similarity with other views
        batch_size = tf.shape(projections_1)[0]
        contrastive_labels = tf.range(batch_size)
        self.contrastive_accuracy.update_state(contrastive_labels, similarities)
        self.contrastive_accuracy.update_state(
            contrastive_labels, tf.transpose(similarities)
        )

        # The temperature-scaled similarities are used as logits for cross-entropy
        # a symmetrized version of the loss is used here
        loss_1_2 = keras.losses.sparse_categorical_crossentropy(
            contrastive_labels, similarities, from_logits=True
        )
        loss_2_1 = keras.losses.sparse_categorical_crossentropy(
            contrastive_labels, tf.transpose(similarities), from_logits=True
        )
        return (loss_1_2 + loss_2_1) / 2

    def train_step(self, data):
        print(data)
        (unlabeled_images, _) = data

        # Both labeled and unlabeled images are used, without labels
        images = tf.concat((unlabeled_images), axis=0)
        # Each image is augmented twice, differently
        augmented_images_1 = self.contrastive_augmenter(images, training=True)
        augmented_images_2 = self.contrastive_augmenter(images, training=True)
        with tf.GradientTape() as tape:
            features_1 = self.encoder(augmented_images_1, training=True)
            features_2 = self.encoder(augmented_images_2, training=True)
            # The representations are passed through a projection mlp
            projections_1 = self.projection_head(features_1, training=True)
            projections_2 = self.projection_head(features_2, training=True)
            contrastive_loss = self.contrastive_loss(projections_1, projections_2)
        gradients = tape.gradient(
            contrastive_loss,
            self.encoder.trainable_weights + self.projection_head.trainable_weights,
        )
        self.contrastive_optimizer.apply_gradients(
            zip(
                gradients,
                self.encoder.trainable_weights + self.projection_head.trainable_weights,
            )
        )
        self.contrastive_loss_tracker.update_state(contrastive_loss)

        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        labeled_images, labels = data

        # For testing the components are used with a training=False flag
        preprocessed_images = self.classification_augmenter(
            labeled_images, training=False
        )
        features = self.encoder(preprocessed_images, training=False)
        class_logits = self.linear_probe(features, training=False)
        probe_loss = self.probe_loss(labels, class_logits)
        self.probe_loss_tracker.update_state(probe_loss)
        self.probe_accuracy.update_state(labels, class_logits)

        # Only the probe metrics are logged at test time
        return {m.name: m.result() for m in self.metrics[2:]}


# Contrastive pretraining
pretraining_model = ContrastiveModel()
pretraining_model.compile(
    contrastive_optimizer=keras.optimizers.Adam(),
    probe_optimizer=keras.optimizers.Adam(),
)

pretraining_history = pretraining_model.fit(
    unlabel_dataset, epochs=num_epochs, validation_data=label_train_dataset
)
print(
    "Maximal validation accuracy: {:.2f}%".format(
        max(pretraining_history.history["val_p_acc"]) * 100
    )
)

Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 47, 47, 128)       3584      
                                                                 
 conv2d_1 (Conv2D)           (None, 23, 23, 128)       147584    
                                                                 
 conv2d_2 (Conv2D)           (None, 11, 11, 128)       147584    
                                                                 
 conv2d_3 (Conv2D)           (None, 5, 5, 128)         147584    
                                                                 
 flatten (Flatten)           (None, 3200)              0         
                                                                 
 dense (Dense)               (None, 128)               409728    
                                                                 
Total params: 856,064
Trainable params: 856,064
Non-trainab

2022-11-06 07:22:42.802084: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8500
2022-11-06 07:22:43.535929: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Maximal validation accuracy: 27.75%


In [13]:
# Supervised finetuning of the pretrained encoder
finetuning_model = keras.Sequential(
    [
        layers.Input(shape=(image_size, image_size, image_channels)),
        get_augmenter(**classification_augmentation),
        pretraining_model.encoder,
        layers.Dense(4),
    ],
    name="finetuning_model",
)
finetuning_model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")],
)

finetuning_history = finetuning_model.fit(
    label_train_dataset, epochs=num_epochs, validation_data=label_test_dataset
)
print(
    "Maximal validation accuracy: {:.2f}%".format(
        max(finetuning_history.history["val_acc"]) * 100
    )
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Maximal validation accuracy: 71.00%


In [14]:
label_files = label_test_files.copy()
label_files.extend(label_train_files)
y_true_list = [int(y_true) for (data, y_true) in label_files]
print(y_true_list)

[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [15]:

total_dataset = tf.data.Dataset.from_generator(
    label_data_generator,
    args=(label_files, batch_size),
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 96, 96, 3], [None])
)
emb = pretraining_model.encoder.predict(total_dataset)
print(emb.shape)

(500, 128)


In [32]:
padding_emb = emb.copy()
print(padding_emb.shape)
padding_result_emb = list()
for i in padding_emb:
    padding_result_emb.append(np.append(i, [0]*(512-128)))
padding_result_emb = np.array(padding_result_emb)
print(padding_result_emb.shape)

(500, 128)
(500, 512)


In [34]:
print(padding_result_emb[0])

[  0.           0.          78.15087128   0.           0.
 315.85351562  29.1821785   10.82477283   0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.         155.58505249   0.           0.
 162.99966431   8.10432053   0.           0.          81.29200745
   0.          54.78651047   0.           0.           0.
   0.           0.          14.6925869   11.72392368   0.
   0.           0.           0.           0.           0.
  50.51418686 219.35920715   0.           0.           0.
   0.           0.         128.64035034   0.           0.
   0.           0.           0.           0.          99.15350342
   0.           0.         206.35559082   0.           0.
   0.           0.         115.81131744   0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
  69.96138763   0.          64.55688477  55.65756226   0

In [35]:
import torch
import torch.nn.functional as F


def KNN(emb, cls, batch_size, Ks=[1, 10, 50, 100]):
    """Apply KNN for different K and return the maximum acc"""
    preds = []
    mask = torch.eye(batch_size).bool().to(emb.device)
    mask = F.pad(mask, (0, len(emb) - batch_size))
    for batch_x in torch.split(emb, batch_size):
        dist = torch.norm(
            batch_x.unsqueeze(1) - emb.unsqueeze(0), dim=2, p="fro")
        now_batch_size = len(batch_x)
        mask = mask[:now_batch_size]
        dist = torch.masked_fill(dist, mask, float('inf'))
        # update mask
        mask = F.pad(mask[:, :-now_batch_size], (now_batch_size, 0))
        pred = []
        for K in Ks:
            knn = dist.topk(K, dim=1, largest=False).indices
            knn = cls[knn].cpu()
            pred.append(torch.mode(knn).values)
        pred = torch.stack(pred, dim=0)
        preds.append(pred)
    preds = torch.cat(preds, dim=1)
    accs = [(pred == cls.cpu()).float().mean().item() for pred in preds]
    return max(accs)

print(padding_result_emb.shape)
print(y_true_list)
KNN(torch.tensor(padding_result_emb), torch.tensor(np.array(y_true_list)), 128)

(500, 512)
[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

0.9559999704360962

In [21]:
print(unlabel_files[:5])

sort_unlabel_files = unlabel_files.copy()
sort_unlabel_files = sorted(sort_unlabel_files)

print(sort_unlabel_files[:5])

['./unlabeled/3362.jpg', './unlabeled/3370.jpg', './unlabeled/5102.jpg', './unlabeled/5279.jpg', './unlabeled/1144.jpg']
['./unlabeled/0000.jpg', './unlabeled/0001.jpg', './unlabeled/0002.jpg', './unlabeled/0003.jpg', './unlabeled/0004.jpg']


In [26]:
print(sort_unlabel_files[:5])
# generate Dataset
unlabel_test_dataset = tf.data.Dataset.from_generator(
    unlabel_data_generator,
    args=(sort_unlabel_files, batch_size),
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 96, 96, 3], [None])
)
unlabel_emb = pretraining_model.encoder.predict(unlabel_test_dataset)
print(unlabel_emb.shape)

['./unlabeled/0000.jpg', './unlabeled/0001.jpg', './unlabeled/0002.jpg', './unlabeled/0003.jpg', './unlabeled/0004.jpg']
(7294, 128)


In [36]:
padding_unlabel_emb = unlabel_emb.copy()
print(padding_unlabel_emb.shape)
padding_result_unlabel_emb = list()
for i in unlabel_emb:
    padding_result_unlabel_emb.append(np.append(i, [0]*(512-128)))
padding_result_unlabel_emb = np.array(padding_result_unlabel_emb)
print(padding_result_unlabel_emb.shape)

(7294, 128)
(7294, 512)


In [37]:
print(unlabel_emb[0])
print(padding_result_unlabel_emb[0])

[  0.          0.          0.          0.          0.        224.3529
  38.81175     0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.        143.23363     0.
   0.        147.00427     0.          0.          0.        116.69171
  13.271267   78.16485     0.          0.          0.         72.33827
   0.          0.         96.33503     0.          0.          0.
   0.          0.          0.         54.97493   181.93388    45.70551
   0.          0.          0.          0.        138.92345     0.
   0.          0.          0.        105.34854     0.         80.96767
   0.          0.        100.73903     0.          0.          0.
   0.         83.009094    0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.        138.58371     0.          2.5910926  21.499939
   0.        204.87994     0.        102.82436

In [None]:
np.save("0711282", padding_result_unlabel_emb)