# VIME-Self Notebook
Based on: https://github.com/jsyoon0823/VIME.git

In [None]:
# Mount Google Drive inside the Colab runtime so files stored there can be accessed by path.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Make the VIME project folder on the mounted Drive the working directory.
# NOTE(review): the later imports of supervised_models, vime_utils, vime_self and
# Data_Preperation_VIME presumably resolve relative to this folder - confirm.
# If you need to adjust the path below, please adjust the path in the 'Data_Preperation_...' file accordingly
%cd /content/drive/MyDrive/Energy_Prediction/models/self_supervised/vime

/content/drive/MyDrive/self_supervised/vime


In [None]:
!pip install -r requirements



In [None]:
import numpy as np
import random
import warnings
warnings.filterwarnings("ignore")

from supervised_models import logit, xgb_model
from Data_Preperation_VIME import X_pretrain, X_train, y_train, X_test, y_test
from vime_utils import perf_metric
from sklearn.metrics import confusion_matrix

# Seed Python's and NumPy's global random number generators with the same value.
SEED = 42
for _seed_fn in (random.seed, np.random.seed):
    _seed_fn(SEED)

### Set the parameters and define output

-   label_no: Number of labeled data to be used
-   model_sets: supervised model set (mlp, logit, or xgboost)
-   p_m: corruption probability for self-supervised learning
-   alpha: hyper-parameter to control the weights of feature and mask losses
-   K: number of augmented samples
-   beta: hyper-parameter to control the weights of the supervised and unsupervised losses
-   metric: prediction performance metric (either acc or auc)

In [None]:
# Use every labeled training sample.
total_labeled_samples = len(y_train)
label_no = total_labeled_samples

# Supervised model set.
model_sets = ['logit', 'xgboost', 'mlp']

# VIME hyper-parameters (see the parameter list above for their meaning).
p_m, alpha = 0.3, 2.0
K, beta = 3, 1.0

# Performance metric passed to perf_metric.
metric = 'acc'

# Result vector: one slot per supervised model plus two additional slots.
results = np.zeros(len(model_sets) + 2)

# Double Checking

In [None]:
# Each entry: (name shown in the shape line, name shown in the type line, array).
arrays_to_check = [
    ('x_label', 'x_label', X_train),
    ('y_label', 'y_label', y_train),
    ('x_pretrain', 'pretrain', X_pretrain),
    ('x_test', 'x_test', X_test),
    ('y_test', 'y_test', y_test),
]

# First report every shape ...
for shape_name, type_name, checked_array in arrays_to_check:
    print(f'{shape_name} shape:', checked_array.shape)

# ... then every container type.
for shape_name, type_name, checked_array in arrays_to_check:
    print(f'Type of {type_name}:', type(checked_array))

x_label shape: (1620, 51)
y_label shape: (1620, 9)
x_pretrain shape: (5110, 51)
x_test shape: (175, 51)
y_test shape: (175, 9)
Type of x_label: <class 'numpy.ndarray'>
Type of y_label: <class 'numpy.ndarray'>
Type of pretrain: <class 'numpy.ndarray'>
Type of x_test: <class 'numpy.ndarray'>
Type of y_test: <class 'numpy.ndarray'>


### Train supervised models

- Train the supervised baseline models (logistic regression and XGBoost in this section; the MLP baseline is trained together with VIME-Self in the next section)
- Save the performances of each supervised model.

In [None]:
# Logistic regression
y_test_hat = logit(X_train, y_train, X_test)
results[0] = perf_metric(metric, y_test, y_test_hat)
print('Logistic Regression Performance: ' + str(results[0]))

Logistic Regression Performance: 0.6857142857142857


In [None]:
# XGBoost
y_test_hat = xgb_model(X_train, y_train, X_test)
results[1] = perf_metric(metric, y_test, y_test_hat)
print('XGBoost Performance: ' + str(results[1]))

XGBoost Performance: 0.9542857142857143


### Train & Test VIME-Self
Train only the self-supervised part of the VIME framework.
- Check the performance of the self-supervised part of the VIME framework.

In [None]:
# MLP
mlp_parameters = dict()
mlp_parameters['hidden_dim'] = 200
mlp_parameters['epochs'] = 100
mlp_parameters['activation'] = 'relu'
mlp_parameters['batch_size'] = 100

# VIME
vime_self_parameters = dict()
vime_self_parameters['batch_size'] = 128
vime_self_parameters['epochs'] = 50

In [None]:
# Local model code is imported once, up front, instead of on every loop iteration.
from supervised_models import mlp
from vime_self import vime_self

N_RUNS = 20                        # number of repeated train/evaluate runs
CLASS_LABELS = list(range(2, 11))  # label reported for one-hot column k is k + 2


def _to_class_labels(one_hot_or_scores):
    """Map every row to a class label: position of the row maximum shifted by the first label."""
    return np.argmax(one_hot_or_scores, axis=1) + CLASS_LABELS[0]


def _per_class_accuracy(cm):
    """Return, for every row (true class) of a confusion matrix, diagonal / row sum.

    This is the fraction of samples of each true class that were predicted correctly.
    A class without any test sample yields NaN explicitly instead of running a 0/0 division.
    """
    support = cm.sum(axis=1)
    accuracy = np.full(cm.shape[0], np.nan)
    np.divide(np.diag(cm), support, out=accuracy, where=support > 0)
    return accuracy


def _record_class_accuracies(store, cm):
    """Append the per-class accuracies derived from `cm` to the per-label lists in `store`."""
    for label, class_accuracy in zip(CLASS_LABELS, _per_class_accuracy(cm)):
        store[label].append(class_accuracy)


# Accumulators: one overall score per run, and one list of per-run scores per class label.
mlp_accuracies = []
class_accuracies_mlp = {label: [] for label in CLASS_LABELS}
vime_accuracies = []
class_accuracies_vime = {label: [] for label in CLASS_LABELS}

# The true test labels are identical in every run, so they are decoded only once.
y_test_labels = _to_class_labels(y_test)

for run_idx in range(N_RUNS):
    # Baseline: MLP trained on the original labeled features.
    y_test_hat_mlp = mlp(X_train, y_train, X_test, mlp_parameters)
    mlp_accuracies.append(perf_metric(metric, y_test, y_test_hat_mlp))

    cm_mlp = confusion_matrix(y_test_labels, _to_class_labels(y_test_hat_mlp), labels=CLASS_LABELS)
    _record_class_accuracies(class_accuracies_mlp, cm_mlp)

    # VIME-Self: fit the encoder on the pre-training data, encode the labeled and test
    # features with it, then train the same MLP on the encoded representations.
    vime_self_encoder = vime_self(X_pretrain, p_m, alpha, vime_self_parameters)
    x_train_hat_VIME = vime_self_encoder.predict(X_train)
    x_test_hat_VIME = vime_self_encoder.predict(X_test)
    y_test_hat_VIME_MLP = mlp(x_train_hat_VIME, y_train, x_test_hat_VIME, mlp_parameters)
    vime_accuracies.append(perf_metric(metric, y_test, y_test_hat_VIME_MLP))

    cm_vime = confusion_matrix(y_test_labels, _to_class_labels(y_test_hat_VIME_MLP), labels=CLASS_LABELS)
    _record_class_accuracies(class_accuracies_vime, cm_vime)

Epoch 73: early stopping
Restoring model weights from the end of the best epoch: 23.
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 80ms/step
Epoch 1/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: 3.0547
Epoch 2/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.7440
Epoch 3/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.5410
Epoch 4/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.4265
Epoch 5/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.4409
Epoch 6/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.3154
Epoch 7/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.3038
Epoch 8/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.2635
Epoch 9/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━



[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Restoring model weights from the end of the best epoch: 100.
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Epoch 81: early stopping
Restoring model weights from the end of the best epoch: 31.
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Epoch 1/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 3.1207
Epoch 2/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.7951
Epoch 3/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.5511
Epoch 4/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.4710
Epoch 5/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.4055
Epoch 6/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0



[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Restoring model weights from the end of the best epoch: 95.
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Restoring model weights from the end of the best epoch: 100.
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Epoch 1/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 3.1041
Epoch 2/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.6912
Epoch 3/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.6372
Epoch 4/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.4511
Epoch 5/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.4462
Epoch 6/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.

# Results

In [None]:
def _summarize_runs(title, run_accuracies, class_accuracies):
    """Print and return the aggregate statistics of repeated evaluation runs.

    Args:
        title: heading printed above the statistics.
        run_accuracies: sequence with one overall score per run.
        class_accuracies: mapping from class label to a sequence of per-run scores.

    Returns:
        Tuple (mean score, variance, {class label: mean score}). The variance is computed
        with np.var and its default arguments (population variance, ddof=0).
    """
    mean_accuracy = np.mean(run_accuracies)
    variance = np.var(run_accuracies)
    mean_per_class = {label: np.mean(scores) for label, scores in class_accuracies.items()}

    print(f"{title}:")
    print(f"Mean Accuracy: {mean_accuracy:.5f}")
    print(f"Variance: {variance:.5f}\n")

    print("Classwise Mean Accuracies:")
    for label, score in mean_per_class.items():
        print(f"Class {label}: {score:.5f}")

    return mean_accuracy, variance, mean_per_class


# Report the MLP baseline as well: its statistics were previously computed but never shown,
# which left the VIME-Self numbers without a visible reference point.
mean_mlp_accuracy, variance_mlp, mean_class_accuracies_mlp = _summarize_runs(
    "MLP", mlp_accuracies, class_accuracies_mlp
)
print()
mean_vime_accuracy, variance_vime, mean_class_accuracies_vime = _summarize_runs(
    "VIME-Self", vime_accuracies, class_accuracies_vime
)

# Store the mean scores next to the logistic regression and XGBoost results.
results[2] = mean_mlp_accuracy
results[3] = mean_vime_accuracy

MLP Performance: 0.9231428571428573
VIME-Self Performance: 0.9314285714285715
