In [1]:
import math
import numpy as np
import site
import pprint
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import Isomap
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics.pairwise import cosine_similarity as cd, euclidean_distances as ed
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler, maxabs_scale
from tensorflow.keras import layers, losses, callbacks, Sequential
import tensorflow as tf


In [2]:
class Autoencoder(tf.keras.Model):
    """Dense autoencoder whose encoder and decoder are mirrored tanh stacks.

    The encoder narrows the input through ``int(input_dim/2)+2`` and
    ``int(input_dim/2)+1`` units down to ``latent_dim``; the decoder walks the
    same widths in reverse and ends with ``input_dim`` units.

    Args:
        input_dim: number of features of each input sample.
        latent_dim: width of the bottleneck (encoder output).
    """

    def __init__(self, input_dim, latent_dim):
        super().__init__()
        self.latent_dim = latent_dim
        half_width = int(input_dim/2)

        def tanh_stack(widths):
            # One Dense layer per requested width, every one tanh-activated.
            return tf.keras.Sequential(
                [tf.keras.layers.Dense(width, activation='tanh')
                 for width in widths])

        self.encoder = tanh_stack([half_width+2, half_width+1, latent_dim])
        self.decoder = tanh_stack([half_width+1, half_width+2, input_dim])

    def call(self, x):
        """Encode ``x`` to the latent space and return its reconstruction."""
        return self.decoder(self.encoder(x))


In [7]:
from sklearn.datasets import make_classification
import csv
import time

# NOTE(review): `LXDR` is not imported or defined in any cell visible here; it
# must already exist in the kernel before this cell runs -- confirm its source.

# CSV file that receives four summary rows per evaluated configuration.
RESULTS_CSV = 'scalability_15-01-2023.csv'
# Neighbourhood sizes tried for every technique except 'Global'.
NEIGHBOUR_COUNTS = [10, 50, int(1000/10), int(1000/5),
                    int(2*1000/4), int(3*1000/4)]


def _configure_explainers(explainers, ltype, ng_technique):
    """Set the surrogate type and neighbourhood mode on each explainer.

    'locallocal' uses the explainers' ``_set_knn_local`` /
    ``_set_knn_latent_local`` hooks; every other type uses ``_set_knn`` /
    ``_set_knn_latent``. For 'Global' the ``global_`` attribute is reset to an
    empty dict. 'Clustering' needs no setup here.
    """
    local_models = ltype == 'locallocal'
    for explainer in explainers:
        explainer.type = ltype
    for explainer in explainers:
        if ng_technique == 'KNN':
            if local_models:
                explainer._set_knn_local()
            else:
                explainer._set_knn()
        elif ng_technique == 'LatentKNN':
            if local_models:
                explainer._set_knn_latent_local()
            else:
                explainer._set_knn_latent()
        if ng_technique == 'Global':
            explainer.global_ = {}


def _timed_explanation(explainer, row, n, ng_technique):
    """Return ``(weights, seconds)`` for a single ``explain_instance`` call."""
    # perf_counter is the monotonic, high-resolution clock meant for
    # measuring short durations (time.time() can jump with clock changes).
    started = time.perf_counter()
    weights = explainer.explain_instance(
        row, n, auto_alpha=True, ng_technique=ng_technique)
    return weights, time.perf_counter()-started


def _record_scores(store, expected, actual, elapsed):
    """Append one sample's metrics to ``store`` (a list of four lists).

    Order: euclidean distance, cosine distance (1 - cosine similarity),
    mean absolute error, elapsed explanation time in seconds.
    """
    store[0].append(ed([expected], [actual])[0][0])
    store[1].append(1-cd([expected], [actual])[0][0])
    store[2].append(mae([expected], [actual]))
    store[3].append(elapsed)


def _evaluate_explainers(explainers, latent_test, pca, x_test, mean, n,
                         ng_technique):
    """Explain every test row with each explainer and collect error metrics.

    Args:
        explainers: ``(lxdr_pca, lxdr_kpca, lxdr_iso)``.
        latent_test: ``(x_test_pca, x_test_kpca, x_test_iso)``, the reducers'
            own transforms of ``x_test``, used as ground truth.
        pca: fitted PCA whose ``components_`` are compared to the PCA weights.
        x_test: scaled test rows.
        mean: per-feature mean of the scaled training data.
        n: neighbourhood size passed to ``explain_instance``.
        ng_technique: neighbourhood technique passed to ``explain_instance``.

    Returns:
        dict mapping 'pca_w', 'pca_i', 'kpca', 'iso' (in that order) to four
        parallel lists: euclidean, cosine distance, MAE, seconds.
    """
    lxdr_pca, lxdr_kpca, lxdr_iso = explainers
    x_test_pca, x_test_kpca, x_test_iso = latent_test
    results = {label: [[], [], [], []]
               for label in ('pca_w', 'pca_i', 'kpca', 'iso')}
    true_components = pca.components_.reshape((1, -1))[0]
    for idx, row in enumerate(x_test):
        weights, elapsed = _timed_explanation(lxdr_pca, row, n, ng_technique)
        # 'pca_w': extracted weights against the real PCA components.
        _record_scores(results['pca_w'], true_components,
                       np.array(weights).reshape((1, -1))[0], elapsed)
        # 'pca_i': only the PCA surrogate is applied to the mean-centred row.
        _record_scores(results['pca_i'], x_test_pca[idx],
                       np.dot(row-mean, weights.T), elapsed)

        weights, elapsed = _timed_explanation(lxdr_kpca, row, n, ng_technique)
        _record_scores(results['kpca'], x_test_kpca[idx],
                       np.dot(row, weights.T), elapsed)

        weights, elapsed = _timed_explanation(lxdr_iso, row, n, ng_technique)
        _record_scores(results['iso'], x_test_iso[idx],
                       np.dot(row, weights.T), elapsed)
    return results


def _append_results(features, ltype, ng_technique, n, results):
    """Append one averaged CSV row per entry of ``results`` to RESULTS_CSV."""
    # newline='' hands line-ending control to the csv module; without it
    # blank rows appear between records on Windows.
    with open(RESULTS_CSV, 'a', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        for label, columns in results.items():
            writer.writerow([features, label, ltype, ng_technique, n]
                            + [np.mean(column) for column in columns])


# Alternative sweep kept from the original: range(150, 1050, 50)
# (original note, translated from Greek: "this is where the neural one comes out").
# The autoencoder ('ae') reducer is not evaluated in this cell.
for features in [10, 20, 30, 40, 50]:
    print('Feature size:', features)
    feature_names = ['F'+str(i) for i in range(features)]
    x, y = make_classification(n_samples=1000, n_features=features, n_informative=int(
        features/2), n_redundant=int(features/4), n_classes=2, shuffle=True, random_state=1)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42)

    # Scale with statistics from the training split only.
    scaler = MaxAbsScaler().fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)
    mean = x_train.mean(axis=0)

    # Fit each reducer to half the feature count and keep its test transform
    # as the reference the explanations are compared against.
    pca = PCA(int(features/2), random_state=42)
    pca.fit(x_train, y_train)
    x_test_pca = pca.transform(x_test)

    kpca = KernelPCA(int(features/2), kernel='rbf', random_state=42)
    kpca.fit(x_train, y_train)
    x_test_kpca = kpca.transform(x_test)

    iso = Isomap(n_components=int(features/2))
    iso.fit(x_train, y_train)
    x_test_iso = iso.transform(x_test)

    latent_test = (x_test_pca, x_test_kpca, x_test_iso)

    for ltype in ['locallocal', 'classic', 'neural']:
        # 'Global' is only evaluated for the non-'locallocal' types.
        techniques = ['KNN', 'LatentKNN', 'Clustering']
        if ltype != 'locallocal':
            techniques = ['Global'] + techniques
        for ng_technique in techniques:
            # Fresh explainers for every (type, technique) pair.
            lxdr_pca = LXDR(pca, feature_names, x_train, mean=mean)
            lxdr_kpca = LXDR(kpca, feature_names, x_train)
            lxdr_iso = LXDR(iso, feature_names, x_train)
            explainers = (lxdr_pca, lxdr_kpca, lxdr_iso)
            _configure_explainers(explainers, ltype, ng_technique)

            if ng_technique == 'Global':
                number_of_neigbours = [len(x_train)]
            else:
                number_of_neigbours = NEIGHBOUR_COUNTS
            for n in number_of_neigbours:
                if ng_technique == 'Clustering':
                    # Reset the explainers' `birch` dict for each size.
                    for explainer in explainers:
                        explainer.birch = {}
                results = _evaluate_explainers(
                    explainers, latent_test, pca, x_test, mean, n, ng_technique)
                _append_results(features, ltype, ng_technique, n, results)


Feature size: 10
Feature size: 20
Feature size: 30
Feature size: 40
Feature size: 50


In [9]:
# Scratch check: joining three 2x3 nested lists along the first axis
# yields a single 6x3 array.
a = [[1, 2, 3],
     [0, 1, 2]]
b = [[3, 4, 5],
     [6, 7, 8]]
c = [[8, 2, 3],
     [0, 8, 2]]
np.concatenate((a, b, c), axis=0)


array([[1, 2, 3],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [8, 2, 3],
       [0, 8, 2]])

In [None]:
from sklearn.datasets import make_classification
import csv
import time

# NOTE(review): `LXDR` is not imported or defined in any cell visible here; it
# must already exist in the kernel before this cell runs -- confirm its source.

# The helpers below are declared in this cell so it can be run on its own.

# CSV file that receives four summary rows per evaluated configuration.
RESULTS_CSV = 'scalability_15-01-2023.csv'
# Neighbourhood sizes tried for every technique except 'Global'.
NEIGHBOUR_COUNTS = [10, 50, int(1000/10), int(1000/5),
                    int(2*1000/4), int(3*1000/4)]


def _configure_explainers(explainers, ltype, ng_technique):
    """Set the surrogate type and neighbourhood mode on each explainer.

    'locallocal' uses the explainers' ``_set_knn_local`` /
    ``_set_knn_latent_local`` hooks; every other type uses ``_set_knn`` /
    ``_set_knn_latent``. For 'Global' the ``global_`` attribute is reset to an
    empty dict. 'Clustering' needs no setup here.
    """
    local_models = ltype == 'locallocal'
    for explainer in explainers:
        explainer.type = ltype
    for explainer in explainers:
        if ng_technique == 'KNN':
            if local_models:
                explainer._set_knn_local()
            else:
                explainer._set_knn()
        elif ng_technique == 'LatentKNN':
            if local_models:
                explainer._set_knn_latent_local()
            else:
                explainer._set_knn_latent()
        if ng_technique == 'Global':
            explainer.global_ = {}


def _timed_explanation(explainer, row, n, ng_technique):
    """Return ``(weights, seconds)`` for a single ``explain_instance`` call."""
    # perf_counter is the monotonic, high-resolution clock meant for
    # measuring short durations (time.time() can jump with clock changes).
    started = time.perf_counter()
    weights = explainer.explain_instance(
        row, n, auto_alpha=True, ng_technique=ng_technique)
    return weights, time.perf_counter()-started


def _record_scores(store, expected, actual, elapsed):
    """Append one sample's metrics to ``store`` (a list of four lists).

    Order: euclidean distance, cosine distance (1 - cosine similarity),
    mean absolute error, elapsed explanation time in seconds.
    """
    store[0].append(ed([expected], [actual])[0][0])
    store[1].append(1-cd([expected], [actual])[0][0])
    store[2].append(mae([expected], [actual]))
    store[3].append(elapsed)


def _evaluate_explainers(explainers, latent_test, pca, x_test, mean, n,
                         ng_technique):
    """Explain every test row with each explainer and collect error metrics.

    Args:
        explainers: ``(lxdr_pca, lxdr_kpca, lxdr_iso)``.
        latent_test: ``(x_test_pca, x_test_kpca, x_test_iso)``, the reducers'
            own transforms of ``x_test``, used as ground truth.
        pca: fitted PCA whose ``components_`` are compared to the PCA weights.
        x_test: scaled test rows.
        mean: per-feature mean of the scaled training data.
        n: neighbourhood size passed to ``explain_instance``.
        ng_technique: neighbourhood technique passed to ``explain_instance``.

    Returns:
        dict mapping 'pca_w', 'pca_i', 'kpca', 'iso' (in that order) to four
        parallel lists: euclidean, cosine distance, MAE, seconds.
    """
    lxdr_pca, lxdr_kpca, lxdr_iso = explainers
    x_test_pca, x_test_kpca, x_test_iso = latent_test
    results = {label: [[], [], [], []]
               for label in ('pca_w', 'pca_i', 'kpca', 'iso')}
    true_components = pca.components_.reshape((1, -1))[0]
    for idx, row in enumerate(x_test):
        weights, elapsed = _timed_explanation(lxdr_pca, row, n, ng_technique)
        # 'pca_w': extracted weights against the real PCA components.
        _record_scores(results['pca_w'], true_components,
                       np.array(weights).reshape((1, -1))[0], elapsed)
        # 'pca_i': only the PCA surrogate is applied to the mean-centred row.
        _record_scores(results['pca_i'], x_test_pca[idx],
                       np.dot(row-mean, weights.T), elapsed)

        weights, elapsed = _timed_explanation(lxdr_kpca, row, n, ng_technique)
        _record_scores(results['kpca'], x_test_kpca[idx],
                       np.dot(row, weights.T), elapsed)

        weights, elapsed = _timed_explanation(lxdr_iso, row, n, ng_technique)
        _record_scores(results['iso'], x_test_iso[idx],
                       np.dot(row, weights.T), elapsed)
    return results


def _append_results(features, ltype, ng_technique, n, results):
    """Append one averaged CSV row per entry of ``results`` to RESULTS_CSV."""
    # newline='' hands line-ending control to the csv module; without it
    # blank rows appear between records on Windows.
    with open(RESULTS_CSV, 'a', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        for label, columns in results.items():
            writer.writerow([features, label, ltype, ng_technique, n]
                            + [np.mean(column) for column in columns])


# Alternative sweep kept from the original: range(150, 1050, 50)
# (original note, translated from Greek: "this is where the neural one comes out").
# The autoencoder ('ae') reducer is not evaluated in this cell.
for features in [100, 150, 200, 250, 300, 350, 400, 450, 500]:
    print('Feature size:', features)
    feature_names = ['F'+str(i) for i in range(features)]
    x, y = make_classification(n_samples=1000, n_features=features, n_informative=int(
        features/2), n_redundant=int(features/4), n_classes=2, shuffle=True, random_state=1)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42)

    # Scale with statistics from the training split only.
    scaler = MaxAbsScaler().fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)
    mean = x_train.mean(axis=0)

    # Fit each reducer to half the feature count and keep its test transform
    # as the reference the explanations are compared against.
    pca = PCA(int(features/2), random_state=42)
    pca.fit(x_train, y_train)
    x_test_pca = pca.transform(x_test)

    kpca = KernelPCA(int(features/2), kernel='rbf', random_state=42)
    kpca.fit(x_train, y_train)
    x_test_kpca = kpca.transform(x_test)

    iso = Isomap(n_components=int(features/2))
    iso.fit(x_train, y_train)
    x_test_iso = iso.transform(x_test)

    latent_test = (x_test_pca, x_test_kpca, x_test_iso)

    # The 'neural' type is not part of this larger sweep.
    for ltype in ['locallocal', 'classic']:
        # 'Global' is only evaluated for the non-'locallocal' types.
        techniques = ['KNN', 'LatentKNN', 'Clustering']
        if ltype != 'locallocal':
            techniques = ['Global'] + techniques
        for ng_technique in techniques:
            # Fresh explainers for every (type, technique) pair.
            lxdr_pca = LXDR(pca, feature_names, x_train, mean=mean)
            lxdr_kpca = LXDR(kpca, feature_names, x_train)
            lxdr_iso = LXDR(iso, feature_names, x_train)
            explainers = (lxdr_pca, lxdr_kpca, lxdr_iso)
            _configure_explainers(explainers, ltype, ng_technique)

            if ng_technique == 'Global':
                number_of_neigbours = [len(x_train)]
            else:
                number_of_neigbours = NEIGHBOUR_COUNTS
            for n in number_of_neigbours:
                if ng_technique == 'Clustering':
                    # Reset the explainers' `birch` dict for each size.
                    for explainer in explainers:
                        explainer.birch = {}
                results = _evaluate_explainers(
                    explainers, latent_test, pca, x_test, mean, n, ng_technique)
                _append_results(features, ltype, ng_technique, n, results)


Feature size: 100
Feature size: 150
Feature size: 200
Feature size: 250
Feature size: 300
Feature size: 350
Feature size: 400
Feature size: 450
Feature size: 500


In [None]:
from sklearn.datasets import make_classification
import csv
import time

# Feature sizes 10, 20, 30 and 40 are already DONE.
# range(150,1050,50):  # the 'neural' variant is expected to show up here
N_SAMPLES = 1000
# Neighbourhood sizes are derived from the total sample count.
# NOTE(review): only 80% of the samples end up in x_train - confirm the larger
# sizes are intended to be relative to N_SAMPLES rather than len(x_train).
NEIGHBOUR_COUNTS = [10, 50, int(N_SAMPLES/10), int(N_SAMPLES/5),
                    int(2*N_SAMPLES/4), int(3*N_SAMPLES/4)]
RESULTS_CSV = 'scalability_final.csv'


def _score(expected, actual, elapsed):
    """Compare two vectors.

    Returns a 4-item list: euclidean distance, cosine distance
    (1 - cosine similarity), mean absolute error, and ``elapsed`` unchanged.
    """
    return [
        ed([expected], [actual])[0][0],
        1 - cd([expected], [actual])[0][0],
        mae([expected], [actual]),
        elapsed,
    ]


def _timed_explanation(explainer, sample, n, ng_technique):
    """Call ``explainer.explain_instance`` and return ``(weights, seconds)``.

    Only the ``explain_instance`` call is timed. ``perf_counter`` is used
    because it is the clock meant for measuring short durations.
    """
    started = time.perf_counter()
    weights = explainer.explain_instance(
        sample, n, auto_alpha=True, ng_technique=ng_technique)
    return weights, time.perf_counter() - started


def _evaluate_configuration(features, ltype, ng_technique, n, x_test, mean,
                            pca, explainers, references, csv_path):
    """Explain every test sample with all four explainers and log the averages.

    Parameters
    ----------
    features, ltype, ng_technique, n
        Identify the configuration; written verbatim as the first CSV columns.
    x_test
        Samples to explain, iterated row by row.
    mean
        Vector subtracted from a sample before applying the PCA weights.
    pca
        Fitted PCA whose ``components_`` are the reference for 'pca_w'.
    explainers, references
        Dicts keyed by 'pca', 'kpca', 'iso', 'ae': the explainer objects and
        the reference outputs (indexed like ``x_test``) they are compared to.
    csv_path
        File that receives five appended rows
        ('pca_w', 'pca_i', 'kpca', 'iso', 'ae').
    """
    labels = ('pca_w', 'pca_i', 'kpca', 'iso', 'ae')
    rows = {label: [] for label in labels}
    # Loop invariant: flattened true PCA components.
    true_components = pca.components_.reshape((1, -1))[0]

    for idx, sample in enumerate(x_test):
        # PCA yields two rows: weights vs. true components ('pca_w') and
        # projected instance vs. true projection ('pca_i'); both share one
        # timing because they come from the same explain_instance call.
        weights, elapsed = _timed_explanation(
            explainers['pca'], sample, n, ng_technique)
        rows['pca_w'].append(_score(
            true_components, np.array(weights).reshape((1, -1))[0], elapsed))
        rows['pca_i'].append(_score(
            references['pca'][idx], np.dot(sample - mean, weights.T), elapsed))

        # The remaining techniques are only compared at the instance level.
        for label in ('kpca', 'iso', 'ae'):
            weights, elapsed = _timed_explanation(
                explainers[label], sample, n, ng_technique)
            rows[label].append(_score(
                references[label][idx], np.dot(sample, weights.T), elapsed))

    # newline='' is required by the csv module so that the writer controls
    # line endings itself (otherwise blank rows can appear on Windows).
    with open(csv_path, 'a', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        for label in labels:
            averages = [np.mean(column) for column in zip(*rows[label])]
            writer.writerow([features, label, ltype, ng_technique, n] + averages)


for features in [30, 40, 50, 100]:
    print('Feature size:', features)
    feature_names = ['F'+str(i) for i in range(features)]
    x, y = make_classification(n_samples=N_SAMPLES, n_features=features, n_informative=int(
        features/2), n_redundant=int(features/4), n_classes=2, shuffle=True, random_state=1)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42)

    scaler = MaxAbsScaler().fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)
    mean = x_train.mean(axis=0)

    # Reference dimensionality-reduction models, all reducing to features/2.
    pca = PCA(int(features/2), random_state=42)
    pca.fit(x_train, y_train)
    x_test_pca = pca.transform(x_test)

    kpca = KernelPCA(int(features/2), random_state=42)
    kpca.fit(x_train, y_train)
    x_test_kpca = kpca.transform(x_test)

    iso = Isomap(n_components=int(features/2))
    iso.fit(x_train, y_train)
    x_test_iso = iso.transform(x_test)

    # NOTE(review): early stopping watches the training 'loss' even though a
    # validation split is held out - confirm 'val_loss' is not what was meant.
    callback = callbacks.EarlyStopping(
        monitor='loss', patience=3, verbose=0, restore_best_weights=True)
    ae = Autoencoder(len(x_train[0]), int(features/2))
    ae.compile(optimizer='adam', loss='mae')
    ae.fit(x_train, x_train,
           epochs=200,
           shuffle=True,
           validation_split=0.1,
           callbacks=[callback],
           verbose=0)
    # NOTE(review): Autoencoder.call returns the decoded reconstruction, so this
    # reference has input_dim columns, not latent_dim - confirm that matches
    # what LXDR explains for the autoencoder.
    x_test_ae = ae.predict(x_test)

    lxdr_pca = LXDR(pca, feature_names, x_train, mean=mean)
    lxdr_kpca = LXDR(kpca, feature_names, x_train)
    lxdr_iso = LXDR(iso, feature_names, x_train)
    lxdr_ae = LXDR(ae, feature_names, x_train)

    explainers = {'pca': lxdr_pca, 'kpca': lxdr_kpca,
                  'iso': lxdr_iso, 'ae': lxdr_ae}
    references = {'pca': x_test_pca, 'kpca': x_test_kpca,
                  'iso': x_test_iso, 'ae': x_test_ae}

    # (explainer type, neighbourhood-generation techniques) in execution order.
    experiment_plan = [
        ('locallocal', ['KNN', 'LatentKNN', 'Clustering']),
        ('classic', ['Global', 'KNN', 'LatentKNN', 'Clustering']),
        ('neural', ['Global', 'KNN', 'LatentKNN', 'Clustering']),
    ]

    for ltype, techniques in experiment_plan:
        for explainer in explainers.values():
            explainer.type = ltype
        for ng_technique in techniques:
            number_of_neigbours = NEIGHBOUR_COUNTS
            if ng_technique == 'Global':
                # 'Global' uses the whole training set once, and the
                # explainers' global_ attribute is reset beforehand.
                number_of_neigbours = [len(x_train)]
                for explainer in explainers.values():
                    explainer.global_ = {}
            for n in number_of_neigbours:
                _evaluate_configuration(
                    features, ltype, ng_technique, n, x_test, mean, pca,
                    explainers, references, RESULTS_CSV)


In [7]:
from sklearn.datasets import make_classification
import csv
import time

# Feature sizes 10, 20, 30 and 40 are already DONE.
N_SAMPLES = 1000
# Neighbourhood sizes are derived from the total sample count.
NEIGHBOUR_COUNTS = [10, 50, int(N_SAMPLES/10), int(N_SAMPLES/5),
                    int(2*N_SAMPLES/4), int(3*N_SAMPLES/4)]
RESULTS_CSV = 'scalability_final.csv'


def _score(expected, actual, elapsed):
    """Compare two vectors.

    Returns a 4-item list: euclidean distance, cosine distance
    (1 - cosine similarity), mean absolute error, and ``elapsed`` unchanged.
    """
    return [
        ed([expected], [actual])[0][0],
        1 - cd([expected], [actual])[0][0],
        mae([expected], [actual]),
        elapsed,
    ]


def _timed_explanation(explainer, sample, n, ng_technique):
    """Call ``explainer.explain_instance`` and return ``(weights, seconds)``.

    Only the ``explain_instance`` call is timed. ``perf_counter`` is used
    because it is the clock meant for measuring short durations.
    """
    started = time.perf_counter()
    weights = explainer.explain_instance(
        sample, n, auto_alpha=True, ng_technique=ng_technique)
    return weights, time.perf_counter() - started


def _evaluate_configuration(features, ltype, ng_technique, n, x_test, mean,
                            pca, explainers, references, csv_path):
    """Explain every test sample with all four explainers and log the averages.

    ``explainers`` and ``references`` are dicts keyed by 'pca', 'kpca', 'iso'
    and 'ae'; ``references[key]`` is indexed like ``x_test``. Five rows
    ('pca_w', 'pca_i', 'kpca', 'iso', 'ae') are appended to ``csv_path``, each
    holding the configuration columns followed by the mean euclidean distance,
    mean cosine distance, mean MAE and mean explanation time.
    """
    labels = ('pca_w', 'pca_i', 'kpca', 'iso', 'ae')
    rows = {label: [] for label in labels}
    # Loop invariant: flattened true PCA components.
    true_components = pca.components_.reshape((1, -1))[0]

    for idx, sample in enumerate(x_test):
        # One explain_instance call feeds both PCA rows, so they share a timing.
        weights, elapsed = _timed_explanation(
            explainers['pca'], sample, n, ng_technique)
        rows['pca_w'].append(_score(
            true_components, np.array(weights).reshape((1, -1))[0], elapsed))
        rows['pca_i'].append(_score(
            references['pca'][idx], np.dot(sample - mean, weights.T), elapsed))

        for label in ('kpca', 'iso', 'ae'):
            weights, elapsed = _timed_explanation(
                explainers[label], sample, n, ng_technique)
            rows[label].append(_score(
                references[label][idx], np.dot(sample, weights.T), elapsed))

    # newline='' is required by the csv module so that the writer controls
    # line endings itself (otherwise blank rows can appear on Windows).
    with open(csv_path, 'a', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        for label in labels:
            averages = [np.mean(column) for column in zip(*rows[label])]
            writer.writerow([features, label, ltype, ng_technique, n] + averages)


# range(150,1050,50):  # the 'neural' variant is expected to show up here
for features in [50]:
    print('Feature size:', features)
    feature_names = ['F'+str(i) for i in range(features)]
    x, y = make_classification(n_samples=N_SAMPLES, n_features=features, n_informative=int(
        features/2), n_redundant=int(features/4), n_classes=2, shuffle=True, random_state=1)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42)

    scaler = MaxAbsScaler().fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)
    mean = x_train.mean(axis=0)

    # Reference dimensionality-reduction models, all reducing to features/2.
    pca = PCA(int(features/2), random_state=42)
    pca.fit(x_train, y_train)
    x_test_pca = pca.transform(x_test)

    kpca = KernelPCA(int(features/2), random_state=42)
    kpca.fit(x_train, y_train)
    x_test_kpca = kpca.transform(x_test)

    iso = Isomap(n_components=int(features/2))
    iso.fit(x_train, y_train)
    x_test_iso = iso.transform(x_test)

    # NOTE(review): early stopping watches the training 'loss' even though a
    # validation split is held out - confirm 'val_loss' is not what was meant.
    callback = callbacks.EarlyStopping(
        monitor='loss', patience=3, verbose=0, restore_best_weights=True)
    ae = Autoencoder(len(x_train[0]), int(features/2))
    ae.compile(optimizer='adam', loss='mae')
    ae.fit(x_train, x_train,
           epochs=200,
           shuffle=True,
           validation_split=0.1,
           callbacks=[callback],
           verbose=0)
    # NOTE(review): Autoencoder.call returns the decoded reconstruction, so this
    # reference has input_dim columns, not latent_dim - confirm that matches
    # what LXDR explains for the autoencoder.
    x_test_ae = ae.predict(x_test)

    lxdr_pca = LXDR(pca, feature_names, x_train, mean=mean)
    lxdr_kpca = LXDR(kpca, feature_names, x_train)
    lxdr_iso = LXDR(iso, feature_names, x_train)
    lxdr_ae = LXDR(ae, feature_names, x_train)

    explainers = {'pca': lxdr_pca, 'kpca': lxdr_kpca,
                  'iso': lxdr_iso, 'ae': lxdr_ae}
    references = {'pca': x_test_pca, 'kpca': x_test_kpca,
                  'iso': x_test_iso, 'ae': x_test_ae}

    # Remaining 'neural' runs: KNN only needs the largest neighbourhood, the
    # other two techniques need every neighbourhood size.
    experiment_plan = [
        ('neural', ['KNN'], [int(3*N_SAMPLES/4)]),
        ('neural', ['LatentKNN', 'Clustering'], NEIGHBOUR_COUNTS),
    ]

    for ltype, techniques, neighbour_counts in experiment_plan:
        for explainer in explainers.values():
            explainer.type = ltype
        for ng_technique in techniques:
            number_of_neigbours = neighbour_counts
            if ng_technique == 'Global':
                # Kept so that 'Global' can be added to the plan by hand:
                # it uses the whole training set and resets global_.
                number_of_neigbours = [len(x_train)]
                for explainer in explainers.values():
                    explainer.global_ = {}
            for n in number_of_neigbours:
                _evaluate_configuration(
                    features, ltype, ng_technique, n, x_test, mean, pca,
                    explainers, references, RESULTS_CSV)


Feature size: 50


In [37]:
from sklearn.datasets import make_classification
import csv
import time

# Feature sizes 10, 20, 30 and 40 are already DONE.
from joblib import Parallel, delayed

N_SAMPLES = 1000
# Number of test instances explained in the parallel timing run.
MAX_INSTANCES = 100


def _score(expected, actual, elapsed):
    """Compare two vectors.

    Returns a 4-item list: euclidean distance, cosine distance
    (1 - cosine similarity), mean absolute error, and ``elapsed`` unchanged.
    """
    return [
        ed([expected], [actual])[0][0],
        1 - cd([expected], [actual])[0][0],
        mae([expected], [actual]),
        elapsed,
    ]


def _timed_explanation(explainer, sample, n, ng_technique):
    """Call ``explainer.explain_instance`` and return ``(weights, seconds)``."""
    started = time.perf_counter()
    weights = explainer.explain_instance(
        sample, n, auto_alpha=True, ng_technique=ng_technique)
    return weights, time.perf_counter() - started


def bob(ind, x_test, n, ng_technique, pca, lxdr_pca, lxdr_kpca, lxdr_iso, lxdr_ae, mean, x_test_pca, x_test_kpca, x_test_iso, x_test_ae):
    """Explain test instance ``ind`` with all four explainers.

    Returns a one-element list wrapping a flat list of 20 values: five groups
    of [euclidean distance, cosine distance, MAE, seconds], in the order
    PCA weights, PCA instance, KernelPCA, Isomap, autoencoder.
    The two PCA groups share the timing of a single explain_instance call.
    """
    result = []
    x = x_test[ind]

    weights, elapsed = _timed_explanation(lxdr_pca, x, n, ng_technique)
    result.extend(_score(pca.components_.reshape((1, -1))[0],
                         np.array(weights).reshape((1, -1))[0], elapsed))
    result.extend(_score(x_test_pca[ind], np.dot(x - mean, weights.T), elapsed))

    for explainer, reference in ((lxdr_kpca, x_test_kpca),
                                 (lxdr_iso, x_test_iso),
                                 (lxdr_ae, x_test_ae)):
        weights, elapsed = _timed_explanation(explainer, x, n, ng_technique)
        result.extend(_score(reference[ind], np.dot(x, weights.T), elapsed))

    return [result]


# range(150,1050,50):  # the 'neural' variant is expected to show up here
for features in [100]:
    print('Feature size:', features)
    feature_names = ['F'+str(i) for i in range(features)]
    x, y = make_classification(n_samples=N_SAMPLES, n_features=features, n_informative=int(
        features/2), n_redundant=int(features/4), n_classes=2, shuffle=True, random_state=1)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42)

    scaler = MaxAbsScaler().fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)
    mean = x_train.mean(axis=0)

    # Reference dimensionality-reduction models, all reducing to features/2.
    pca = PCA(int(features/2), random_state=42)
    pca.fit(x_train, y_train)
    x_test_pca = pca.transform(x_test)

    kpca = KernelPCA(int(features/2), random_state=42)
    kpca.fit(x_train, y_train)
    x_test_kpca = kpca.transform(x_test)

    iso = Isomap(n_components=int(features/2))
    iso.fit(x_train, y_train)
    x_test_iso = iso.transform(x_test)

    # NOTE(review): early stopping watches the training 'loss' even though a
    # validation split is held out - confirm 'val_loss' is not what was meant.
    callback = callbacks.EarlyStopping(
        monitor='loss', patience=3, verbose=0, restore_best_weights=True)
    ae = Autoencoder(len(x_train[0]), int(features/2))
    ae.compile(optimizer='adam', loss='mae')
    ae.fit(x_train, x_train,
           epochs=200,
           shuffle=True,
           validation_split=0.1,
           callbacks=[callback],
           verbose=0)
    # NOTE(review): Autoencoder.call returns the decoded reconstruction, so this
    # reference has input_dim columns, not latent_dim - confirm that matches
    # what LXDR explains for the autoencoder.
    x_test_ae = ae.predict(x_test)

    lxdr_pca = LXDR(pca, feature_names, x_train, mean=mean)
    lxdr_kpca = LXDR(kpca, feature_names, x_train)
    lxdr_iso = LXDR(iso, feature_names, x_train)
    lxdr_ae = LXDR(ae, feature_names, x_train)

    print("Let's go!")
    for ltype in ['locallocal']:
        lxdr_pca.type = ltype
        lxdr_kpca.type = ltype
        lxdr_iso.type = ltype
        lxdr_ae.type = ltype
        for ng_technique in ['KNN']:
            for n in [int(N_SAMPLES/5)]:
                tss = time.perf_counter()
                # require='sharedmem' asks joblib for a backend whose workers
                # share memory with the caller, so the explainers are used
                # concurrently rather than copied.
                # NOTE(review): confirm LXDR.explain_instance is safe to call
                # from several workers at once, and keep in mind that the
                # per-call timings inside bob are taken while other workers run.
                with Parallel(n_jobs=4, require='sharedmem') as parallel:
                    results = parallel(
                        delayed(bob)(ind, x_test, n, ng_technique, pca, lxdr_pca, lxdr_kpca, lxdr_iso,
                                     lxdr_ae, mean, x_test_pca, x_test_kpca, x_test_iso, x_test_ae)
                        for ind in range(min(MAX_INSTANCES, len(x_test))))
                print(time.perf_counter()-tss)
                # TODO: `results` is only kept in memory. The previous draft
                # appended per-technique means to 'scalability_finalw.csv', but it
                # relied on aggregate lists that this cell never builds; the
                # aggregation has to be derived from `results` before writing.


Feature size: 100
Let's go!
833.4530341625214
