In [1]:
import numpy as np
import pandas as pd 
import time
import math
from sklearn.metrics import mean_squared_error
from scipy.linalg.interpolative import estimate_spectral_norm
from ipynb.fs.full.Traditional_PCA import do_pca
from ipynb.fs.full.DBPCA import dbpca
from ipynb.fs.full.DBPCA import dbpca_with_forgetting_factor
from ipynb.fs.full.SyntheticDataGeneration import generate_gaussian_dataset_concept_drift

In [None]:
def autolabel(rects, ax):
    """Annotate every bar in *rects* with its height, rounded to 6 decimals.

    Parameters
    ----------
    rects : iterable
        Bar objects exposing ``get_height()``, ``get_x()`` and ``get_width()``
        (e.g. the container returned by ``ax.bar``).
    ax : object
        Axes-like object whose ``annotate`` method is called once per bar.

    Returns
    -------
    None
    """
    for bar in rects:
        # The rounded value is used both as the label text and as the y anchor.
        value = round(bar.get_height(), 6)
        centre_x = bar.get_x() + bar.get_width() / 2
        ax.annotate(
            '{}'.format(value),
            xy=(centre_x, value),
            xytext=(0, 3),  # shift the text 3 points upwards from the anchor
            textcoords="offset points",
            ha='center',
            va='bottom',
        )
        
def draw_bar_chart(df, label, title, metric='mse',
                   timeunit=None,
                   figwidth=8, figheight=6,
                   colors=('#D81B60', '#047562', '#1E88E5'),
                   hatch=('-', '\\', '/')):
    """Draw a grouped bar chart with three bars per x-axis entry and show it.

    The bar heights are read from the columns ``{metric}_pca``,
    ``{metric}_pca_online`` and ``{metric}_inc_pca`` of *df*, each rounded to
    two decimals. Every bar is annotated with its height via ``autolabel``.

    NOTE(review): this function uses ``plt`` and ``FormatStrFormatter``, which
    are not imported in the visible import cell -- confirm they are brought
    into scope elsewhere before calling it.

    Parameters
    ----------
    df : DataFrame-like
        Source of the tick labels and the three metric columns.
    label : str
        Column of *df* whose values become the x tick labels.
    title : str
        Figure title (placed below the axes through ``y=-0.1``).
    metric : str
        Column-name prefix; also used as the y-axis label.
    timeunit : str or None
        When truthy, appended to the y-axis label as ``"<metric> (<timeunit>)"``.
    figwidth, figheight : number
        Figure size; the width actually used is ``figwidth * 2``.
    colors, hatch : sequence
        Fill colour and hatch pattern for the first, second and third bar group.
    """
    bar_width = 0.25
    tick_labels = df[label]

    # (bar heights, legend entry) for each group, in drawing order.
    series = (
        (df[f'{metric}_pca'].round(2), 'PCA'),
        (df[f'{metric}_pca_online'].round(2), 'PCA Online'),
        (df[f'{metric}_inc_pca'].round(2), 'Incremental PCA with SVD'),
    )

    fig, ax = plt.subplots(figsize=(figwidth * 2, figheight))

    # x positions: each group is shifted one bar width right of the previous one.
    x_first = np.arange(len(tick_labels))
    x_middle = x_first + bar_width
    x_last = x_middle + bar_width

    bar_groups = []
    for idx, xs in enumerate((x_first, x_middle, x_last)):
        values, legend_entry = series[idx]
        bar_groups.append(
            ax.bar(xs, values,
                   color=colors[idx],
                   width=bar_width, edgecolor='white',
                   label=legend_entry,
                   hatch=hatch[idx])
        )

    # Axis label, title, ticks (centred on the middle group) and legend.
    axis_label = f'{metric} ({timeunit})' if timeunit else metric
    ax.set_ylabel(f'{axis_label}')
    ax.set_title(title, y=-0.1)
    ax.set_xticks(x_middle)
    ax.set_xticklabels(tick_labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
    ax.legend()

    for group in bar_groups:
        autolabel(group, ax)

    fig.tight_layout()
    plt.show()

In [2]:
def run_dbpca_with_forgetting_factor_experiments(X, k, d, f, ndist, nobjects, B_percentage=None):
    """Run one DBPCA-with-forgetting-factor experiment and return its reconstruction errors.

    A dataset is generated with ``generate_gaussian_dataset_concept_drift``,
    transposed, and passed to ``dbpca_with_forgetting_factor``. The data is then
    projected onto the returned matrix ``Q``, reconstructed, and compared with
    the original.

    Parameters
    ----------
    X : any
        Ignored. The argument is overwritten by a freshly generated dataset;
        it is kept only so existing callers keep working.
        NOTE(review): confirm whether a caller-supplied X was meant to be used.
    k : int
        Forwarded to ``dbpca_with_forgetting_factor``.
    d : int
        Passed as ``nattr`` to the data generator and as ``d`` to
        ``dbpca_with_forgetting_factor``.
    f : float
        Forgetting factor forwarded to ``dbpca_with_forgetting_factor``.
    ndist, nobjects : int
        Forwarded to the data generator.
    B_percentage : float or None
        When None, ``B = nobjects``; otherwise ``B = nobjects * B_percentage``.

    Returns
    -------
    tuple
        ``(spectral_norm, mse)`` of the difference between the data and its
        reconstruction.
    """
    # The original call passed an undefined name `nattr`; `d` is what the sibling
    # run_various_f passes as nattr. Keywords avoid depending on positional order.
    X = generate_gaussian_dataset_concept_drift(nattr=d, ndist=ndist, nobjects=nobjects)
    X = X.T
    if B_percentage is None:
        B = nobjects
    else:
        # NOTE(review): B is a float here (e.g. 100 * 0.5 == 50.0); confirm that
        # dbpca_with_forgetting_factor accepts a non-integer B.
        B = nobjects * B_percentage
    Q = dbpca_with_forgetting_factor(B, X, k, d, f)
    # Project the data onto Q ...
    Y = X.T.dot(Q)
    # ... and map the projection back to the original space.
    X_est = Y.dot(Q.T)
    # Reconstruction error, measured two ways on the same residual.
    X_rec = X_est.T
    spectral_norm = estimate_spectral_norm(X - X_rec)
    mse = mean_squared_error(X, X_rec)
    return spectral_norm, mse

In [7]:
def run_various_f(F, k, d, ndist, nobjects, B_percentage=None):
    """Evaluate DBPCA with each forgetting factor in *F* on one generated dataset.

    The dataset is generated once (with ``nattr=d``) and transposed. For every
    ``f`` in *F*, ``dbpca_with_forgetting_factor`` is run on it, the data is
    projected onto the returned matrix and reconstructed, and two
    reconstruction errors are recorded.

    Parameters
    ----------
    F : iterable of float
        Forgetting factors to try, in order.
    k, d : int
        Forwarded to ``dbpca_with_forgetting_factor``; ``d`` is also the
        generator's ``nattr``.
    ndist, nobjects : int
        Forwarded to the data generator.
    B_percentage : float or None
        When None, ``B = nobjects``; otherwise ``B = nobjects * B_percentage``.

    Returns
    -------
    pandas.DataFrame
        One row per factor with the columns ``f``, ``spectral_norm`` and ``mse``.
    """
    data = generate_gaussian_dataset_concept_drift(nattr=d, ndist=ndist, nobjects=nobjects).T
    block = nobjects if B_percentage is None else nobjects * B_percentage

    columns = {'f': [], 'spectral_norm': [], 'mse': []}
    for factor in F:
        basis = dbpca_with_forgetting_factor(block, data, k, d, factor)
        # Project onto the basis, map back, and restore the layout of `data`.
        rebuilt = data.T.dot(basis).dot(basis.T).T
        columns['f'].append(factor)
        columns['spectral_norm'].append(estimate_spectral_norm(data - rebuilt))
        columns['mse'].append(mean_squared_error(data, rebuilt))
    return pd.DataFrame(columns)

In [9]:
# Experiment: sweep the forgetting factor over 0.1, 0.2, ..., 1.0 on one dataset.
# NOTE(review): k equals d here; if the matrix returned by DBPCA is a full
# orthonormal basis, reconstruction is exact up to rounding and the sweep cannot
# discriminate between f values -- confirm this is intended.
d = 3            # passed as nattr to the generator and as d to DBPCA
k = 3            # passed as k to DBPCA
ndist = 100
nobjects = 100
F = np.arange(1, 11) * 0.1
df_various_f = run_various_f(F, k=k, d=d, ndist=ndist, nobjects=nobjects)
df_various_f

Unnamed: 0,f,spectral_norm,mse
0,0.1,2.13432e-14,3.246481e-32
1,0.2,2.134327e-14,3.246481e-32
2,0.3,3.068773e-14,5.427732e-32
3,0.4,2.134328e-14,3.246481e-32
4,0.5,4.882412e-14,1.210713e-31
5,0.6,3.068773e-14,5.427732e-32
6,0.7,7.344504e-14,2.475087e-31
7,0.8,2.134329e-14,3.246481e-32
8,0.9,4.401157e-14,1.003299e-31
9,1.0,6.929878e-14,1.9631880000000001e-31
