In [9]:
import os
import numpy as np

import matplotlib as mpl
from matplotlib import gridspec
import matplotlib.pyplot as plt
import seaborn as sns
# Typeset all figure text through LaTeX, loading the `libertine` package in the
# LaTeX preamble, and select the serif font family for matplotlib text.
mpl.rcParams.update({
    'text.usetex': True,
    'text.latex.preamble': r'\usepackage{libertine}',
})
mpl.rc('font', family='serif')
from matplotlib.offsetbox import AnchoredText

from src.pipelines import dataset_outliers

In [13]:
# Settings for the synthetic dataset. Each value is forwarded as a keyword
# argument to dataset_outliers(...) and is also encoded, in this order, in the
# file names used when the result is saved.
# NOTE(review): the per-parameter descriptions below are inferred from the
# names only -- confirm against src.pipelines.dataset_outliers.
num_devices = 100             # presumably the number of simulated devices
num_data = 100                # passed as `n`; presumably samples per device
dims = 100                    # presumably the dimensionality of each sample
subspace_frac = 0.2           # a fraction; exact semantics not visible here
frac_outlying_devices = 0.05  # presumably the share of devices that are outlying
frac_outlying_data = 0.03     # presumably the share of data points that are outlying
gamma = 0.5                   # generator parameter; meaning not visible here
delta = 0.5                   # generator parameter; meaning not visible here

In [14]:
# Generate the dataset from the notebook-level settings. The call returns a
# pair: the generated data and the accompanying `outliers` result.
generation_kwargs = dict(
    num_devices=num_devices,
    n=num_data,
    dims=dims,
    subspace_frac=subspace_frac,
    frac_outlying_devices=frac_outlying_devices,
    frac_outlying_data=frac_outlying_data,
    gamma=gamma,
    delta=delta,
)
data, outliers = dataset_outliers(**generation_kwargs)

In [16]:
# Persist `data` and `outliers` as .npy files under ./data/synth (relative to
# the current working directory). Every generation setting is encoded in the
# file name so that differently parameterised runs do not overwrite each other.
save = True
if save:
    # Underscore-separated settings, in the same order as they are passed to
    # dataset_outliers(...).
    params_str = "_".join(str(value) for value in (num_devices,
                                                   num_data,
                                                   dims,
                                                   subspace_frac,
                                                   frac_outlying_devices,
                                                   frac_outlying_data,
                                                   gamma,
                                                   delta))
    synth_dir = os.path.join(os.getcwd(), "data", "synth")
    # np.save does not create missing parent directories; without this the
    # cell raises FileNotFoundError on a fresh checkout.
    os.makedirs(synth_dir, exist_ok=True)
    dataname = os.path.join(synth_dir, params_str + "_d")
    outname = os.path.join(synth_dir, params_str + "_o")
    # np.save appends the ".npy" extension to both paths.
    np.save(dataname, data)
    np.save(outname, outliers)

In [5]:
# Optional overview figure: a 2x2 grid of scatter plots, one panel per
# (gamma, delta) combination in {0, 1} x {0, 1}, with all other generation
# settings taken from the notebook-level configuration. The figure is written
# to ./figures/data_different_params.pdf and then shown.
plotting = False
if plotting:
    fig = plt.figure()

    markers = ["o", "v", "s"]
    # (gamma, delta) per panel, in subplot order 221, 222, 223, 224.
    panel_params = [(0, 0), (1, 0), (0, 1), (1, 1)]

    for panel_idx, (panel_gamma, panel_delta) in enumerate(panel_params, start=1):
        ax = fig.add_subplot(2, 2, panel_idx)
        # Panel-local names on purpose: the notebook-level gamma, delta, data
        # and outliers must not be overwritten, otherwise cells run afterwards
        # (e.g. the save cell) would silently use the last panel's values.
        panel_data, _ = dataset_outliers(num_devices=num_devices,
                                         n=num_data,
                                         dims=dims,
                                         subspace_frac=subspace_frac,
                                         frac_outlying_devices=frac_outlying_devices,
                                         frac_outlying_data=frac_outlying_data,
                                         gamma=panel_gamma,
                                         delta=panel_delta)
        # One scatter series per element of the generated data (presumably one
        # per device -- confirm against dataset_outliers), showing only the
        # first two coordinates.
        for i, d in enumerate(panel_data):
            # Cycle through the marker list so that more than len(markers)
            # series do not raise IndexError.
            ax.scatter(d.T[0], d.T[1], alpha=0.7, marker=markers[i % len(markers)])
        # Parameter label anchored in the lower-right corner (loc=4).
        text = AnchoredText(r"$(\gamma={},\ \delta={})$".format(panel_gamma, panel_delta), loc=4, frameon=False, pad=0.0, prop={"backgroundcolor": 'white'})
        plt.setp(text.patch, facecolor='white', alpha=0.75)
        ax.add_artist(text)

    # Frameless axes spanning the whole figure, used only to carry the shared
    # x/y labels; its ticks and tick labels are hidden.
    shared_ax = fig.add_subplot(111, frame_on=False)
    shared_ax.tick_params(labelcolor="none", bottom=False, left=False)
    shared_ax.set_xlabel("Dim 1")
    shared_ax.set_ylabel("Dim 2")

    fig.tight_layout()
    figures_dir = os.path.join(os.getcwd(), "figures")
    # savefig does not create missing directories.
    os.makedirs(figures_dir, exist_ok=True)
    fig.savefig(os.path.join(figures_dir, "data_different_params.pdf"))
    plt.show()