In [1]:
import os
import numpy as np
from os.path import join

In [2]:
# Fixed seed so that a fresh kernel (Restart & Run All) regenerates exactly the same
# class means; a local RandomState is used so the global numpy RNG state is untouched.
DATASET_SEED = 0
_spec_rng = np.random.RandomState(DATASET_SEED)
_spec_dims = 30  # dimensionality of the original (non-augmented) input

# Specification of each synthetic dataset, keyed by dataset name.
# 'mean_val' / 'std_val' hold one list per class (two classes here), each of
# length 'original_dims'; values are plain Python floats.
dataset_spec = {'dataset_1': {'original_dims': _spec_dims,
                              'output_dims': 2,
                              'max_additional_dims': 50,
                              # Draw order matters for reproducibility: first class, then second.
                              'mean_val': [(2 * np.round(_spec_rng.randn(_spec_dims), decimals=3)).tolist(),
                                           (-2 * np.round(_spec_rng.randn(_spec_dims), decimals=3)).tolist()],
                              'std_val': [[0.5] * _spec_dims,
                                          [0.5] * _spec_dims],
                              'noise': 'gaussian',
                              'noise_mean': 0.,
                              'noise_sigma': 0.5,
                              'n_samples_per_class': 5000
                             }
                }

In [3]:
class Hyperparameters(object):
    """
    Plain container for the training hyper-parameters of an experiment.

    New hyper-parameters should be added as keyword arguments of ``__init__`` (with a
    default), so that settings read back from a json are completed with the fields
    introduced by the latest code.
    """
    def __init__(self,
                 learning_rate=5e-2,
                 architecture=None,
                 epochs=500,
                 batch_size=10,
                 loss='cross_entropy',
                 optimizer='sgd',
                 lr_at_plateau=True,
                 reduction_factor=None,
                 validation_check=True):
        """
        Store every argument on the instance under an attribute of the same name.

        :param learning_rate: float, the initial value for the learning rate
        :param architecture: str, the architecture type
        :param epochs: int, the number of epochs we want to train
        :param batch_size: int, the size of a batch
        :param loss: str, loss type, cross entropy or square loss
        :param optimizer: str, the optimizer type
        :param lr_at_plateau: bool, protocol to decrease the learning rate
        :param reduction_factor: int, the factor which we use to reduce the learning rate
        :param validation_check: bool, if we want to keep track of validation loss as a stopping criterion
        """
        # (attribute name, value) pairs, listed in signature order so that the
        # instance dictionary keeps the same field order as the argument list.
        settings = (
            ('learning_rate', learning_rate),
            ('architecture', architecture),
            ('epochs', epochs),
            ('batch_size', batch_size),
            ('loss', loss),
            ('optimizer', optimizer),
            ('lr_at_plateau', lr_at_plateau),
            ('reduction_factor', reduction_factor),
            ('validation_check', validation_check),
        )
        for attr_name, attr_value in settings:
            setattr(self, attr_name, attr_value)


class Dataset:
    """
    Dataset settings attached to a single experiment: the scenario, the input/output
    dimensionalities, how the original data is modified (extra dimensions, noise,
    redundancy) and the number of training examples.

    This description was written for the modified_MNIST_dataset; verify that it still
    holds for other datasets.
    """
    # NOTE: output_dims is already a constructor argument (the former TODO about
    # adding it appears to be resolved).
    def __init__(self,
                 scenario=1,
                 original_dims=30,
                 output_dims=2,
                 additional_dims=2,
                 mean_val=None,
                 std_val=None,
                 noise='gaussian',
                 noise_mean=0.,
                 noise_sigma=0.5,
                 n_training=10,
                 redundancy_amount=None):
        """
        Store every argument on the instance under an attribute of the same name.

        :param scenario: int, the learning paradigm
        :param original_dims: int, presumably the dimensionality of the original input
            (the previous description, "name of the folder of the experiments", does not
            match an int defaulting to 30) -- TODO confirm
        :param output_dims: int, dimensionality of the output
        :param additional_dims: int, additional noise
        :param mean_val: not documented in the original; presumably the per-class means -- TODO confirm
        :param std_val: not documented in the original; presumably the per-class standard deviations -- TODO confirm
        :param noise: str or None
        :param noise_mean: int or np.array
        :param noise_sigma: int or np.array
        :param n_training: int, number of training examples
        :param redundancy_amount: percentage of redundant features, scenario 4 only
        """
        # Learning paradigm and dimensionalities.
        self.scenario, self.original_dims = scenario, original_dims
        self.output_dims, self.additional_dims = output_dims, additional_dims
        # Statistics handed over by the caller (left as None when not provided).
        self.mean_val, self.std_val = mean_val, std_val
        # Noise model.
        self.noise, self.noise_mean, self.noise_sigma = noise, noise_mean, noise_sigma
        # Training-set size and redundancy.
        self.n_training, self.redundancy_amount = n_training, redundancy_amount


class Experiment(object):
    """
    A single experiment: its index, its output location, its training status and the
    Hyperparameters / Dataset settings it runs with.

    IF YOU ADD ANOTHER CLASS, MAKE SURE TO INCLUDE IT HERE.
    """
    def __init__(self,
                 id,
                 output_path,
                 train_completed=False,
                 hyper=None,
                 dataset=None):
        """
        Build the experiment; ``hyper`` and ``dataset`` left as None are replaced by
        default-constructed instances.

        :param id: index of output data folder
        :param output_path: output directory
        :param train_completed: bool, it indicates if the experiment has already been trained
        :param hyper: instance of Hyperparameters class, or None for ``Hyperparameters()``
        :param dataset: instance of Dataset class, or None for ``Dataset()``
        """
        # Defaults are created per instance (not in the signature) so that two
        # experiments never share the same settings object.
        resolved_hyper = Hyperparameters() if hyper is None else hyper
        resolved_dataset = Dataset() if dataset is None else dataset

        self.id, self.output_path = id, output_path
        self.train_completed = train_completed
        self.hyper, self.dataset = resolved_hyper, resolved_dataset

In [4]:
# Experiment 0 with default settings: hyper and dataset are omitted, so
# Experiment.__init__ builds Hyperparameters() and Dataset() itself.
exp = Experiment(id=0, output_path='./exp_output')

In [34]:
# Name passed to the generator as data_path (also a key of dataset_spec).
key_dataset = 'dataset_1'

# Override the Dataset defaults in place: scenario 4 with 50% redundancy
# (redundancy_amount is documented on Dataset as "scenario 4 only").
exp.dataset.scenario = 4
print(exp.dataset.additional_dims)  # still the Dataset default (2)
exp.dataset.redundancy_amount = 0.5
print(exp.dataset.redundancy_amount)


# NOTE(review): DatasetGenerator is neither imported nor defined in the visible cells,
# so this cell fails on a fresh kernel unless it comes from elsewhere -- confirm and
# add the import to the first cell.
# NOTE(review): presumably load=True reads an existing dataset from data_path instead
# of generating it from dataset_spec (hence the commented-out dct_dataset) -- verify.
data_generator = DatasetGenerator(data_path=key_dataset, 
                                  # dct_dataset=dataset_spec[key_dataset],
                                  load=True,
                                  exp=exp)

2
0.5


In [35]:
# Unpack the two-element result of the generator into X_splits / y_splits.
# NOTE(review): presumably lists of per-split inputs and labels -- confirm against DatasetGenerator.
[X_splits, y_splits] = data_generator.generate_input_experiment()

In [36]:
# Shape of the first element of X_splits; the recorded run shows (20, 32).
X_splits[0].shape

(20, 32)

In [37]:
# NOTE(review): `idx` is not defined in any visible cell; the recorded output of this
# cell is "NameError: name 'idx' is not defined". Define the column index first.
# The .shape expression below is not the last statement of the cell, so its value
# would not be displayed even if it ran.
y_splits[0][:, idx].shape

# NOTE(review): overwrites X_splits[0] in place, so re-running the cell would apply
# the column selection again to the already-selected array.
X_splits[0] = X_splits[0][:, idx]

NameError: name 'idx' is not defined

In [None]:
# Shape of the first element of X_splits (no recorded output for this cell).
X_splits[0].shape

In [None]:
# NOTE(review): stray expression -- `self` is not bound at cell scope in the visible
# code, so running this cell would raise NameError; looks like a debugging leftover.
self

In [None]:
# Rebinds X_splits / y_splits to the result of add_mixture, replacing the values
# obtained from generate_input_experiment above.
# NOTE(review): presumably adds one noise feature and one redundant feature -- confirm
# against DatasetGenerator.add_mixture.
[X_splits, y_splits] = data_generator.add_mixture(n_noise_feat=1, n_rdndt_feat=1)

In [None]:
# Rank of the first element of X_splits.
# NOTE(review): presumably checks that the redundant features do not raise the rank -- confirm intent.
np.linalg.matrix_rank(X_splits[0])

In [None]:
# Displays the first element of the X_splits attribute stored on the generator.
# NOTE(review): this shows the whole object; consider a slice or .shape to keep the output small.
data_generator.X_splits[0]

In [None]:
# Toy one-hot label matrix (3 rows, 5 columns); the second and third rows are identical.
y = np.array([[1,0,0,0,0],
              [0,1,0,0,0],
              [0,1,0,0,0]])

# axis=0 makes np.unique compare whole rows, so the duplicated row is returned once.
np.unique(y, axis=0)