# How to add new CondDensityEstimators, CauseClusterers, and EffectClusterers to CFL
While the CFL software package comes with pre-implemented models for
conditional density estimation and clustering (written either by us or by
scikit-learn), it has also been designed to make it easy to try out new
models. To do so, there are two main steps:
- Make a Python class for your model that inherits from the appropriate
  abstract class (either CDEModel, CCModel, or ECModel)
- Pass an instance of that class in to your Experiment

In [1]:
import numpy as np
from cfl import Experiment
from cfl.cond_density_estimation import CDEModel

In [2]:
# Build a toy dataset: X and Y are independent Gaussian noise whose shapes are
# taken from the same data_info dict that is later handed to the Experiment.
data_info = dict(
    X_dims=(10000, 5),
    Y_dims=(10000, 3),
    Y_type='continuous',
)
X = np.random.normal(size=data_info['X_dims'])
Y = np.random.normal(size=data_info['Y_dims'])
# Sanity check: print both shapes, one per line.
print(X.shape, Y.shape, sep='\n')

(10000, 5)
(10000, 3)


In [3]:
# Make a new conditional density estimator that inherits from CDEModel. Your
# class must implement all of the methods specified by CDEModel.
class MyCDE(CDEModel):
    """Minimal example of a user-defined conditional density estimator.

    Nothing is actually estimated: ``train`` and ``predict`` both return
    freshly drawn Gaussian noise with the same shape as ``dataset.get_Y()``.
    The class only demonstrates which methods a CDEModel subclass provides.
    """

    def __init__(self, data_info, model_params):
        """Keep references to the data description and the model parameters.

        Arguments:
            data_info: dict describing the data (e.g. 'X_dims', 'Y_dims',
                'Y_type'); stored unchanged.
            model_params: dict of model parameters; stored unchanged.
        """
        self.data_info = data_info
        self.model_params = model_params

    def train(self, dataset, prev_results=None):
        """Return a placeholder 'pyx' array for the training data.

        Arguments:
            dataset: object exposing ``get_Y()``; only the shape of the
                returned array is used.
            prev_results: accepted but not used by this toy model.

        Returns:
            dict with a single key 'pyx' holding the random array.
        """
        target_shape = dataset.get_Y().shape
        return {'pyx': np.random.normal(size=target_shape)}

    def predict(self, dataset, prev_results=None):
        """Return a placeholder 'pyx' array for the given data.

        Arguments:
            dataset: object exposing ``get_Y()``; only the shape of the
                returned array is used.
            prev_results: accepted but not used by this toy model.

        Returns:
            dict with a single key 'pyx' holding the random array.
        """
        target_shape = dataset.get_Y().shape
        return {'pyx': np.random.normal(size=target_shape)}

    def load_model(self, path):
        """Do nothing; this toy model has no state to load from ``path``."""

    def save_model(self, path):
        """Do nothing; this toy model has no state to save to ``path``."""

    def get_model_params(self):
        """Return the ``model_params`` object given at construction."""
        return self.model_params

In [4]:
# A custom model is supplied by putting a MyCDE instance under the 'model' key
# of CDE_params, where the string name of a pre-defined model would otherwise go.
CDE_params = dict(model=MyCDE(data_info, model_params={}))

# The cause clusterer is still selected by its string name.
CC_params = dict(model='KMeans', model_params=dict(n_clusters=2))

# block_names and block_params are parallel lists: entry i of block_params
# holds the parameters for the block named by entry i of block_names.
block_names = ['CondDensityEstimator', 'CauseClusterer']
block_params = [CDE_params, CC_params]
my_exp = Experiment(
    X_train=X,
    Y_train=Y,
    data_info=data_info,
    block_names=block_names,
    block_params=block_params,
    results_path=None,
)

Block: model_params not specified in input, defaulting to {}
Block: verbose not specified in input, defaulting to 1
Block: tune not specified in input, defaulting to False
Block: user_input not specified in input, defaulting to True
Block: verbose not specified in input, defaulting to 1


In [5]:
# Train the Experiment's blocks in order. The call is left as a bare expression
# so the notebook displays its return value: a dict keyed by block name holding
# each block's results (shown in the output below).
my_exp.train()

#################### Beginning CFL Experiment training. ####################
Beginning CondDensityEstimator training...
CondDensityEstimator training complete.
Beginning CauseClusterer training...
CauseClusterer training complete.
Experiment training complete.


{'CondDensityEstimator': {'pyx': array([[ 0.53989344,  0.37887031, -0.23632235],
         [-0.03205369,  0.87681685, -0.60290023],
         [ 0.95679393,  0.82015093,  0.97038817],
         ...,
         [-0.88621743, -0.38655032, -0.07755114],
         [-0.66582756,  1.6674345 ,  1.13050867],
         [-0.01679005,  1.91682365, -1.15260229]])},
 'CauseClusterer': {'x_lbls': array([1, 1, 0, ..., 1, 0, 1], dtype=int32)}}