# How to add new CondDensityEstimators, CauseClusterers, and EffectClusterers to CFL
While the CFL software package comes with pre-implemented models for
conditional density estimation and clustering (written either by us or by
scikit-learn), it has also been designed to make it easy to try out new
models. To do so, there are two main steps:
- Make a Python class for your model that inherits from the appropriate
  abstract class (either CDEModel, CCModel, or ECModel)
- Pass an instance of it in to your Experiment

In [None]:
import numpy as np
from cfl import Experiment
from cfl.cond_density_estimation import CDEModel

In [30]:
# generate a toy dataset: X and Y are independent standard-normal samples whose
# shapes are taken from data_info
data_info = {
    'X_dims': (10000, 4),
    'Y_dims': (10000, 3),
    'Y_type': 'continuous',
}
# X is drawn before Y, one np.random.normal call per array
X, Y = (np.random.normal(size=data_info[dims_key])
        for dims_key in ('X_dims', 'Y_dims'))
print(X.shape, Y.shape, sep='\n')

(10000, 4)
(10000, 3)


In [27]:
# Make a new conditional density estimator that inherits from CDEModel. Your
# class must implement all methods specified by CDEModel.
class MyCDE(CDEModel):
    """Placeholder conditional density estimator used to demonstrate the API.

    Nothing is learned: both train and predict return standard-normal noise
    shaped like the dataset's Y array.
    """

    def __init__(self, data_info, model_params):
        """Store the data description and the model parameters unchanged."""
        self.model_params = model_params
        self.data_info = data_info

    def train(self, dataset, prev_results=None):
        """Return random 'pyx' values shaped like dataset.get_Y().

        The shape of the generated array is printed. prev_results is accepted
        but not used.
        """
        target_shape = dataset.get_Y().shape
        pyx = np.random.normal(size=target_shape)
        print(pyx.shape)
        return {'pyx': pyx}

    def predict(self, dataset, prev_results=None):
        """Return random 'pyx' values shaped like dataset.get_Y().

        prev_results is accepted but not used.
        """
        return {'pyx': np.random.normal(size=dataset.get_Y().shape)}

    def load_model(self, path):
        """Do nothing; this toy model has no state to load."""
        return None

    def save_model(self, path):
        """Do nothing; this toy model has no state to save."""
        return None

    def get_model_params(self):
        """Return the model_params object given at construction."""
        return self.model_params

In [28]:
# Blocks run in this order; block_params below is listed in the same order.
block_names = ['CondDensityEstimator', 'CauseClusterer']

# A MyCDE instance can be given as the value for the 'model' key in
# CDE_params, instead of the string name of a pre-defined model.
CDE_params = {'model': MyCDE(data_info, model_params={})}
CC_params = {
    'model': 'KMeans',
    'model_params': {'n_clusters': 2},
}
block_params = [CDE_params, CC_params]

my_exp = Experiment(
    X_train=X,
    Y_train=Y,
    data_info=data_info,
    block_names=block_names,
    block_params=block_params,
    results_path=None,
)

model_params not specified in input, defaulting to {}
verbose not specified in input, defaulting to 1
tune not specified in input, defaulting to False
user_input not specified in input, defaulting to True
verbose not specified in input, defaulting to 1


In [29]:
# Train the experiment's blocks. This call is the last expression in the cell,
# so its return value (a dict keyed by block name, per the output below) is
# echoed rather than stored.
my_exp.train()

#################### Beginning CFL Experiment training. ####################
Beginning CondDensityEstimator training...
(10000, 3)
CondDensityEstimator training complete.
Beginning CauseClusterer training...
CauseClusterer training complete.
Experiment training complete.


{'CondDensityEstimator': {'pyx': array([[-1.58177497, -1.73465965, -1.7839942 ],
         [ 1.74629161,  0.31239298, -0.59897587],
         [-0.70033947,  0.75982457,  0.54256081],
         ...,
         [-0.45122061,  1.87503861,  0.94817103],
         [-0.23302877,  0.04509947, -0.3027354 ],
         [-0.77217984,  0.4251049 ,  1.10276635]])},
 'CauseClusterer': {'x_lbls': array([1, 0, 1, ..., 1, 1, 1], dtype=int32)}}