**TODO**
* Create parameter dictionary
* `blockTools` wrapper
  * Transform raw output
* Write cross-validation process

### Import modules

In [14]:
import numpy as np
import pandas as pd

import sklearn.datasets as datasets
import sklearn.linear_model as linear_model

from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri

### Declare parameters

In [102]:
# Type of the target variable
Y_TYPE = "binary"
# Number of cross-validation folds (also passed to blockTools as n_tr)
K = 10

In [None]:
# Parameter dictionary; the 'data' entry holds the settings read by
# make_synth_data, including 'n_classes' and 'weights' for a discrete target.
P = {'data': {'y_type': 'binary',
              'n_units': 100,
              'n_features': 20, 'n_informative': 2,
              'n_classes': 2, 'weights': None}}

### Define functions

In [148]:
def make_synth_data(P):
    '''Create a synthetic dataset using sample generators from scikit-learn.

    Parameters
    ----------
    P : dict
        Parameter dictionary. The settings are read from ``P['data']``:

        * ``y_type`` -- ``'binary'`` (``'discrete'`` is accepted as a synonym)
          for a classification target, or ``'continuous'`` for a regression
          target.
        * ``n_units``, ``n_features``, ``n_informative`` -- number of units,
          features and informative features. Required for a discrete target;
          for a continuous target they fall back to 100, 100 and 10 when
          absent.
        * ``n_classes`` (optional, default 2) and ``weights`` (optional,
          default None) -- only used for a discrete target.

    Returns
    -------
    X, y
        Feature matrix and target as returned by the scikit-learn generator.

    Raises
    ------
    ValueError
        If ``y_type`` is not one of the supported values.
    '''
    data = P['data']
    y_type = data['y_type']
    if y_type in ('binary', 'discrete'):
        X, y = datasets.make_classification(
            n_samples=data['n_units'],
            n_features=data['n_features'],
            n_informative=data['n_informative'],
            n_redundant=0, n_repeated=0,
            n_classes=data.get('n_classes', 2),
            # scikit-learn needs at least one cluster per class
            n_clusters_per_class=1,
            weights=data.get('weights'),
            flip_y=0.01, class_sep=1.0, hypercube=False, shift=0.0, scale=1.0,
            shuffle=True, random_state=None)
    elif y_type == 'continuous':
        X, y = datasets.make_regression(
            n_samples=data.get('n_units', 100),
            n_features=data.get('n_features', 100),
            n_informative=data.get('n_informative', 10),
            n_targets=1, bias=0.0, effective_rank=None,
            tail_strength=0.5, noise=0.0, shuffle=True, coef=False,
            random_state=None)
    else:
        # Fail loudly instead of hitting an unbound X, y at the return below
        raise ValueError(
            "Unsupported y_type {!r}; expected 'binary', 'discrete' or "
            "'continuous'".format(y_type))
    return X, y

def block(X, n_folds=None):
    '''Assign each unit (row of X) to a fold using the R package blockTools.

    Parameters
    ----------
    X : 2-D array
        Feature matrix; its columns are named ``v1``, ``v2``, ... before being
        handed to R. Only ``v1`` is used as a blocking variable.
    n_folds : int, optional
        Number of units per block, passed to blockTools as ``n_tr``. Defaults
        to the module-level ``K``.

    Returns
    -------
    blocks : pandas.DataFrame
        Long-format table with columns ``k`` (fold index, 0 to n_folds - 1)
        and ``unit`` (integer unit id taken from the row index of X).
    max_dist : pandas.Series
        The ``Max Distance`` column reported by blockTools, one value per
        block.
    '''
    if n_folds is None:
        n_folds = K
    # Activate pandas conversion support
    pandas2ri.activate()
    # Import blockTools (named so it does not shadow this function)
    block_tools = importr('blockTools')
    # Convert X to pandas DataFrame; reset_index adds the 'index' id column
    col_names = ['v{}'.format(i + 1) for i in range(X.shape[1])]
    df = pd.DataFrame(X, columns=col_names).reset_index()
    # Perform blocking
    result = block_tools.block(df, id_vars='index', block_vars='v1',
                               n_tr=n_folds)
    # Extract assignment DataFrame
    blocks = pandas2ri.ri2py_dataframe(result.rx2('blocks').rx2('1'))
    # Remove column of max within-block distance
    max_dist = blocks.pop('Max Distance')
    # The remaining columns hold one unit per fold; relabel them 0..n_folds-1
    blocks.columns = np.arange(n_folds)
    # Reshape wide (block x fold) to long (fold, unit); stack drops missing units
    blocks = blocks.T.stack()
    blocks.index = blocks.index.droplevel(1)
    blocks = blocks.reset_index()
    blocks.columns = ['k', 'unit']
    blocks['unit'] = blocks['unit'].astype(int)
    return blocks, max_dist

def fit_model():
    '''Cross-validate an L2-penalised logistic regression over the folds.

    Reads the module-level ``X``, ``y``, ``folds`` and ``K``. For each fold
    ``k`` the model is fitted on every unit outside the fold and the value of
    ``model.score`` on the held-out units is printed.
    '''
    model = linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0,
                                            fit_intercept=True, intercept_scaling=1,
                                            class_weight=None, random_state=None, solver='liblinear',
                                            max_iter=100, multi_class='ovr', verbose=0,
                                            warm_start=False, n_jobs=1)
    for k in range(K):
        # Integer ids of the units held out in this fold
        test_units = np.asarray(folds.loc[folds['k'] == k, 'unit'])
        # Build a boolean mask: `~` on the integer ids would be a bitwise NOT
        # (negative indices), not the complement of the test set
        is_test = np.zeros(len(y), dtype=bool)
        is_test[test_units] = True
        model.fit(X[~is_test], y[~is_test])
        print(model.score(X[is_test], y[is_test]))

### Perform analysis

In [149]:
# Create synthetic data from the parameter dictionary
X, y = make_synth_data(P)

# Use blockTools for cross-validation assignment
folds, max_dist = block(X)