TODO: Redo this as a lasso-type fit using sklearn. Divide the sample into three sets (train, test, validate) so that the size of the lasso hyperparameter can be chosen on the validation set. You'll still need the multipolyfit code to expand your z's into the appropriate polynomial basis.

In [2]:
%matplotlib inline

In [3]:
# load up relevant packages
from __future__ import print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sympy as sp
from sklearn.cross_validation import train_test_split
from sympy import pprint

multipolyfit from https://github.com/mrocklin/multipolyfit/blob/master/multipolyfit/core.py

In [4]:
from numpy import linalg, zeros, ones, hstack, asarray
from itertools import combinations_with_replacement

def basis_vector(n, i):
    """ Build the length-``n`` integer unit vector with a single 1 at index ``i``

    >>> from multipolyfit.core import basis_vector
    >>> basis_vector(3, 1)
    array([0, 1, 0])
    >>> basis_vector(5, 4)
    array([0, 0, 0, 0, 1])
    """
    unit = zeros(n, dtype=int)
    # Plain index assignment: an out-of-range ``i`` raises IndexError.
    unit[i] = 1
    return unit

def as_tall(x):
    """ Append a trailing axis of length 1, so shape (n,) becomes (n, 1) """
    tall_shape = x.shape + (1,)
    return x.reshape(tall_shape)

def stack_x(xs, deg):
    """ Expand covariates into every monomial built from ``deg`` factors

    A column of ones is prepended to ``xs`` before the products are formed,
    so a product of exactly ``deg`` columns also yields all lower-order
    terms (the ones column soaks up the unused degrees).

    Parameters
    ----------
    xs : 2-d array, shape (num_samples, num_covariates)
    deg : int
        Number of factors in each monomial (the total degree).

    Returns
    -------
    A : 2-d array, shape (num_samples, num_terms)
        One column per monomial, in the same order as ``powers``.
    powers : list of 1-d int arrays, each of length num_covariates + 1
        Exponent pattern of each column of ``A``; entry 0 is the exponent
        of the prepended ones column.
    """
    num_covariates = xs.shape[1]
    xs = hstack((ones((xs.shape[0], 1), dtype=xs.dtype), xs))

    # Rows of the identity matrix are the unit exponent vectors, one per
    # column of the augmented ``xs`` (ones column included).
    generators = np.eye(num_covariates + 1, dtype=int)

    # All combinations of degrees.  Built as a real list: on Python 3 ``map``
    # returns a one-shot iterator, which would be exhausted while building
    # ``A`` and handed back to the caller empty.
    powers = [sum(combo)
              for combo in combinations_with_replacement(generators, deg)]

    # Raise data to specified degree pattern, one output column per pattern
    A = np.column_stack([(xs ** p).prod(1) for p in powers])

    return A, powers

def power_selector(zi, zj, powers, beta):
    """ Pick the coefficient(s) of the term containing ``z<zi> * z<zj>``

    Parameters
    ----------
    zi, zj : int
        Variable numbers. They are shifted by 3 to index the exponent
        pattern, so 4 maps to slot 1 (slot 0 is the constant column).
        Passing the same number twice selects the squared term.
    powers : sequence of 1-d int arrays
        Exponent patterns, e.g. as returned by ``stack_x``.
    beta : array
        Coefficients aligned with ``powers``; must support boolean-mask
        indexing.

    Returns
    -------
    The entries of ``beta`` whose exponent pattern equals the requested one
    (empty when no pattern matches).
    """
    # Total degree, read off the first pattern.
    powersum = np.sum(powers[0])
    # Builtin ``int`` instead of the ``np.int`` alias, which has been removed
    # from NumPy; the resulting dtype is the same.
    powers_desired = np.zeros(len(powers[0]), dtype=int)
    powers_desired[zi - 3] += 1
    powers_desired[zj - 3] += 1
    # Whatever degree is left over goes to the constant column.
    powers_desired[0] = powersum - np.sum(powers_desired)
    ith = np.array([np.all(power == powers_desired) for power in powers])
    return beta[ith]

def mk_model(beta, powers):
    """ Wrap fitted coefficients and exponent patterns in a plain callable

    The returned function takes one positional value per covariate and
    evaluates the polynomial described by ``beta``/``powers`` at that point.
    It raises ValueError when given the wrong number of values.
    """
    def model(*args):
        # Entry 0 of every pattern belongs to the constant term, so the
        # number of real inputs is one less than the pattern length.
        expected = len(powers[0]) - 1
        if len(args) != expected:
            raise ValueError("Expected %d inputs"%expected)
        point = asarray((1,) + args)
        total = 0
        for exponents, weight in zip(powers, beta):
            total = total + weight * (point**exponents).prod()
        return total
    return model

def mk_pretty_function(beta, powers):
    """ Render every non-zero polynomial term as a short text label

    Parameters
    ----------
    beta : sequence of numbers
        Coefficients, aligned with ``powers``.
    powers : sequence of int sequences
        Exponent patterns; entry 0 of each pattern is the constant column.

    Returns
    -------
    list of str
        One string per term whose coefficient is not zero, e.g.
        ``'3.33e-02 z4z4z7'``: the coefficient in ``.2e`` notation, a space,
        then each variable name repeated as many times as its exponent.
        The constant term is just the coefficient followed by a space.
    """
    num_covariates = len(powers[0]) - 1
    # Slot 0 is the constant column and has no name; the variables are
    # numbered from z4 upwards, the same naming mk_sympy_function uses.
    names = [''] + ['z{0}'.format(i) for i in range(4, num_covariates + 4)]
    terms = []
    for coef, power in zip(beta, powers):
        # Skip terms the fit has zeroed out.  ``.2e`` notation only shows
        # '0.00e+00' for an exact zero, so this matches the formatted value.
        if coef == 0:
            continue
        term = '{0:.2e}'.format(coef) + ' '
        term += ''.join(name * int(count) for name, count in zip(names, power))
        terms.append(term)
    return terms

def mk_sympy_function(beta, powers):
    """ Build the fitted polynomial as one sympy expression

    The variables are sympy symbols named z4, z5, ... (one per covariate);
    the constant column is represented by sympy's ``S.One``.  Each term is
    its coefficient times the product of the variables raised to the term's
    exponent pattern, and the terms are summed with ``Add``.
    """
    from sympy import symbols, Add, Mul, S
    num_covariates = len(powers[0]) - 1
    variables = (S.One,) + symbols('z4:%d'%(num_covariates+4))
    terms = []
    for power, coeff in zip(powers, beta):
        factors = [var**deg for var, deg in zip(variables, power)]
        terms.append(coeff * Mul(*factors))
    return Add(*terms)

In [5]:
# Load the donut mesh table; the first CSV column becomes the DataFrame index.
# NOTE(review): absolute, machine-specific path -- this cell only runs where that file exists.
df_in = pd.read_csv('/Users/cpd/Projects/WavefrontPSF/meshes/donuts_fixed_rzeros.csv', index_col=0)

In [6]:

# select rzero near 0.10, x near 10, y near 0
# (element-wise AND of the three boolean masks)
selection = (np.isclose(df_in['rzero'], 0.10)
             & np.isclose(df_in['x'], 10)
             & np.isclose(df_in['y'], 0))
df = df_in[selection]

# input columns: z4 .. z11
# `range` rather than `xrange` so the cell runs on Python 2 and Python 3
x_keys = ['z{0}'.format(zi) for zi in range(4, 12)]

# output columns to be fitted
y_keys = ['flux', 'Mx', 'My', 'e0prime', 'e0', 'e1', 'e2',
          'delta1', 'delta2', 'zeta1', 'zeta2']

In [7]:
# stack xs
deg = 3  # total degree of the polynomial basis (cubic)

xs = df[x_keys].values
ys = df[y_keys].values

x_powers, powers = stack_x(xs, deg)

# # normalize the data
# x_powers_std = x_powers.std(axis=0)
# x_powers[:, 1:] /= x_powers_std[1:]

# split dataset. 0.6 train, 0.2 val, 0.2 test
# A fixed seed keeps the split -- and every score computed from it -- the
# same on each run of the notebook.
split_seed = 0
# First hold out 40% of the rows, then cut that holdout in half.
x_powers_train, x_powers_holdout, ys_train, ys_holdout = train_test_split(
    x_powers, ys, test_size=0.4, random_state=split_seed)
x_powers_test, x_powers_val, ys_test, ys_val = train_test_split(
    x_powers_holdout, ys_holdout, test_size=0.5, random_state=split_seed)

In [8]:
from sklearn.linear_model import Lasso, LassoLars, Ridge, LinearRegression

# Settings shared by the regressors constructed in the next cell.
# No separate intercept is fitted: stack_x already puts a constant (all-ones)
# column into the design matrix.
fit_intercept=False
normalize=False
# Iteration cap and convergence tolerance handed to Lasso.
max_iter=10000
tol=1e-8
# Handed to Lasso as its `positive` option.
positive=False
# NOTE(review): best_model / score are not read in the cells shown here --
# presumably placeholders for a hyperparameter search; confirm before removing.
best_model = None
score = -1000

# Handed to Lasso as `alpha` (the weight of its penalty term).
alpha = 1e-3

In [9]:
# Baseline: plain LinearRegression fitted on the training split; its
# .score() on the test split is printed for comparison.
linear = LinearRegression(fit_intercept=fit_intercept, normalize=normalize)
linear.fit(x_powers_train, ys_train)
print('linear', linear.score(x_powers_test, ys_test))

# Lasso with the settings from the previous cell, fitted and scored on the
# same splits as the baseline.
lasso = Lasso(alpha=alpha, normalize=normalize, positive=positive,
                      fit_intercept=fit_intercept,
                      tol=tol, max_iter=max_iter)
lasso.fit(x_powers_train, ys_train)
print('lasso', lasso.score(x_powers_test, ys_test))   

# NOTE(review): the ridge fit below is disabled, so no `ridge` model is
# created here; anything that refers to `ridge` needs these lines re-enabled.
# ridge = Ridge(alpha=alpha, fit_intercept=fit_intercept, tol=tol, max_iter=max_iter, normalize=normalize)
# ridge.fit(x_powers_train, ys_train)
# print('ridge', ridge.score(x_powers_test, ys_test))  

linear 0.981299345186
lasso 0.978102483017


In [10]:
from IPython.html.widgets import interact

def _available_regressors():
    """Map each regression name to its fitted model, for the models that exist."""
    models = {'lasso': lasso, 'linear': linear}
    # The ridge fit is optional (its lines may be commented out), so only
    # offer it when a `ridge` model has actually been created.
    if 'ridge' in globals():
        models['ridge'] = ridge
    return models

def func(const_txt='1.', yith=2, regression='lasso'):
    """Print the fitted polynomial for one output variable.

    Parameters
    ----------
    const_txt : str
        Text of a float by which every coefficient is multiplied.
    yith : int
        Row of the regressor's ``coef_`` to show, i.e. the position of the
        output variable in ``y_keys``.
    regression : str
        Which fitted model to read: 'lasso', 'linear', or 'ridge' when a
        ridge model has been fitted.

    Raises
    ------
    ValueError
        If ``regression`` does not name an available model.
    """
    models = _available_regressors()
    if regression not in models:
        raise ValueError('unknown or unfitted regression %r; choose from %s'
                         % (regression, sorted(models)))
    regressor = models[regression]

    const = float(const_txt)
    beta = (const * regressor.coef_)[yith]

    terms = mk_pretty_function(beta, powers)
    # print the terms, joined without a dangling ' + ' at the end
    print(' + '.join(terms))

# The slider covers every fitted output in y_keys; the dropdown lists only
# the models that exist, in the original ridge / lasso / linear order.
interact(func, yith=(0, len(y_keys) - 1),
         regression=[name for name in ('ridge', 'lasso', 'linear')
                     if name in _available_regressors()])

1.59e+01  + -1.00e+00 z7 + -1.61e-02 z9 + 3.33e-02 z4z4z7 + 8.42e-02 z4z5z8 + -5.02e-02 z4z5z10 + -9.02e-02 z4z6z7 + 5.56e-02 z4z6z9 + 4.92e-01 z4z7z11 + 5.28e-02 z5z6z10 + 2.44e-01 z5z8z11 + -9.19e-02 z5z10z11 + -1.69e-02 z6z6z9 + -2.47e-01 z6z7z11 + 9.82e-02 z6z9z11 + -9.93e-03 z7z8z10 + 5.35e-01 z7z11z11 + 


<function __main__.func>