In [1]:
import numpy
import pandas

# Column labels of the survey table, split by role so that the group sizes
# below are derived from the lists instead of being typed in by hand.
predictor_names = ["Age", "Gender", "Married", "IncomeC", "HealthC", "ChildC",
                   "LifeSatC", "SES", "Smoke", "Spirit", "Finish"]
target_names = ["LifeSat", "Income"]
feature_names = predictor_names + target_names

# One row per respondent; values are ordered like `feature_names`.
data = numpy.array([
    [16, 0, 0,  0, 38, 0, 17, 17, 1, 30, 1, 22, 26],
    [28, 1, 0,  0, 38, 0, 16, 21, 1, 39, 1, 20, 15],
    [16, 0, 1, 16, 52, 1, 39, 40, 0, 30, 1, 42, 88],
    [23, 1, 0,  6, 51, 0, 22, 31, 0, 60, 1, 48, 73],
    [30, 0, 1, 25, 43, 2, 53, 36, 1, 39, 0, 33, 38],
    [19, 0, 1, 19, 55, 0, 28, 41, 0, 51, 1, 33, 45],
    [19, 1, 0,  0, 52, 2, 17, 52, 0, 35, 1, 21, 16],
    [34, 0, 0, 29, 60, 2, 20, 56, 0, 23, 1, 26, 64],
    [16, 1, 0,  0, 53, 0, 21, 27, 0, 29, 0, 37, 19],
    [25, 1, 0,  3, 39, 0, 18, 34, 1, 61, 1, 40, 56],
    [16, 1, 1,  1, 42, 0, 31, 29, 1, 58, 1, 35, 70],
    [16, 0, 0,  0, 43, 0, 15, 28, 1, 39, 1, 32, 71],
    [16, 0, 1, 18, 54, 1, 34, 38, 0, 40, 0, 37, 44],
    [16, 1, 0,  0, 52, 0, 20, 38, 0, 27, 1, 35, 25],
    [32, 1, 1, 26, 54, 1, 39, 37, 0, 30, 1, 47, 38],
    [19, 0, 0,  0, 46, 0, 17, 25, 0, 36, 1, 26, 39],
    [17, 1, 1, 10, 55, 2, 48, 53, 0, 43, 0, 42,  6],
    [24, 0, 0, 17, 52, 0, 16, 36, 0, 54, 1, 38, 75],
    [26, 1, 1,  0, 57, 1, 39, 41, 0, 32, 1, 42, 67],
])

# Two-level column index: the outer level tags a column as input ("X") or
# target ("Y"), the inner level carries the feature name. This is what makes
# `df.X` / `df.Y` select all inputs / all targets at once.
column_groups = ["X"] * len(predictor_names) + ["Y"] * len(target_names)
df = pandas.DataFrame(
    data,
    columns=pandas.MultiIndex.from_arrays([column_groups, feature_names]),
)

df

Unnamed: 0_level_0,X,X,X,X,X,X,X,X,X,X,X,Y,Y
Unnamed: 0_level_1,Age,Gender,Married,IncomeC,HealthC,ChildC,LifeSatC,SES,Smoke,Spirit,Finish,LifeSat,Income
0,16,0,0,0,38,0,17,17,1,30,1,22,26
1,28,1,0,0,38,0,16,21,1,39,1,20,15
2,16,0,1,16,52,1,39,40,0,30,1,42,88
3,23,1,0,6,51,0,22,31,0,60,1,48,73
4,30,0,1,25,43,2,53,36,1,39,0,33,38
5,19,0,1,19,55,0,28,41,0,51,1,33,45
6,19,1,0,0,52,2,17,52,0,35,1,21,16
7,34,0,0,29,60,2,20,56,0,23,1,26,64
8,16,1,0,0,53,0,21,27,0,29,0,37,19
9,25,1,0,3,39,0,18,34,1,61,1,40,56


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

# Hold out part of the rows for evaluation (default test fraction; the
# recorded output shows 14 training and 5 test rows).
#
# The seed is fixed so that "Restart & Run All" always yields the same
# partition; without it every run shuffles differently and none of the
# tables below can be reproduced.
# NOTE(review): the outputs saved in this notebook were produced by an
# unseeded split, so the row indices and predictions shown further down
# will change once the notebook is re-executed with this seed.
train, test = train_test_split(df, random_state=0)

print("train size:", len(train))
print("test  size:", len(test))

train size: 14
test  size: 5


In [3]:
# Baseline: a single SVR trained on the input columns to estimate the
# LifeSat target only.
life_sat_svr = SVR(C=10, gamma=0.001)
life_sat_svr.fit(train["X"], train["Y"]["LifeSat"])
prediction = life_sat_svr.predict(test["X"])

# Store the estimates under a new top-level "prediction" group, then take the
# "LifeSat" cross-section so the true value and the estimate sit side by side.
result = test.copy()
result[("prediction", "LifeSat")] = prediction
result.xs("LifeSat", level=1, axis=1)

Unnamed: 0,Y,prediction
13,35,36.457075
18,42,41.138579
5,33,38.723661
4,33,41.300296
6,21,37.472411


In [4]:
# Demonstration: a plain SVR is handed both target columns at once. The
# ValueError this raises is the point of the cell, so it is caught and shown
# rather than left to abort the notebook.
two_target_svr = SVR(C=10, gamma=0.001)
try:
    prediction = two_target_svr.fit(train.X, train.Y)
except ValueError as error:
    print(error)

bad input shape (14, 2)


In [5]:
from sklearn.multioutput import MultiOutputRegressor

# Wrap the SVR in MultiOutputRegressor so that both target columns can be
# fitted and predicted in one call.
multi_output_svr = MultiOutputRegressor(SVR(C=10, gamma=0.001))
multi_output_svr.fit(train.X, train.Y)
prediction = multi_output_svr.predict(test.X)

# Give the raw prediction array the test rows' index and a two-level header
# ("prediction", <target name>) so it lines up with the "Y" columns.
prediction_frame = pandas.DataFrame(
    prediction,
    index=test.index.values,
    columns=[["prediction"] * 2, test.Y.columns.values],
)
result = pandas.concat([test, prediction_frame], axis=1)
result[["Y", "prediction"]]

Unnamed: 0_level_0,Y,Y,prediction,prediction
Unnamed: 0_level_1,LifeSat,Income,LifeSat,Income
13,35,25,36.457075,39.800546
18,42,67,41.138579,39.652655
5,33,45,38.723661,51.595362
4,33,38,41.300296,39.401329
6,21,16,37.472411,46.375748


In [6]:
from functools import partial
from sklearn.metrics.pairwise import rbf_kernel

from multivariate_svr import MultivariateSVR

# Same C and gamma as the MultiOutputRegressor cell, but using the
# project-local MultivariateSVR; the RBF kernel is passed in as a callable
# with gamma already bound.
rbf_with_gamma = partial(rbf_kernel, gamma=0.001)
multivariate_svr = MultivariateSVR(C=10, kernel_func=rbf_with_gamma)
multivariate_svr.fit(train.X, train.Y)
prediction = multivariate_svr.predict(test.X)

# Label the estimates with their own top-level group so they can be compared
# against the true "Y" columns.
prediction_frame = pandas.DataFrame(
    prediction,
    index=test.index.values,
    columns=[["prediction MultiSVR"] * 2, test.Y.columns.values],
)
result = pandas.concat([test, prediction_frame], axis=1)
result[["Y", "prediction MultiSVR"]]

Unnamed: 0_level_0,Y,Y,prediction MultiSVR,prediction MultiSVR
Unnamed: 0_level_1,LifeSat,Income,LifeSat,Income
13,35,25,36.457075,39.800546
18,42,67,41.138579,39.652655
5,33,45,38.723661,51.595362
4,33,38,41.300296,39.401329
6,21,16,37.472411,46.375748


In [10]:
# Benchmark the fitting step only (no prediction) of the two multi-target
# approaches, using the same C and gamma as in the cells above.
# The training set has just 14 rows, so the timings are in the millisecond
# range and small differences should be read with care.
# NOTE(review): the targets are multiplied by 200 before fitting; the reason
# is not stated in this notebook - confirm the intent with the author.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours; re-run the notebook top to bottom before relying on the numbers.
%timeit MultiOutputRegressor(SVR(C=10, gamma=0.001)).fit(train.X, train.Y * 200)
%timeit MultivariateSVR(C=10, kernel_func=partial(rbf_kernel, gamma=0.001)).fit(train.X, train.Y * 200)

100 loops, best of 3: 3.63 ms per loop
100 loops, best of 3: 2.74 ms per loop


In [8]:
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameters on a log scale:
# C in 1 ... 100000 and gamma in 1 ... 0.00001.
exponents = range(6)
param_grid = dict(
    C=[10 ** power for power in exponents],
    gamma=[10 ** -power for power in exponents],
)

# Cross-validated search over the grid for the LifeSat target alone.
grid_search = GridSearchCV(SVR(), param_grid)
grid_search.fit(train.X, train.Y.LifeSat)
prediction = grid_search.predict(test.X)

print("found params:    ", grid_search.best_params_)

# Show the true LifeSat value next to the estimate of the tuned model.
result = test.copy()
result[("prediction CV", "LifeSat")] = prediction
result.xs("LifeSat", level=1, axis=1)

found params:     {'gamma': 0.001, 'C': 100}


Unnamed: 0,Y,prediction CV
13,35,40.793428
18,42,44.890324
5,33,36.945192
4,33,44.914439
6,21,39.977647


In [9]:
# Wrap the grid search itself in MultiOutputRegressor: the recorded output
# shows one set of best parameters per target column.
multi_grid_search = MultiOutputRegressor(grid_search)
multi_grid_search.fit(train.X, train.Y)
prediction = multi_grid_search.predict(test.X)

# Attach the estimates to the test rows under a "prediction CV" column group.
prediction_frame = pandas.DataFrame(
    prediction,
    index=test.index.values,
    columns=[["prediction CV"] * 2, test.Y.columns.values],
)
result = pandas.concat([test, prediction_frame], axis=1)

# Collect the winning parameters from each fitted per-target search.
# wait for https://github.com/scikit-learn/scikit-learn/pull/7735
# params = multi_grid_search.get_estimators_attributes("best_params_")
params = [fitted_search.best_params_
          for fitted_search in multi_grid_search.estimators_]
print("found params:    ", params)

result[["Y", "prediction CV"]]

found params:     [{'gamma': 0.001, 'C': 100}, {'gamma': 0.01, 'C': 100}]


Unnamed: 0_level_0,Y,Y,prediction CV,prediction CV
Unnamed: 0_level_1,LifeSat,Income,LifeSat,Income
13,35,25,40.793428,41.490668
18,42,67,44.890324,49.150271
5,33,45,36.945192,49.184391
4,33,38,44.914439,48.419019
6,21,16,39.977647,48.57666
