### MultiOutput Regression Models In Python

In [1]:
# check scikit-learn version
import sklearn
print(sklearn.__version__)

0.22.2.post1


In [2]:
##https://machinelearningmastery.com/multi-output-regression-models-with-python/
from sklearn.datasets import make_regression


In [3]:
## Create the dataset
X,y =make_regression(n_samples=1500,n_informative=5,n_features=10,n_targets=2)

In [4]:
X

array([[-0.10796702, -1.49451239,  0.35287741, ...,  1.32362798,
        -1.22412195,  0.84701664],
       [-0.01475647, -0.60784934,  0.72737795, ...,  0.19115423,
        -1.83936816, -0.04169088],
       [ 2.40895646,  0.66921326,  0.10293187, ..., -1.57294979,
        -0.81376977,  1.14639034],
       ...,
       [-1.30912717, -1.02134432,  0.4281954 , ...,  1.71277603,
         0.94568883, -0.1202902 ],
       [-1.70982098,  0.62095141,  1.01459588, ..., -1.19080204,
        -0.16744357,  0.82608914],
       [ 1.70538844, -0.31741795,  0.39043018, ..., -1.14016163,
         0.51034849, -1.19981783]])

In [5]:
y

array([[ 163.44635711,  145.69071966],
       [  11.96590864,  -42.49494524],
       [-141.03509745, -183.26331954],
       ...,
       [ 198.29568634,  226.58458005],
       [ -51.88647404,  -89.31664997],
       [ -12.49348983,   28.35871606]])

### Apply Linear Regression

In [6]:
from sklearn.linear_model import LinearRegression

In [7]:
lrregression=LinearRegression()
lrregression.fit(X,y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [8]:
test_data=[[-0.35383149,  0.39382202, -2.03033197,  0.08873402, -0.38576581,
        0.0032707 , -0.56476034, -0.67236167,  0.31317233,  1.5208706 ]]

In [9]:
lrregression.predict(test_data)

array([[-138.84531622, -114.75512011]])

### Decision Tree Regression

In [10]:
from sklearn.tree import DecisionTreeRegressor
dtregressor=DecisionTreeRegressor()
dtregressor.fit(X,y)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=None, splitter='best')

In [12]:
dtregressor.predict(test_data)

array([[-311.03214477, -262.23520866]])

### Random Forest Regressor

In [13]:
from sklearn.ensemble import RandomForestRegressor
rdregressor=RandomForestRegressor()
rdregressor.fit(X,y)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [14]:
rdregressor.predict(test_data)

array([[-131.00845014, -119.47704331]])

## CrossValidation

In [15]:
from sklearn.model_selection import cross_val_score

In [16]:
scores=cross_val_score(rdregressor,X,y,scoring='neg_mean_squared_error',cv=5)

In [17]:
print(scores)

[-2203.93866069 -1801.70593682 -2082.84110418 -2010.57673387
 -1857.4345669 ]


### All Algorithms does not support Multioutput Regressor

In [18]:
from sklearn.svm import LinearSVR
svregressor=LinearSVR()
svregressor.fit(X,y)

ValueError: bad input shape (1500, 2)

### MultiOutput Regressor sklearn

In [19]:
from sklearn.multioutput import MultiOutputRegressor

In [20]:
mulregressor=MultiOutputRegressor(svregressor)
mulregressor.fit(X,y)

MultiOutputRegressor(estimator=LinearSVR(C=1.0, dual=True, epsilon=0.0,
                                         fit_intercept=True,
                                         intercept_scaling=1.0,
                                         loss='epsilon_insensitive',
                                         max_iter=1000, random_state=None,
                                         tol=0.0001, verbose=0),
                     n_jobs=None)

In [21]:
mulregressor.predict(test_data)

array([[-138.84531622, -114.75512011]])

In [22]:
#Chained Models for Each Output (RegressorChain)

In [23]:
# example of fitting a chain of linear SVR for multioutput regression
from sklearn.datasets import make_regression
from sklearn.multioutput import RegressorChain
from sklearn.svm import LinearSVR


In [24]:
# create datasets
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=2, random_state=1)
# define model
model = LinearSVR()
wrapper = RegressorChain(model)


In [25]:
# fit model
wrapper.fit(X, y)


RegressorChain(base_estimator=LinearSVR(C=1.0, dual=True, epsilon=0.0,
                                        fit_intercept=True,
                                        intercept_scaling=1.0,
                                        loss='epsilon_insensitive',
                                        max_iter=1000, random_state=None,
                                        tol=0.0001, verbose=0),
               cv=None, order=None, random_state=None)

In [28]:
# make a prediction
data_in = [[-2.02220122, 0.31563495, 0.82797464, -0.30620401, 0.16003707, -1.44411381, 0.87616892, -0.50446586, 0.23009474, 0.76201118]]
yhat = wrapper.predict(data_in)
# summarize prediction
print(yhat[0])

[-93.147146    23.26813846]
