In [1]:
import numpy as np
import pandas as pd

from scipy.spatial.distance import pdist, squareform

from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn import metrics

from sklearn.linear_model import *
from sklearn.isotonic import IsotonicRegression

def lg_nrmse(gt, preds):
    # 각 Y Feature별 NRMSE 총합
    # Y_01 ~ Y_08 까지 20% 가중치 부여
    all_nrmse = []
    for idx in range(14): # ignore 'ID'
        rmse = metrics.mean_squared_error(gt[:,idx], preds[:,idx], squared=False)
        nrmse = rmse/np.mean(np.abs(gt[:,idx]))
        all_nrmse.append(nrmse)
    score = 1.2 * np.sum(all_nrmse[:8]) + 1.0 * np.sum(all_nrmse[8:15])
    return score, all_nrmse

df = pd.read_csv('abnormal_train.csv')

train_df, valid_df = train_test_split(df, train_size=0.9)

train_x = train_df.filter(regex='X') # Input : X Featrue
train_y = train_df.filter(regex='Y') # Output : Y Feature

valid_x = valid_df.filter(regex='X') # Input : X Featrue
valid_y = valid_df.filter(regex='Y') # Output : Y Feature

LR = MultiOutputRegressor(LinearRegression()).fit(train_x, train_y)

valid_preds = LR.predict(valid_x)
lg_nrmse(valid_y.values, valid_preds)

(3.098602750364134,
 [0.4138782459952281,
  0.4999670889667189,
  0.5027225514173277,
  0.27592706318341415,
  0.1118144161172736,
  0.3187980147920446,
  0.22228031227360895,
  0.03198852583187013,
  0.031774957365822434,
  0.06868538069989072,
  0.04950684898593225,
  0.0319392293976213,
  0.03181785717303569,
  0.03202701444884822])

In [2]:
model_list = [LinearRegression, RidgeCV, SGDRegressor, ElasticNetCV, LarsCV, LassoLarsCV, LassoLarsIC,
              OrthogonalMatchingPursuit, ARDRegression, BayesianRidge, HuberRegressor, RANSACRegressor,
              TheilSenRegressor, PoissonRegressor, TweedieRegressor, GammaRegressor, IsotonicRegression]

In [3]:
for model in model_list:
    LR = MultiOutputRegressor(model()).fit(train_x, train_y)
    valid_preds = LR.predict(valid_x)
    print(model)
    print(lg_nrmse(valid_y.values, valid_preds))

<class 'sklearn.linear_model._base.LinearRegression'>
(3.098602750364134, [0.4138782459952281, 0.4999670889667189, 0.5027225514173277, 0.27592706318341415, 0.1118144161172736, 0.3187980147920446, 0.22228031227360895, 0.03198852583187013, 0.031774957365822434, 0.06868538069989072, 0.04950684898593225, 0.0319392293976213, 0.03181785717303569, 0.03202701444884822])
<class 'sklearn.linear_model._ridge.RidgeCV'>
(3.091815421846318, [0.4132644523714602, 0.4982106122702692, 0.5006142524061072, 0.27271166166259464, 0.11195197394365942, 0.32209007820084007, 0.22054568465173124, 0.03209678877338952, 0.03175093268534207, 0.06916810414509832, 0.04907944891596705, 0.03202949551199405, 0.031900415077355165, 0.03210442037449985])
<class 'sklearn.linear_model._stochastic_gradient.SGDRegressor'>
(2.586188501991841e+17, [6.2309064211964504e+16, 1.0132479422157152e+17, 1.7060884684802636e+16, 3175409239496332.0, 977755373534433.0, 1.0012256979221452e+16, 5704677294064017.0, 2880913978544625.5, 3104163081



<class 'sklearn.linear_model._least_angle.LarsCV'>
(3.1190291060704207, [0.42527318871057007, 0.5024899011299532, 0.5044011352555904, 0.27799527476429114, 0.11222385178543595, 0.3189691248560484, 0.21834337995249636, 0.03255922152459382, 0.032028352474076004, 0.06912514149739733, 0.04922576215709954, 0.03282050630314614, 0.03237678561518508, 0.03274646444874124])




<class 'sklearn.linear_model._least_angle.LassoLarsCV'>
(3.0901652124437877, [0.41346259707634686, 0.5005640323507178, 0.501628990850315, 0.2730052597992927, 0.1116843213873577, 0.3196816483505982, 0.21819166254450698, 0.0321076207442003, 0.03167755493796888, 0.06900104314785209, 0.04913508037104964, 0.03196835786717485, 0.031870703826600094, 0.032121112569139396])
<class 'sklearn.linear_model._least_angle.LassoLarsIC'>
(3.090438068090736, [0.41372264307282586, 0.5005330975217807, 0.5017673951772217, 0.273112837050041, 0.11167183203230373, 0.3196660478396382, 0.2182300816930573, 0.03201235860143424, 0.03163684843718867, 0.06904462749307498, 0.04924827975388389, 0.03187558442648331, 0.03177426414373499, 0.03199891225040696])
<class 'sklearn.linear_model._omp.OrthogonalMatchingPursuit'>
(3.1178301921965788, [0.4159961952127074, 0.5044112736351649, 0.505489880428762, 0.2753646237945698, 0.11433088815253649, 0.32518999031955065, 0.21913275846318864, 0.03234663669538846, 0.03176531086644412

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

<class 'sklearn.linear_model._huber.HuberRegressor'>
(3.212273314493684, [0.42743221757275274, 0.5147172849842211, 0.5309471819406486, 0.27430489942502034, 0.11607149383853717, 0.3410390627629956, 0.22989683984894152, 0.03291717866651582, 0.03233376934841897, 0.07135911075317299, 0.049553623450671824, 0.032774763400136055, 0.03268098289147739, 0.03277967380224743])
<class 'sklearn.linear_model._ransac.RANSACRegressor'>
(29.968796507262592, [3.8995981665716304, 4.100212247053553, 5.049897760610234, 5.594634001840573, 2.255997720022684, 0.8837050168547068, 1.419991656926246, 0.2552279577941626, 0.16630449340536071, 0.21021711581815075, 0.5166304155898079, 0.42619469162272977, 0.1847069863272904, 0.3136253712907101])
<class 'sklearn.linear_model._theil_sen.TheilSenRegressor'>
(5.20043939709133, [0.697620488097087, 0.7815626126289513, 0.6068056796110524, 0.4379485883975646, 0.13194239597783297, 0.7401609922400137, 0.2604372247918225, 0.11363016674687192, 0.03719184357691233, 0.276425874140

  return np.exp(lin_pred)
  return np.exp(lin_pred)
  return -2 * (y - y_pred) / self.unit_variance(y_pred)
  dev = 2 * (xlogy(y, y/y_pred) - y + y_pred)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
  return np.exp(lin_pred)
  return np.exp(lin_pred)
  return -2 * (y - y_pred) / self.unit_variance(y_pred)
  dev = 2 * (xlogy(y, y/y_pred) - y + y_pred)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
  return np.exp(lin_pred)
  return np.exp(lin_pred)
  return -2 * (y - y_pred) / self.unit_variance(y_pred)
  dev = 2 * (xlogy(y, y/y_pred) - y + y_pred)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) 

ValueError: Some value(s) of y are out of the valid range for family PoissonDistribution

In [4]:
for model in [TweedieRegressor, IsotonicRegression]:
    LR = MultiOutputRegressor(model()).fit(train_x, train_y)
    valid_preds = LR.predict(valid_x)
    print(model)
    print(lg_nrmse(valid_y.values, valid_preds))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATION

<class 'sklearn.linear_model._glm.glm.TweedieRegressor'>
(3.1511334555705366, [0.4215487758969525, 0.5083150136313138, 0.5102883906905396, 0.27230438055695705, 0.11301689872341077, 0.33118879796856276, 0.2297046542363717, 0.03261882731073225, 0.03200687689126397, 0.06986610352628685, 0.049199968170411564, 0.032487834810294616, 0.03230171789673407, 0.032488067457736775])


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)


ValueError: X should be a 1d array