In [90]:
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest

from sklearn.base import BaseEstimator

from sklearn.utils.validation import check_X_y, check_array, check_is_fitted, column_or_1d

from tscv import GapKFold

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

%matplotlib inline

The goal here is to build and evalutate simple models based on mean meter readings (per day of week, per hour).

In [19]:
df_meters = pd.read_csv('../../data/intermediate/building_1176/train_and_building_meta_1176.csv',
                        usecols=['timestamp', 'building_id', 'meter', 'meter_reading'],
                        parse_dates=['timestamp'], index_col=['timestamp'])
df_meters.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 26345 entries, 2016-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 3 columns):
building_id      26345 non-null int64
meter            26345 non-null int64
meter_reading    26345 non-null float64
dtypes: float64(1), int64(2)
memory usage: 823.3 KB


In [20]:
df_meters.head()

Unnamed: 0_level_0,building_id,meter,meter_reading
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-01-01 00:00:00,1176,0,70.062
2016-01-01 00:00:00,1176,1,13.6612
2016-01-01 00:00:00,1176,2,0.0
2016-01-01 01:00:00,1176,0,70.062
2016-01-01 01:00:00,1176,1,13.6612


In [21]:
# Check all building ids are 1176 then drop column 'building_id'
print('building_id values : {}'.format(df_meters.building_id.unique()))
df_meters.drop('building_id', axis=1, inplace=True)
df_meters.head()

building_id values : [1176]


Unnamed: 0_level_0,meter,meter_reading
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-01 00:00:00,0,70.062
2016-01-01 00:00:00,1,13.6612
2016-01-01 00:00:00,2,0.0
2016-01-01 01:00:00,0,70.062
2016-01-01 01:00:00,1,13.6612


In [22]:
# keep only electricity meter

df_meter = df_meters[df_meters['meter']==0][['meter_reading']]
df_meter.head()

Unnamed: 0_level_0,meter_reading
timestamp,Unnamed: 1_level_1
2016-01-01 00:00:00,70.062
2016-01-01 01:00:00,70.062
2016-01-01 02:00:00,69.186
2016-01-01 03:00:00,68.678
2016-01-01 04:00:00,69.113


In [23]:
df_meter.shape

(8777, 1)

An important and tricky point is how are we going to split the data between train and validation.<br>
For time-serie data, usually we cannot do the usual random split, because of correlations.<br>
Usually, some kind of walk-forward approach is used.<br>
Here, we use hv-block cross validation : we keep a gap of unused data between train and validation, to avoid using correlated data between train and validation.<br>
This method has been studied and described by Racine (2000) : http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.30.6748&rep=rep1&type=pdf<br>
We used an open-source implementation that extends scikit-learn : http://www.zhengwenjie.net/tscv/<br>
See notebook 'test_tscv_lib.ipynb'

Let's build a mean value estimator.

From https://github.com/scikit-learn-contrib/project-template/blob/master/skltemplate/_template.py

check_array and check_X_y

https://github.com/scikit-learn/scikit-learn/blob/e5698bde9/sklearn/utils/validation.py#L904

In [24]:
# Actually we don't need that because it already exists in sklearn, under the name 'dummyRegressor'
# https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html

In [25]:
class MeanEstimator(BaseEstimator):
    """ A template estimator to be used as a reference implementation.
    For more information regarding how to build your own estimator, read more
    in the :ref:`User Guide <user_guide>`.
    Parameters
    ----------
    demo_param : str, default='demo_param'
        A parameter used for demonstation of how to pass and store paramters.
    """
    def __init__(self, demo_param='demo_param'):
        self.demo_param = demo_param

    def fit(self, X, y):
        """A reference implementation of a fitting function.
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).
        Returns
        -------
        self : object
            Returns self.
        """
        
        X, y = check_X_y(X, y, accept_sparse=True)
        """Input validation for standard estimators.
    Checks X and y for consistent length, enforces X to be 2D and y 1D. By
    default, X is checked to be non-empty and containing only finite values.
    Standard input checks are also applied to y, such as checking that y
    does not have np.nan or np.inf targets. For multi-label y, set
    multi_output=True to allow 2D and sparse y. If the dtype of X is
    object, attempt converting to float, raising on failure.
    """
        
        
        self.mean_value = y.mean()
        
        
        self.is_fitted_ = True
        # `fit` should always return `self`
        return self

    
    
    
    def predict(self, X):
        """ A reference implementation of a predicting function.
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        Returns
        -------
        y : ndarray, shape (n_samples,)
            Returns an array of ones.
        """
        
        
        X = check_array(X, accept_sparse=True)
        """Input validation on an array, list, sparse matrix or similar.
    By default, the input is checked to be a non-empty 2D array containing
    only finite values. If the dtype of the array is object, attempt
    converting to float, raising on failure."""
        
        check_is_fitted(self, 'is_fitted_')
        
        
        return self.mean_value * np.ones(X.shape[0], dtype=np.int64)
    
    
    
    

In [26]:
x_meter.iloc[0]

NameError: name 'x_meter' is not defined

In [None]:
y_meter.columns

In [None]:
# Lets test our estimator

mean_estimator = MeanEstimator()

In [None]:
x2 = np.array([[1],[2],[3]])

In [None]:
x2.shape

In [None]:
mean_estimator.fit(X=x2, y=[4,5,6])

In [None]:
mean_estimator.fit(x_meter, y=y_meter['meter_reading'])

In [None]:
mean_estimator.predict([x_meter.iloc[0]])

In [66]:
y_meter['meter_reading'].shape[0]


7751

In [29]:
from sklearn.dummy import DummyRegressor

In [30]:
df_features = pd.read_csv('../../data/intermediate/building_1176/features_clean_1176.csv',
                        parse_dates=['timestamp'], index_col=['timestamp'])
df_features.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7751 entries, 2016-01-04 02:00:00 to 2016-12-31 11:00:00
Data columns (total 7 columns):
day_of_week                     7751 non-null int64
dew_temperature_ma_24H          7751 non-null float64
air_temperature                 7751 non-null float64
wind_speed_ma_24H               7751 non-null float64
precip_depth_1_hr_ma_24H        7751 non-null float64
sea_level_pressure_shift_10H    7751 non-null float64
meter_reading_trend             7751 non-null float64
dtypes: float64(6), int64(1)
memory usage: 484.4 KB


In [31]:
x_meter = df_features.drop('meter_reading_trend', axis=1, inplace=False)
x_meter.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7751 entries, 2016-01-04 02:00:00 to 2016-12-31 11:00:00
Data columns (total 6 columns):
day_of_week                     7751 non-null int64
dew_temperature_ma_24H          7751 non-null float64
air_temperature                 7751 non-null float64
wind_speed_ma_24H               7751 non-null float64
precip_depth_1_hr_ma_24H        7751 non-null float64
sea_level_pressure_shift_10H    7751 non-null float64
dtypes: float64(5), int64(1)
memory usage: 423.9 KB


In [32]:
df_meter.shape

(8777, 1)

In [33]:
# keep only meter_readings for timestamps in df_features index

y_meter = df_meter.loc[df_features.index]
print(y_meter.shape)

# check that we have a meter reading for each of those timestamps
print('n timestamps with no meter readings : {}'.format(y_meter.isna().sum()))

(7751, 1)
n timestamps with no meter readings : meter_reading    0
dtype: int64


In [34]:
y_meter.describe()

Unnamed: 0,meter_reading
count,7751.0
mean,112.44
std,37.209764
min,34.8555
25%,79.4055
50%,102.886
75%,141.5335
max,264.745


In [35]:
dummy_regr = DummyRegressor(strategy="mean")

dummy_regr.fit(X=x_meter, y=y_meter)

DummyRegressor(constant=None, quantile=None, strategy='mean')

In [36]:
dummy_regr_2 = DummyRegressor(strategy="mean")

dummy_regr_2.fit(X=np.array([[1], [2]]), y=np.array([3,3.4]))

DummyRegressor(constant=None, quantile=None, strategy='mean')

In [37]:
dummy_regr_2.predict([3])

array([3.2])

In [38]:
class MeanEstimator(BaseEstimator):
    """ A template estimator to be used as a reference implementation.
    For more information regarding how to build your own estimator, read more
    in the :ref:`User Guide <user_guide>`.
    Parameters
    ----------
    demo_param : str, default='demo_param'
        A parameter used for demonstation of how to pass and store paramters.
    """
    def __init__(self, demo_param='demo_param'):
        self.demo_param = demo_param

    def fit(self, X, y):
        """A reference implementation of a fitting function.
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).
        Returns
        -------
        self : object
            Returns self.
        """
        
        X, y = check_X_y(X, y, accept_sparse=True)
        """Input validation for standard estimators.
    Checks X and y for consistent length, enforces X to be 2D and y 1D. By
    default, X is checked to be non-empty and containing only finite values.
    Standard input checks are also applied to y, such as checking that y
    does not have np.nan or np.inf targets. For multi-label y, set
    multi_output=True to allow 2D and sparse y. If the dtype of X is
    object, attempt converting to float, raising on failure.
    """
        
        
        self.mean_value = y.mean()
        
        
        self.is_fitted_ = True
        # `fit` should always return `self`
        return self

    
    
    
    def predict(self, X):
        """ A reference implementation of a predicting function.
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        Returns
        -------
        y : ndarray, shape (n_samples,)
            Returns an array of ones.
        """
        
        
        X = check_array(X, accept_sparse=True)
        """Input validation on an array, list, sparse matrix or similar.
    By default, the input is checked to be a non-empty 2D array containing
    only finite values. If the dtype of the array is object, attempt
    converting to float, raising on failure."""
        
        check_is_fitted(self, 'is_fitted_')
        
        
        return self.mean_value * np.ones(X.shape[0], dtype=np.int64)
    
    
    
    

In [39]:
dummy_regr.predict([1])

array([112.44000039])

In [40]:
from sklearn.model_selection import cross_val_score

In [41]:
# GapKFold
# gap ~ two weeks, train = 1 month (12 folds)

gap = 24*7*2
gap_kf = GapKFold(n_splits=12, gap_before=gap, gap_after=gap)

In [42]:
cross_val_score(
    estimator=DummyRegressor(strategy="mean"),
    X=x_meter,
    y=y_meter,
    scoring='neg_mean_squared_log_error',
    cv=gap_kf).mean()

-0.10899875476610667

In [43]:
cross_val_score(
    estimator=DummyRegressor(strategy="median"),
    X=x_meter,
    y=y_meter,
    scoring='neg_mean_squared_log_error',
    cv=gap_kf).mean()

-0.1073820404886998

In [44]:
# note : this is not the RMSLE metric defined for the competition, it is not in sklearn, we must implement it.

In [45]:
# Now let's take the mean by hour

In [46]:
df_features.head()

Unnamed: 0_level_0,day_of_week,dew_temperature_ma_24H,air_temperature,wind_speed_ma_24H,precip_depth_1_hr_ma_24H,sea_level_pressure_shift_10H,meter_reading_trend
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-04 02:00:00,0,-7.483333,-5.0,5.166667,0.0,1027.2,89.47075
2016-01-04 03:00:00,0,-7.4625,-5.0,5.058333,0.0,1027.8,92.008917
2016-01-04 05:00:00,0,-7.5125,-6.1,4.8,0.0,1027.1,97.074187
2016-01-04 06:00:00,0,-7.579167,-6.7,4.7125,0.0,1027.3,99.024417
2016-01-04 07:00:00,0,-7.670833,-7.2,4.541667,0.0,1027.5,100.4665


In [47]:
df_hours = pd.DataFrame(index=df_features.index)
df_hours['day_hour'] = df_hours.index.to_series().dt.hour
df_hours.head()

Unnamed: 0_level_0,day_hour
timestamp,Unnamed: 1_level_1
2016-01-04 02:00:00,2
2016-01-04 03:00:00,3
2016-01-04 05:00:00,5
2016-01-04 06:00:00,6
2016-01-04 07:00:00,7


In [48]:
df_hours.index

DatetimeIndex(['2016-01-04 02:00:00', '2016-01-04 03:00:00',
               '2016-01-04 05:00:00', '2016-01-04 06:00:00',
               '2016-01-04 07:00:00', '2016-01-04 08:00:00',
               '2016-01-04 10:00:00', '2016-01-04 11:00:00',
               '2016-01-04 12:00:00', '2016-01-04 13:00:00',
               ...
               '2016-12-31 01:00:00', '2016-12-31 02:00:00',
               '2016-12-31 04:00:00', '2016-12-31 05:00:00',
               '2016-12-31 06:00:00', '2016-12-31 07:00:00',
               '2016-12-31 08:00:00', '2016-12-31 09:00:00',
               '2016-12-31 10:00:00', '2016-12-31 11:00:00'],
              dtype='datetime64[ns]', name='timestamp', length=7751, freq=None)

In [49]:
df_hours.head()

Unnamed: 0_level_0,day_hour
timestamp,Unnamed: 1_level_1
2016-01-04 02:00:00,2
2016-01-04 03:00:00,3
2016-01-04 05:00:00,5
2016-01-04 06:00:00,6
2016-01-04 07:00:00,7


In [50]:
df_hours.iloc[:,0].head()

timestamp
2016-01-04 02:00:00    2
2016-01-04 03:00:00    3
2016-01-04 05:00:00    5
2016-01-04 06:00:00    6
2016-01-04 07:00:00    7
Name: day_hour, dtype: int64

In [51]:
np.array([[1],[2]]).shape

(2, 1)

In [52]:
np.array([[1],[2]]).mean()

1.5

In [53]:
a=df_hours.iloc[:,0].to_numpy()

In [54]:
a.shape

(7751,)

In [55]:
a

array([ 2,  3,  5, ...,  9, 10, 11])

In [56]:
b = df_hours.reset_index(inplace=False)
b.head()

Unnamed: 0,timestamp,day_hour
0,2016-01-04 02:00:00,2
1,2016-01-04 03:00:00,3
2,2016-01-04 05:00:00,5
3,2016-01-04 06:00:00,6
4,2016-01-04 07:00:00,7


In [57]:
b.to_numpy()

array([[Timestamp('2016-01-04 02:00:00'), 2],
       [Timestamp('2016-01-04 03:00:00'), 3],
       [Timestamp('2016-01-04 05:00:00'), 5],
       ...,
       [Timestamp('2016-12-31 09:00:00'), 9],
       [Timestamp('2016-12-31 10:00:00'), 10],
       [Timestamp('2016-12-31 11:00:00'), 11]], dtype=object)

In [58]:
b.to_numpy().shap

AttributeError: 'numpy.ndarray' object has no attribute 'shap'

In [59]:
b.to_numpy()[0,:]

array([Timestamp('2016-01-04 02:00:00'), 2], dtype=object)

In [60]:
a.shape

(7751,)

In [61]:
x3 = np.reshape(a, (-1,1))
x3.shape

(7751, 1)

In [69]:
np.unique(x3)

3

In [105]:
x_meter.reset_index(inplace=False).to_numpy()[100,0:3]

array([Timestamp('2016-01-08 17:00:00'), 4, -1.025], dtype=object)

In [109]:
x3.shape

(7751, 1)

In [161]:
class MeanByCatEstimator(BaseEstimator):
    """ A template estimator to be used as a reference implementation.
    For more information regarding how to build your own estimator, read more
    in the :ref:`User Guide <user_guide>`.
    Parameters
    ----------
    demo_param : str, default='demo_param'
        A parameter used for demonstation of how to pass and store paramters.
    """
    def __init__(self, cat_column_idx=0, demo_param='demo_param'):
        self.demo_param = demo_param
        self.cat_column_idx= cat_column_idx

    def fit(self, X, y):
        """A reference implementation of a fitting function.
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).
        Returns
        -------
        self : object
            Returns self.
        """
        
        X, y = check_X_y(X, y, accept_sparse=True)
        """Input validation for standard estimators.
    Checks X and y for consistent length, enforces X to be 2D and y 1D. By
    default, X is checked to be non-empty and containing only finite values.
    Standard input checks are also applied to y, such as checking that y
    does not have np.nan or np.inf targets. For multi-label y, set
    multi_output=True to allow 2D and sparse y. If the dtype of X is
    object, attempt converting to float, raising on failure.
    """
        
        if(self.cat_column_idx >= X.shape[1]):
             raise ValueError("category column index should be < X.shape[1]")
        
        categories = {}
        self.means = {}
        
        self.mean = y.mean()
        
        for x_bin in np.unique(X[:, self.cat_column_idx]):
            categories[x_bin] = []
            
        print('categories : {}'.format(categories.keys()))
            
        for k in range(X.shape[0]):
            categories[X[k, self.cat_column_idx]].append(y[k])
        
        for k, v in categories.items():
            self.means[k] = np.array(v).mean()
        
        self.is_fitted_ = True
        # `fit` should always return `self`
        
        
        for k, v in self.means.items():
            print('({}, {})'.format(k, v))
        
        
        return self

    
    
    
    def predict(self, X):
        """ A reference implementation of a predicting function.
        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        Returns
        -------
        y : ndarray, shape (n_samples,)
            Returns an array of ones.
        """
        
        
        X = check_array(X, accept_sparse=True)
        """Input validation on an array, list, sparse matrix or similar.
    By default, the input is checked to be a non-empty 2D array containing
    only finite values. If the dtype of the array is object, attempt
    converting to float, raising on failure."""
        
        check_is_fitted(self, 'is_fitted_')
        
        predictions = []
        
        for sample_cat in X[:, self.cat_column_idx]:
            cat_mean = self.means.get(sample_cat)
            if(cat_mean == None):
                predictions.append(self.mean)
            else:
                predictions.append(cat_mean)
            
        
        
        return np.array(predictions)
    
    
    
    

In [162]:
x_meter.index.shape

(7751,)

In [163]:
y_meter.shape

(7751, 1)

In [164]:
x3.shape

(7751, 1)

In [165]:
x_with_hours = x_meter.copy()
x_with_hours['h'] = x3
x_with_hours.head()

Unnamed: 0_level_0,day_of_week,dew_temperature_ma_24H,air_temperature,wind_speed_ma_24H,precip_depth_1_hr_ma_24H,sea_level_pressure_shift_10H,h
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-04 02:00:00,0,-7.483333,-5.0,5.166667,0.0,1027.2,2
2016-01-04 03:00:00,0,-7.4625,-5.0,5.058333,0.0,1027.8,3
2016-01-04 05:00:00,0,-7.5125,-6.1,4.8,0.0,1027.1,5
2016-01-04 06:00:00,0,-7.579167,-6.7,4.7125,0.0,1027.3,6
2016-01-04 07:00:00,0,-7.670833,-7.2,4.541667,0.0,1027.5,7


In [166]:
myEst = MeanByCatEstimator(cat_column_idx=6)

In [167]:
x_with_hours.to_numpy().shape

(7751, 7)

In [187]:
myEst.fit(x_with_hours, y_meter)

categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 85.44913230769231)
(1.0, 80.9057106918239)
(2.0, 77.24749242424242)
(3.0, 75.8257921875)
(4.0, 75.43799386503068)
(5.0, 75.71224702380952)
(6.0, 76.16310946745563)
(7.0, 89.09952071005917)
(8.0, 101.37205325443787)
(9.0, 121.24868484848484)
(10.0, 134.5447002967359)
(11.0, 141.08189969604862)
(12.0, 144.13435757575758)
(13.0, 146.5302950310559)
(14.0, 147.8204025157233)
(15.0, 147.98294788273614)
(16.0, 148.03377096774193)
(17.0, 144.4900734824281)
(18.0, 133.85397411003234)
(19.0, 125.90178456591639)
(20.0, 119.86309904153353)
(21.0, 114.30286666666666)
(22.0, 107.99530959752322)
(23.0, 92.44874603174603)


  y = column_or_1d(y, warn=True)


MeanByCatEstimator(cat_column_idx=6, demo_param='demo_param')

In [174]:
t_6 = np.array([0,3,5,6,8,10,15,2,5,7,22,27])


In [185]:
t = np.transpose(np.array([np.ones(12, dtype=np.int64),
          np.ones(12, dtype=np.int64),
          np.ones(12, dtype=np.int64),
          np.ones(12, dtype=np.int64),
          np.ones(12, dtype=np.int64),
          np.ones(12, dtype=np.int64),
          t_6,
          np.ones(12, dtype=np.int64)]))

In [186]:
myEst.predict(t)

array([ 85.44913231,  75.82579219,  75.71224702,  76.16310947,
       101.37205325, 134.5447003 , 147.98294788,  77.24749242,
        75.71224702,  89.09952071, 107.9953096 , 112.44000039])

In [188]:
cross_val_score(
    estimator=MeanByCatEstimator(cat_column_idx=6),
    X=x_with_hours,
    y=y_meter,
    scoring='neg_mean_squared_log_error',
    cv=gap_kf).mean()

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 84.8521170212766)
(1.0, 80.7324784172662)
(2.0, 77.03253859649124)
(3.0, 75.58823646209386)
(4.0, 75.27731578947369)
(5.0, 75.56354607508533)
(6.0, 76.06978305084746)
(7.0, 88.2856)
(8.0, 100.86652027027026)
(9.0, 121.0012896551724)
(10.0, 134.53331506849315)
(11.0, 141.3168780487805)
(12.0, 144.32095189003437)
(13.0, 146.75153521126762)
(14.0, 147.78677857142858)
(15.0, 148.10698148148145)
(16.0, 148.23752205882352)
(17.0, 144.03957299270073)
(18.0, 133.48166911764704)
(19.0, 125.38708921933086)
(20.0, 119.7651208791209)
(21.0, 114.10951824817518)
(22.0, 107.64665714285715)
(23.0, 91.59703272727273)
categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 83.97857249070631)
(1.0, 79.52764503816795)
(2.0, 76.151563636

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 84.28818014705882)
(1.0, 79.88532061068703)
(2.0, 76.67683032490974)
(3.0, 75.27683396226415)
(4.0, 74.79980669144982)
(5.0, 75.16579285714285)
(6.0, 75.63232974910393)
(7.0, 88.29065357142858)
(8.0, 100.27755395683454)
(9.0, 119.69134317343173)
(10.0, 132.81567985611514)
(11.0, 139.2428081180812)
(12.0, 142.2189594095941)
(13.0, 143.88465413533834)
(14.0, 144.72415849056605)
(15.0, 144.74577343750002)
(16.0, 145.1279233716475)
(17.0, 141.15151538461538)
(18.0, 131.0910772200772)
(19.0, 124.12422433460077)
(20.0, 116.8417859922179)
(21.0, 111.78218939393939)
(22.0, 105.54095522388059)
(23.0, 90.64873180076629)
categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 84.39905185185185)
(1.0, 80.60708778625954)
(2.0, 77

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 86.06858208955222)
(1.0, 81.45657735849058)
(2.0, 77.88552372262774)
(3.0, 76.39129400749064)
(4.0, 76.03379104477612)
(5.0, 76.27347670250894)
(6.0, 76.68779715302492)
(7.0, 89.83426164874552)
(8.0, 101.87463571428572)
(9.0, 121.93102930402931)
(10.0, 135.2524306049822)
(11.0, 141.98615693430656)
(12.0, 145.12358241758244)
(13.0, 147.76184905660378)
(14.0, 149.20395785440613)
(15.0, 149.00169411764705)
(16.0, 149.35080308880308)
(17.0, 145.97366795366793)
(18.0, 135.21138431372552)
(19.0, 126.49393436293437)
(20.0, 120.8544030418251)
(21.0, 115.76451515151516)
(22.0, 108.55618587360595)
(23.0, 93.06261450381679)
categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 86.8269925093633)
(1.0, 81.6539097744361)
(2.0, 7

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 87.215202247191)
(1.0, 82.1500977443609)
(2.0, 78.13490398550725)
(3.0, 76.78411132075472)
(4.0, 76.14877859778599)
(5.0, 76.27542391304348)
(6.0, 76.83519285714286)
(7.0, 89.1458480565371)
(8.0, 100.2249)
(9.0, 118.66325641025642)
(10.0, 133.38415412186382)
(11.0, 141.18148175182483)
(12.0, 144.365615942029)
(13.0, 147.71204887218045)
(14.0, 149.72218631178706)
(15.0, 150.86533333333333)
(16.0, 150.72833858267714)
(17.0, 147.71010687022903)
(18.0, 136.87817578125)
(19.0, 128.5808)
(20.0, 123.2565534351145)
(21.0, 117.15835741444866)
(22.0, 110.75423880597015)
(23.0, 94.38047126436781)
categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 87.96362081784386)
(1.0, 83.07769029850746)
(2.0, 79.11370833333334)
(3.0, 77

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 85.83220295202953)
(1.0, 81.28649056603773)
(2.0, 77.56749267399266)
(3.0, 76.05267748091603)
(4.0, 75.63852205882353)
(5.0, 75.94116606498196)
(6.0, 76.24322222222223)
(7.0, 90.05270106761566)
(8.0, 102.57718411552347)
(9.0, 122.20444043321301)
(10.0, 134.62888380281692)
(11.0, 140.30622463768117)
(12.0, 143.78757246376813)
(13.0, 146.26403007518797)
(14.0, 147.4942067669173)
(15.0, 148.10289763779528)
(16.0, 147.87780859375)
(17.0, 144.92461627906977)
(18.0, 133.8583098039216)
(19.0, 126.5448671875)
(20.0, 120.4405938697318)
(21.0, 115.07181609195402)
(22.0, 109.2811086142322)
(23.0, 93.48106463878325)
categories : dict_keys([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0])
(0.0, 84.4010036900369)
(1.0, 79.92446183206107)
(2.0, 76.141130

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


-0.044998170800415856