In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.linear_model import BayesianRidge
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

In [None]:
class TrainTestHelper:
    """Stack a train set and a test set into one object and split it back.

    ``combine`` remembers how many rows the training part had; ``split`` uses
    that count to cut a combined object back into its train and test parts.
    """

    def __init__(self):
        # Row count of the most recently combined training set.
        # Stays None until ``combine`` has been called.
        self.ntrain = None

    def combine(self, train, test):
        """Stack ``test`` underneath ``train``.

        Args:
            train: np.ndarray or pandas DataFrame with the training rows.
            test: object of the same kind as ``train`` with the test rows.

        Returns:
            An np.ndarray when ``train`` is an ndarray; otherwise a DataFrame
            whose index has been reset to 0..n-1.
        """
        self.ntrain = train.shape[0]
        if isinstance(train, np.ndarray):
            # np.vstack replaces np.row_stack, its deprecated alias.
            return np.vstack((train, test))
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        return pd.concat([train, test], sort=False).reset_index(drop=True)

    def split(self, train_test):
        """Cut a combined object back into its train and test parts.

        Args:
            train_test: 2-D np.ndarray or DataFrame whose first ``ntrain``
                rows are the training rows.

        Returns:
            ``(train, test)``, or None when ``combine`` has not been called
            yet. DataFrame parts are copies with their index reset.
        """
        if self.ntrain is None:
            return None
        if isinstance(train_test, np.ndarray):
            train = train_test[:self.ntrain, :]
            test = train_test[self.ntrain:, :]
        else:
            train = train_test.iloc[:self.ntrain, :].copy().reset_index(drop=True)
            test = train_test.iloc[self.ntrain:, :].copy().reset_index(drop=True)
        return train, test

def rmse(y_true, y_pred):
    """Return the square root of the mean squared error of the two inputs."""
    mse = mean_squared_error(y_true, y_pred)
    return mse ** 0.5

In [None]:
# Read the train set, the test set and the sample submission, in that order.
train, test, ss = (
    pd.read_csv(csv_name)
    for csv_name in ('Train.csv', 'Test.csv', 'SampleSubmission.csv')
)

In [None]:
# Drop the training rows whose 'open' value is missing.
train = train.loc[train['open'].notna()]
# Take the 'close' column out of the training frame and keep it as `y`.
y = train.pop('close')

# Stack train on top of test so later steps handle both frames at once.
helper = TrainTestHelper()
data = helper.combine(train, test)
# Remove the two id columns from the combined frame.
del data['id']
del data['asset_id']

In [None]:
# Columns passed to the model.
features = ['open', 'high', 'low']
# Replace every missing value in the combined frame with 0.
data = data.fillna(0)

# Cut the selected columns back into their train and test parts.
train, test = helper.split(data[features])
# Materialise the index pairs of a seeded, shuffled 5-fold split of `train`.
folds = [
    fold
    for fold in KFold(n_splits=5, shuffle=True, random_state=0).split(train)
]

In [None]:
# Rows whose target is below 45,000 get weight 1; all other rows get weight 0.
row_weights = (y < 45_000) + 0
model = BayesianRidge()
model.fit(train, y, sample_weight=row_weights)

# Start from a fresh copy of the sample submission and fill in 'close'.
ss = pd.read_csv('SampleSubmission.csv')
predicted_close = model.predict(test)
# Negative predictions are raised to 0; there is no upper cap.
ss['close'] = np.clip(predicted_close, 0, np.inf)
# Rows whose 'open' is 0 (which includes filled-in missing values) get 0.
ss.loc[test['open'].eq(0), 'close'] = 0

In [None]:
sub = pd.read_csv('gbm_final.csv')

# Rows with 'open' above 40,000 take 'close' from gbm_final.csv instead.
high_open = test['open'] > 40_000
ss.loc[high_open, 'close'] = sub.loc[high_open, 'close']

# Pull predictions above the row's 'high' down to 'high' ...
above_high = ss['close'] > test['high']
ss.loc[above_high, 'close'] = test.loc[above_high, 'high']

# ... then raise predictions below the row's 'low' up to 'low'.
below_low = ss['close'] < test['low']
ss.loc[below_low, 'close'] = test.loc[below_low, 'low']

ss.to_csv('blend_final_v5.csv', index=False)