In [1]:
import pandas as pd
import scipy
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from math import log2
from joblib import parallel_backend

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from mlinsights.mlmodel.piecewise_tree_regression import PiecewiseTreeRegressor
import copy

In [2]:
# Input files for the train / test splits (relative paths).
TRAIN_CSV = 'DES_TRAIN.csv'
TEST_CSV = 'DES_TEST.csv'

# Read each split into its own DataFrame.
df_train = pd.read_csv(TRAIN_CSV)
df_test = pd.read_csv(TEST_CSV)

In [3]:
def embed(df):
    """Split a DataFrame into a feature matrix and a target vector.

    The target is the column named ``'Key'``; every other column is treated
    as a feature, regardless of where ``'Key'`` sits in the frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding a ``'Key'`` column plus the feature columns. Every
        column must be convertible to ``float64``.

    Returns
    -------
    inputs : numpy.ndarray
        Newly allocated, C-contiguous ``float64`` array with one row per
        row of ``df`` and one column per non-``'Key'`` column.
    key : numpy.ndarray
        Newly allocated, C-contiguous 1-D ``float64`` array with the
        ``'Key'`` values.

    Raises
    ------
    KeyError
        If ``df`` has no ``'Key'`` column.
    """
    key = np.array(df['Key'].to_numpy(), dtype='float64', order='C')

    # Select the features by name instead of "everything but the last
    # column": if 'Key' is not the last column, positional slicing would
    # leak the target into the inputs and silently drop a real feature.
    features = df.drop(columns='Key')

    # np.array copies by default, so one call yields a fresh C-ordered
    # float64 array (no separate copy + astype round trip needed).
    inputs = np.array(features.to_numpy(), dtype='float64', order='C')

    return inputs, key

In [4]:
# Build the training feature matrix and target vector from the train frame.
inputs, labels = embed(df_train)

In [5]:
# Piecewise tree regressor using the 'mselin' split criterion.
# NOTE(review): no random_state is passed — confirm whether the fit is
# deterministic across runs.
model = PiecewiseTreeRegressor(criterion='mselin')

# Fit inside a joblib threading backend limited to 10 jobs.
# NOTE(review): whether this estimator's fit() actually dispatches work
# through joblib is not visible here — confirm the context has any effect.
with parallel_backend('threading', n_jobs=10):
    model.fit(inputs, labels)

In [None]:
# Preview the first rows of the training frame.
df_train.head()

In [None]:
# Evaluate the fitted model on the data it was trained on.
predictions = model.predict(inputs)
error = np.abs(predictions - labels)

# The average is taken over the strictly positive errors only (exact hits
# are excluded); a boolean mask selects them without a Python-level loop.
positive_error = error[error > 0]
mean_error = np.mean(positive_error) if positive_error.size else 0.0
max_error = np.max(error)

# math.log2 raises ValueError for 0, which happens when every prediction is
# exact; report -inf as the exponent in that case instead of crashing.
mean_exponent = float('-inf') if mean_error == 0 else log2(mean_error)
max_exponent = float('-inf') if max_error == 0 else log2(max_error)

print('Average train error: ', mean_error)
print('Average exponent train error: ', mean_exponent)
print('Max train error: ', max_error)
print('Max exponent train error: ', max_exponent)

In [None]:
# Build the held-out feature matrix and target vector from the test frame.
inputs_test, labels_test = embed(df_test)

In [None]:
# Sanity check of the test split: the first extracted test targets are
# printed next to the head of the frame they were taken from (the 'Key'
# column). The train labels were printed here before, which cannot be
# compared against the test frame shown below.
print(labels_test[:5])
df_test.head()

In [None]:
# Spot check: absolute prediction error on the first five test rows.
print(
    np.abs(
        np.asarray(model.predict(inputs_test[:5]))
        - np.asarray(labels_test[:5])
    )
)

In [None]:
# Evaluate the fitted model on the held-out test data.
predictions_test = model.predict(inputs_test)
error = np.abs(predictions_test - labels_test)

# The average is taken over the strictly positive errors only (exact hits
# are excluded); a boolean mask selects them without a Python-level loop.
positive_error = error[error > 0]
mean_error = np.mean(positive_error) if positive_error.size else 0.0
max_error = np.max(error)

# math.log2 raises ValueError for 0, which happens when every prediction is
# exact; report -inf as the exponent in that case instead of crashing.
mean_exponent = float('-inf') if mean_error == 0 else log2(mean_error)
max_exponent = float('-inf') if max_error == 0 else log2(max_error)

print('Average error: ', mean_error)
print('Average exponent error: ', mean_exponent)
print('Max error: ', max_error)
print('Max exponent error: ', max_exponent)