In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, make_scorer, r2_score
from sklearn.model_selection import cross_val_score, ShuffleSplit, train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, PowerTransformer, StandardScaler
from sklearn.pipeline import Pipeline
from catboost import CatBoostRegressor
from tmpnn import Regression
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
cv = ShuffleSplit(n_splits=10, test_size=0.1, random_state=1)

In [4]:
#sarcos
import scipy.io
mat = scipy.io.loadmat('sarcos_inv.mat')
sarcos_df = pd.DataFrame(mat['sarcos_inv'])

In [68]:
for i in range(7):
    X, y = sarcos_df.iloc[:,:-7], sarcos_df.iloc[:,-7+i]
    scores = cross_val_score(CatBoostRegressor(verbose=False), X, y, cv=cv, scoring=make_scorer(mean_squared_error))
    print(f"CatBoost: {scores.mean():.2f} {scores.std():.2f} ({i})")

CatBoost: 7.95 0.17 (0)
CatBoost: 3.89 0.21 (1)
CatBoost: 1.24 0.06 (2)
CatBoost: 0.92 0.04 (3)
CatBoost: 0.02 0.00 (4)
CatBoost: 0.07 0.00 (5)
CatBoost: 0.06 0.00 (6)


In [14]:
X, y = sarcos_df.iloc[:,:-7].values, sarcos_df.iloc[:,-7:].values
X = MinMaxScaler((-0.5,0.5)).fit_transform(X)
Yscaler = MinMaxScaler((-0.5, 0.5))
y_tr = Yscaler.fit_transform(y)
scores = {}
for train, test in tqdm(cv.split(X, y_tr), leave=False):
    model = Regression(X.shape[1],7,2,2,is_scale=False, learning_rate=1e-2) 

    history1 = model.fit(X[train], y_tr[train], epochs=1500, verbose=0)

    pred = Yscaler.inverse_transform(model.predict(X[test]))
    for i in range(7):
        scores[i] = (mean_squared_error(y[test,i], pred[:,i]))
for i in range(7):
    print(f"TmPNN: {np.mean(scores[i]):.2f} {np.std(scores[i]):.2f} ({i})")

TmPNN: 7.94 0.00 (0)
TmPNN: 3.22 0.00 (1)
TmPNN: 0.82 0.00 (2)
TmPNN: 0.67 0.00 (3)
TmPNN: 0.02 0.00 (4)
TmPNN: 0.05 0.00 (5)
TmPNN: 0.04 0.00 (6)


In [28]:
#emmisions
gas_df = pd.concat([pd.read_csv('gas/gt_201'+str(i)+'.csv')for i in range(1, 6)])

names = {0:'CO',1:'NO'}

for i in range(2):
    X, y = sarcos_df.iloc[:,:-2], sarcos_df.iloc[:,-2+i]
    scores = cross_val_score(CatBoostRegressor(verbose=False), X, y, cv=cv, scoring=make_scorer(mean_squared_error))
    print(f"CatBoost: {scores.mean():.2f} {scores.std():.2f} ({names[i]})")

X, y = sarcos_df.iloc[:,:-2].values, sarcos_df.iloc[:,-2:].values
X = MinMaxScaler((-0.5,0.5)).fit_transform(X)
scores = {}
for train, test in tqdm(cv.split(X, y), leave=False):
    model = Regression(X.shape[1],2,2,7,is_scale=False, learning_rate=1e-3) 

    model.fit(X[train], y[train], epochs=500, verbose=0)

    pred = model.predict(X[test])
    for i in range(2):
        scores[i] = (mean_squared_error(y[test,i], pred[:,i]))
for i in range(2):
    print(f"TmPNN: {np.mean(scores[i]):.2f} {np.std(scores[i]):.2f} ({names[i]})")

CatBoost: 0.04 0.00 (CO)
CatBoost: 0.04 0.00 (NO)


                          

TmPNN: 0.02 0.00 (CO)
TmPNN: 0.02 0.00 (NO)




In [18]:
#california
from sklearn.datasets import fetch_california_housing
california_housing = fetch_california_housing(as_frame=True)
california_df = california_housing.frame

X, y = california_df.iloc[:,:-1], california_df.iloc[:,-1]
scores = cross_val_score(CatBoostRegressor(verbose=False), X, y, cv=cv, scoring=make_scorer(mean_squared_error))
print(f"CatBoost: {scores.mean():.2f} {scores.std():.2f}")

X, y = california_df.iloc[:,:-1].values, california_df.iloc[:,-1].values
X = MinMaxScaler((-0.5,0.5)).fit_transform(X)
scores = []
for train, test in tqdm(cv.split(X, y)):
    model = Regression(X.shape[1],1,2,7,is_scale=False, learning_rate=1e-3) 

    model.fit(X[train], y[train], epochs=500, verbose=0)

    scores.append(mean_squared_error(y[test], model.predict(X[test])))
print(f"TmPNN: {np.mean(scores):.2f} {np.std(scores):.2f}")

CatBoost: 0.19 0.01


10it [09:17, 55.71s/it]

TmPNN: 0.36 0.01





In [75]:
#concrete
concrete_df = pd.read_excel('Concrete_Data.xls')

X, y = concrete_df.iloc[:,:-1], concrete_df.iloc[:,-1]
scores = cross_val_score(CatBoostRegressor(verbose=False), X, y, cv=cv, scoring=make_scorer(mean_squared_error))
print(f"CatBoost: {scores.mean():.2f} {scores.std():.2f}")

X, y = concrete_df.iloc[:,:-1].values, concrete_df.iloc[:,-1].values
X = MinMaxScaler((-0.5,0.5)).fit_transform(X)
scores = []
for train, test in tqdm(cv.split(X, y), leave=False):
    model = Regression(X.shape[1],1,2,7,is_scale=False, learning_rate=1e-3) 
    
    X_tr=X[train]
    Y_tr=y[train]

    history1 = model.fit(X_tr, Y_tr, epochs=10000, verbose=0)

    scores.append(mean_squared_error(y[test], model.predict(X[test])))
print(f"TmPNN: {np.mean(scores):.2f} {np.std(scores):.2f}")

CatBoost: 16.45 7.59


                       

TmPNN: 27.54 3.05




In [55]:
#yacht
yacht_df = pd.read_csv('yacht_hydrodynamics.data', sep=' ', encoding='utf-8', names=['lcg', 'cp', 'volume', 'b/d', 'l/b', 'fn', 'r',])

X, y = yacht_df.iloc[:,:-1], yacht_df.iloc[:,-1]
scores = cross_val_score(CatBoostRegressor(verbose=False), X, y, cv=cv, scoring=make_scorer(mean_squared_error))
print(f"CatBoost: {scores.mean():.2f} {scores.std():.2f}")

X, y = yacht_df.iloc[:,:-1].values, yacht_df.iloc[:,-1].values
X = MinMaxScaler((0,0.1)).fit_transform(X)
scores = []
for train, test in tqdm(cv.split(X, y), leave=False):
    model = Regression(X.shape[1],1,2,7,is_scale=False, learning_rate=1e-2) 
    
    model.fit(X[train], y[train], epochs=30000, verbose=0)

    scores.append(mean_squared_error(y[test], model.predict(X[test])))
print(f"TmPNN: {np.mean(scores):.2f} {np.std(scores):.2f}")

CatBoost: 0.72 0.37


                        

TmPNN: 2.32 0.57




In [67]:
#airfoil
airfoil_df = pd.read_csv('airfoil_self_noise.dat', sep='\t', header=None)

X, y = airfoil_df.iloc[:,:-1], airfoil_df.iloc[:,-1]
scores = cross_val_score(CatBoostRegressor(verbose=False), X, y, cv=cv, scoring=make_scorer(mean_squared_error))
print(f"CatBoost: {scores.mean():.2f} {scores.std():.2f}")

X, y = airfoil_df.iloc[:,:-1].values, airfoil_df.iloc[:,-1].values
X = MinMaxScaler((-0.5,0.5)).fit_transform(X)
scores = []
for train, test in tqdm(cv.split(X, y), leave=False):
    model = Regression(X.shape[1],1,2,7,is_scale=False, learning_rate=1e-3) 

    history1 = model.fit(X[train], y[train], epochs=2000, verbose=0)

    scores.append(mean_squared_error(y[test], model.predict(X[test])))
print(f"TmPNN: {np.mean(scores):.2f} {np.std(scores):.2f}")

CatBoost: 2.05 0.33


                       

TmPNN: 13.02 2.14


