In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Dropout, BatchNormalization, Concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.gaussian_process.kernels import ConstantKernel, DotProduct, ExpSineSquared, Matern, RBF, RationalQuadratic, WhiteKernel
from sklearn.model_selection import RandomizedSearchCV
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.utils import shuffle

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the cells below
# can reach files stored there. Requires the google.colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Shell escape: list the current working directory (sanity check after mounting Drive).
!ls

In [None]:
# Change the notebook's working directory to the dataset folder on the mounted
# Drive; the relative CSV path used when loading the data resolves against it.
%cd /content/drive/MyDrive/Doutorado/Tese-Doutorado/teste-comparacao/dataset/

# Shell escape: list the folder to confirm the expected files are present.
!ls

In [None]:
# Load the results table. The path is relative, so it is resolved against the
# notebook's current working directory.
file_path = "./dfCity_library_100.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Names of the columns of `data` that are fed to the classifier as input
# features; the list length also fixes the network's input width.
metrics = [
    'mse',
    'rmse',
    'lml',
    'std',
    'R2_tr',
    'R2',
    'R2_test',
]

In [None]:
# Feed-forward binary classifier: one input per metric column, three ReLU
# hidden layers of decreasing width (64 -> 32 -> 16) and a single sigmoid
# output unit that yields a score in [0, 1] per row.
model = Sequential(
    [
        Input(shape=(len(metrics),)),
        *[Dense(width, activation='relu') for width in (64, 32, 16)],
        Dense(1, activation='sigmoid'),
    ]
)

In [None]:
# Configure training for a binary target: Adam optimizer with its default
# settings, binary cross-entropy loss, accuracy reported during fitting.
# Note: the `metrics=` keyword here is the Keras argument, not the notebook's
# `metrics` list of column names.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Binary target: 1 for every row whose 'mse' equals the column minimum
# (ties all receive 1), 0 otherwise. The column is added to `data` in place.
lowest_mse = data['mse'].min()
data['best_combination'] = data['mse'].eq(lowest_mse).astype(int)

In [None]:
# Feature matrix (one column per entry of `metrics`) and the 0/1 target
# vector, both as NumPy arrays in the same row order as `data`.
X = data.loc[:, metrics].to_numpy()
y = data.loc[:, 'best_combination'].to_numpy()

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train the classifier for 100 epochs in mini-batches of 8, with a fraction
# 0.2 of the training arrays set aside by Keras for validation. The call is
# left as the cell's last expression, so the returned History object is
# displayed.
model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=8,
    epochs=100,
)

In [None]:
# Score every row of the full feature matrix X (training and held-out rows
# alike), so the resulting positions line up with the rows of `data`.
y_pred = model.predict(X)

In [None]:
# Position of the highest predicted score. argmax without an axis works on the
# flattened array; with one output unit per row that is the row position.
best_index = np.argmax(y_pred)
# Fetch the matching row of `data` by position.
best_kernel = data.iloc[best_index]

In [None]:
# Bare expression: display the selected row position as the cell output.
best_index

In [None]:
# Report the row that received the highest classifier score.
print('Melhor combinação de kernels:')
print(best_kernel)
# `best_kernel` is a row Series indexed by column names, so an integer key via
# Series[...] relies on pandas' deprecated positional fallback. Use .iloc to
# read the value at position 1 explicitly.
print("Best Kernel", best_kernel.iloc[1])

In [None]:
# Display the value at position 1 of the selected row. .iloc makes the access
# explicitly positional; integer keys through Series[...] on a label-indexed
# Series are deprecated in pandas.
best_kernel.iloc[1]

In [None]:
# Reference kernel: (Matern * Matern) + (ExpSineSquared * DotProduct).
# Printing its parameter names shows the nested `k1__` / `k2__` keys that
# composite kernels expose for hyperparameter search.
kernel = (
    Matern(length_scale=1, nu=1.5) * Matern(length_scale=1, nu=1.5)
    + ExpSineSquared(length_scale=1, periodicity=1) * DotProduct(sigma_0=1)
)
print(kernel.get_params().keys())

In [None]:
# Rebuild the kernel object by evaluating the expression stored at position 0
# of the selected row (presumably a string of sklearn kernel constructors;
# verify against the CSV schema). This replaces the reference kernel above.
# .iloc is used because integer keys through Series[...] on a label-indexed
# Series are deprecated in pandas.
# SECURITY NOTE: eval() executes arbitrary Python taken from the CSV file;
# only run this on a trusted dataset.
# NOTE(review): the "Best Kernel" print reads position 1 of the row, while
# this reads position 0. Confirm which column holds the kernel expression.
kernel = eval(best_kernel.iloc[0])

In [None]:
# Shuffle features and targets together (seeded, so the order is reproducible)
# and keep at most the first 1000 rows to bound the cost of the GP search.
X_train_shuffled, y_train_shuffled = shuffle(X_train, y_train, random_state=42)
X_train_sample, y_train_sample = X_train_shuffled[:1000], y_train_shuffled[:1000]

In [None]:
# Candidate values for the randomized search. The keys use sklearn's nested
# parameter naming: `kernel__k1__...` / `kernel__k2__...` address the left and
# right operands of a composite kernel, matching the structure
# (Matern * Matern) + (ExpSineSquared * DotProduct).
# NOTE(review): the kernel that is actually tuned comes from eval(...) on the
# dataset; confirm it has this same structure, otherwise these parameter
# names will not exist on the estimator.
param_grid = dict(
    # left product, first Matern factor
    kernel__k1__k1__length_scale=[10, 50, 100, 500],
    kernel__k1__k1__nu=[0.5, 1.5, 2.5],
    # left product, second Matern factor
    kernel__k1__k2__length_scale=[10, 50, 100, 500],
    kernel__k1__k2__nu=[0.5, 1.5, 2.5],
    # right product, ExpSineSquared factor
    kernel__k2__k1__length_scale=[0.1, 0.5, 1, 2],
    kernel__k2__k1__periodicity=[1e3, 1e4, 1e5],
    # right product, DotProduct factor
    kernel__k2__k2__sigma_0=[1e-4, 1e-3, 1e-2, 1e-1],
)

In [None]:
# Gaussian-process regressor built on the selected kernel; alpha is the small
# value sklearn adds to the kernel-matrix diagonal during fitting.
gp = GaussianProcessRegressor(
    alpha=1e-6,
    kernel=kernel,
)

In [None]:
# Randomized hyperparameter search over `param_grid`: 20 sampled candidates,
# each scored by negated mean squared error with 2-fold cross-validation.
# The seed fixes which candidates are drawn; n_jobs=-1 uses all CPU cores.
search = RandomizedSearchCV(
    estimator=gp,
    param_distributions=param_grid,
    n_iter=20,
    scoring='neg_mean_squared_error',
    cv=2,
    n_jobs=-1,
    random_state=42,
)

In [None]:
# Run the search on the shuffled training sample. Left as the cell's last
# expression, so the fitted search object is displayed.
# NOTE(review): the targets here are the 0/1 `best_combination` labels scored
# with MSE; confirm that regressing on this label is intended.
search.fit(X_train_sample, y_train_sample)

In [None]:
# Report the winning hyperparameter set, then the fitted kernel of the best
# estimator (`kernel_` is the kernel after fitting).
print("Melhores hiperparâmetros encontrados:", search.best_params_, sep="\n")
print("Kernel otimizado:", search.best_estimator_.kernel_, sep="\n")