In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
import pandas as pd
import itertools
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the dataset and keep only its first 1000 rows
# (presumably to keep Gaussian-process fitting tractable — TODO confirm).
data = pd.read_csv('scaled_data.csv').iloc[:1000]

# Target is the 'popularity' column; every other column is used as a feature.
y = data['popularity']
X = data.drop(columns='popularity')

In [None]:
# Hyperparameter search space. Key order matters to the search routine, which
# pairs grid.keys() with each tuple from itertools.product(*grid.values()).
grid = dict(
    # value handed to the constant factor of the kernel
    constant_value=[0.1, 1.0, 10.0],
    # length scale handed to the RBF factor of the kernel
    length_scale=[0.1, 1.0, 10.0],
    # noise / regularization term
    alpha=[1e-10, 1e-2, 1e-1],
    # number of restarts for the kernel hyperparameter optimization
    n_restarts=[0, 5],
)

In [None]:
def grid_search_gpr(X, y, grid):
    """Exhaustively search `grid` for the best Gaussian-process regressor settings.

    The data is split once into 80% train / 20% validation (random_state=42),
    features are standardized with statistics learned on the training part
    only, and every combination in `grid` is scored by R^2 on the validation
    part.

    Parameters
    ----------
    X : features accepted by `train_test_split` / `StandardScaler`.
    y : regression target aligned with `X`.
    grid : dict mapping a setting name to the list of values to try.
        Recognized keys (default used when a key is absent):
        'constant_value' (1.0), 'length_scale' (1.0), 'alpha' (1e-10),
        'n_restarts' (5).

    Returns
    -------
    (best_params, best_score) : the parameter dict with the highest validation
        R^2 and that R^2. Ties keep the first combination encountered. If no
        combination yields a score greater than -inf (e.g. every score is
        NaN), the result is ``(None, -inf)``.
    """
    X_tr, X_val, y_tr, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only so that no validation
    # statistics leak into the model.
    scaler = StandardScaler()
    X_tr_scaled = scaler.fit_transform(X_tr)
    X_val_scaled = scaler.transform(X_val)

    # Built-in float infinity: the notebook never imports numpy, so the
    # previous `-np.inf` raised NameError on a fresh kernel.
    best_score = float("-inf")
    best_params = None

    for combo in itertools.product(*grid.values()):
        params = dict(zip(grid.keys(), combo))

        # Kernel: Constant * RBF (the constant factor acts as signal variance).
        # These are the values the kernel is constructed with; see the
        # scikit-learn GPR docs for how fit() treats kernel hyperparameters.
        kernel = (
            ConstantKernel(constant_value=params.get('constant_value', 1.0))
            * RBF(length_scale=params.get('length_scale', 1.0))
        )
        gp = GaussianProcessRegressor(kernel=kernel,
                                      alpha=params.get('alpha', 1e-10),
                                      normalize_y=True,
                                      n_restarts_optimizer=params.get('n_restarts', 5),
                                      random_state=42)
        gp.fit(X_tr_scaled, y_tr)
        y_pred = gp.predict(X_val_scaled)
        score = r2_score(y_val, y_pred)

        # Higher R^2 is better; strict '>' keeps the earliest winner on ties.
        if score > best_score:
            best_score = score
            best_params = params

    return best_params, best_score

In [None]:
# Hold out 20% of the data as the final test set; hyperparameters are tuned
# on the remaining 80% only.
X_train1, X_test, y_train1, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The search function defined in this notebook is grid_search_gpr, and the score
# it returns is a validation R^2 (higher is better), not an MAE.
best_params, best_r2 = grid_search_gpr(X_train1, y_train1, grid)
print("Best R2: ", best_r2)
print("Best Parameters: ", best_params)

In [None]:
# Rebuild the estimator the same way the grid search does: the tuned keys
# ('constant_value', 'length_scale', 'alpha', 'n_restarts') describe the kernel
# and optimizer settings and are not GaussianProcessRegressor keyword arguments,
# so they cannot simply be unpacked into a constructor.
kernel = (
    ConstantKernel(constant_value=best_params.get('constant_value', 1.0))
    * RBF(length_scale=best_params.get('length_scale', 1.0))
)
model = GaussianProcessRegressor(
    kernel=kernel,
    alpha=best_params.get('alpha', 1e-10),
    normalize_y=True,
    n_restarts_optimizer=best_params.get('n_restarts', 5),
    random_state=42,
)

# The hyperparameters were selected on standardized features, so apply the same
# preprocessing here; the scaler is fitted on the training data only.
scaler = StandardScaler()
X_train1_scaled = scaler.fit_transform(X_train1)
X_test_scaled = scaler.transform(X_test)

model.fit(X_train1_scaled, y_train1)
y_pred = model.predict(X_test_scaled)

# Report held-out performance (lower MAE / higher R2 is better).
mae = mean_absolute_error(y_test, y_pred)
print("MAE Score for Gaussian Process with best parameters: ", mae)
r2 = r2_score(y_test, y_pred)
print("R2 Score for Gaussian Process with best parameters: ", r2)
