# Task 3

In [147]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel as C
from scipy.optimize import minimize

In [148]:
import pandas as pd

df = pd.read_csv('/mnt/c/Users/danie/PyCharmProjects/ml/UCI_Credit_Card.csv', sep=',')
df.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
0,1,20000.0,2,2,1,24,2,2,-1,-1,...,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,2,120000.0,2,2,2,26,-1,2,0,0,...,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,3,90000.0,2,2,2,34,0,0,0,0,...,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,4,50000.0,2,2,1,37,0,0,0,0,...,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,5,50000.0,1,2,1,57,-1,0,-1,0,...,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0


In [149]:
len(df)

30000

In [150]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

X = df.drop(columns=['default.payment.next.month', 'ID'])
y = df['default.payment.next.month']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [151]:
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process import GaussianProcessRegressor

def evaluate(params):
    n_estimators, max_depth = int(params[0]), int(params[1])
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42, n_jobs=-1)
    
    scores = cross_val_score(model, X_scaled, y, cv=3, scoring='accuracy', n_jobs=-1)
    return -np.mean(scores)


# Calc Lower confidence bound
def lcb(x):
    x = np.array(x).reshape(1, -1)
    mean, std = gp.predict(x, return_std=True)
    return mean - 1.96 * std  # Lower Confidence Bound


# Hyperparams boundaries
param_bounds = np.array([[10, 200],  # n_estimators
                         [3, 20]])  # max_depth

# Init gaussian process
kernel = (
    C(1.0, (1e-3, 1e3))
    * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e10))
    + WhiteKernel(noise_level=1e-6, noise_level_bounds=(1e-12, 1e1))
)
gp = GaussianProcessRegressor(kernel=kernel, alpha=1e-6, normalize_y=True)

# Starting GP params and initial fit
n_start_p = 5
start_p = np.random.uniform(param_bounds[:, 0], param_bounds[:, 1], size=(n_start_p, param_bounds.shape[0]))
start_scores = np.array([evaluate(p) for p in start_p])

gp.fit(start_p, start_scores)

best_score = float('-inf')
best_params = None

# Bayesian optimization
n_iterations = 20
for i in range(n_iterations):
    # Minimize LCB
    result = minimize(lcb,
                      x0=np.random.uniform(param_bounds[:, 0], param_bounds[:, 1]),
                      bounds=param_bounds)
    
    new_params = result.x
    new_score = evaluate(new_params)
    
    # Update GP
    X_new = np.vstack([gp.X_train_, new_params])
    y_new = np.append(gp.y_train_, new_score)
    gp.fit(X_new, y_new)
    
    if -new_score > best_score:
        best_score = -new_score
        best_params = new_params
    
    print(f"Iteration {i+1}: Params: {new_params}, Score: {-new_score:.4f}")

print(f"\nBest Parameters: n_estimators={int(best_params[0])}, max_depth={int(best_params[1])}, Accuracy={best_score:.4f}")


Iteration 1: Params: [77.94450149  4.78335105], Score: 0.8120
Iteration 2: Params: [59.04056896 14.65964761], Score: 0.8170
Iteration 3: Params: [100.99000304  14.5623141 ], Score: 0.8184
Iteration 4: Params: [34.22630656 11.59803844], Score: 0.8179
Iteration 5: Params: [159.9452102   17.15123458], Score: 0.8182
Iteration 6: Params: [159.58326817  15.76431934], Score: 0.8183
Iteration 7: Params: [185.81913951  17.78057424], Score: 0.8179
Iteration 8: Params: [25.99353221  8.72878738], Score: 0.8186
Iteration 9: Params: [59.2813153  3.       ], Score: 0.8070
Iteration 10: Params: [103.24745176   3.        ], Score: 0.8090
Iteration 11: Params: [32.75877462  3.        ], Score: 0.8064
Iteration 12: Params: [50.96959033  3.        ], Score: 0.8068
Iteration 13: Params: [67.17342571 20.        ], Score: 0.8175
Iteration 14: Params: [83.00832256 20.        ], Score: 0.8178
Iteration 15: Params: [55.7164225   5.63535942], Score: 0.8134
Iteration 16: Params: [10.  3.], Score: 0.8023
Iteration

In [152]:
import random

n_random_search = 20
random_results = []

for i in range(n_random_search):
    n_estimators = random.randint(10, 200)
    max_depth = random.randint(3, 20)
    
    params = [n_estimators, max_depth]
    score = evaluate(params)
    random_results.append((params, -score))
    
    print(f"Random Search Iteration {i+1}: Params: {params}, Accuracy: {-score:.4f}")

best_random_params, best_random_score = max(random_results, key=lambda x: x[1])

print("\nRandom Search Best:")
print(f"Params: {best_random_params}, Accuracy: {best_random_score:.4f}")

Random Search Iteration 1: Params: [192, 19], Accuracy: 0.8189
Random Search Iteration 2: Params: [129, 17], Accuracy: 0.8177
Random Search Iteration 3: Params: [142, 6], Accuracy: 0.8192
Random Search Iteration 4: Params: [104, 9], Accuracy: 0.8211
Random Search Iteration 5: Params: [73, 4], Accuracy: 0.8117
Random Search Iteration 6: Params: [158, 8], Accuracy: 0.8198
Random Search Iteration 7: Params: [170, 5], Accuracy: 0.8145
Random Search Iteration 8: Params: [146, 19], Accuracy: 0.8187
Random Search Iteration 9: Params: [105, 12], Accuracy: 0.8193
Random Search Iteration 10: Params: [123, 14], Accuracy: 0.8181
Random Search Iteration 11: Params: [51, 14], Accuracy: 0.8169
Random Search Iteration 12: Params: [123, 16], Accuracy: 0.8190
Random Search Iteration 13: Params: [192, 13], Accuracy: 0.8196
Random Search Iteration 14: Params: [37, 8], Accuracy: 0.8189
Random Search Iteration 15: Params: [144, 7], Accuracy: 0.8189
Random Search Iteration 16: Params: [32, 14], Accuracy: 0.8