In [194]:
!pip install optuna



In [195]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeClassifier
from pandas import read_csv

In [196]:
file_path = "/workspaces/eargweth/no_null_encoded_titanic.csv"
dataset = read_csv(file_path)

In [197]:
label_of_the_column_that_we_want = "survived"
y = dataset[label_of_the_column_that_we_want]
display(y)

0      0
1      1
2      1
3      1
4      0
      ..
709    0
710    0
711    1
712    1
713    0
Name: survived, Length: 714, dtype: int64

In [198]:
columns_to_drop = ['survived', 'alive']

X = dataset.drop(columns_to_drop, axis = 1)

display(X)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,pclass,age,sibsp,parch,fare,adult_male,alone,sex_female,...,deck_A,deck_B,deck_C,deck_D,deck_E,deck_F,deck_G,embark_town_Cherbourg,embark_town_Queenstown,embark_town_Southampton
0,0,0,3,22.0,1,0,7.2500,True,False,False,...,False,False,False,False,False,False,False,False,False,True
1,1,1,1,38.0,1,0,71.2833,False,False,True,...,False,False,True,False,False,False,False,True,False,False
2,2,2,3,26.0,0,0,7.9250,False,True,True,...,False,False,False,False,False,False,False,False,False,True
3,3,3,1,35.0,1,0,53.1000,False,False,True,...,False,False,True,False,False,False,False,False,False,True
4,4,4,3,35.0,0,0,8.0500,True,True,False,...,False,False,False,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
709,885,885,3,39.0,0,5,29.1250,False,False,True,...,False,False,False,False,False,False,False,False,True,False
710,886,886,2,27.0,0,0,13.0000,True,True,False,...,False,False,False,False,False,False,False,False,False,True
711,887,887,1,19.0,0,0,30.0000,False,True,True,...,False,True,False,False,False,False,False,False,False,True
712,889,889,1,26.0,0,0,30.0000,True,True,False,...,False,False,True,False,False,False,False,True,False,False


In [199]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42)

In [200]:

def objective(trial):

    suggested_alpha = trial.suggest_float(name = "alpha", low = 0 , high = 100)

    suggested_fit_intercept = trial.suggest_categorical(name = "fit_intercept", choices = [True, False])

    suggested_solver = trial.suggest_categorical(name = "solver", choices = ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga", "lbfgs"])

    suggested_max_iter = trial.suggest_int(name = "max_iter", low = 100, high = 100000)

    if suggested_solver == "lbfgs":
        suggested_positive_hyperparameter = True
    else:
        suggested_positive_hyperparameter = False
    
    model = RidgeClassifier(
        alpha = suggested_alpha,
        fit_intercept= suggested_fit_intercept,
        solver = suggested_solver,
        max_iter= suggested_max_iter,
        positive = suggested_positive_hyperparameter
    )

    model.fit(X_train, y_train)

    predictions = model.predict(X_test)

    val_score = accuracy_score(y_test, predictions)

    return val_score

In [201]:
from optuna import create_study

study = create_study(direction="maximize")

study.optimize(func = objective, n_trials = 100)

best_trial = study.best_trial

best_parameters = best_trial.params

display(best_parameters)

[I 2025-07-15 17:30:28,057] A new study created in memory with name: no-name-0bb3330c-72c5-4906-87f3-ebffcef46b38
[I 2025-07-15 17:30:28,074] Trial 0 finished with value: 0.8156424581005587 and parameters: {'alpha': 70.88162373477556, 'fit_intercept': False, 'solver': 'lsqr', 'max_iter': 10877}. Best is trial 0 with value: 0.8156424581005587.
[I 2025-07-15 17:30:28,474] Trial 1 finished with value: 0.8212290502793296 and parameters: {'alpha': 48.224644737376686, 'fit_intercept': True, 'solver': 'sag', 'max_iter': 95068}. Best is trial 1 with value: 0.8212290502793296.
[I 2025-07-15 17:30:28,481] Trial 2 finished with value: 0.8268156424581006 and parameters: {'alpha': 67.53976621255417, 'fit_intercept': True, 'solver': 'svd', 'max_iter': 63676}. Best is trial 2 with value: 0.8268156424581006.
[I 2025-07-15 17:30:28,487] Trial 3 finished with value: 0.8212290502793296 and parameters: {'alpha': 99.90208134127745, 'fit_intercept': True, 'solver': 'cholesky', 'max_iter': 18024}. Best is tr

{'alpha': 52.554619140324036,
 'fit_intercept': False,
 'solver': 'svd',
 'max_iter': 81023}