In [None]:
from optimization import optimize
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from oracle import Oracle, make_oracle
import seaborn as sns
from typing import List
from tabulate import tabulate

In [None]:
from sklearn.linear_model import LogisticRegression
def optimize_sklearn(oracle):
    """Return the oracle's objective at scikit-learn's logistic-regression fit.

    A ``LogisticRegression`` without penalty and without an intercept is
    fitted on ``oracle.X`` against the flattened ``oracle.Y``; the fitted
    coefficients are reshaped into a single column and passed to
    ``oracle.value``. The result serves as the reference value the
    hand-written optimizers are compared with.

    Args:
        oracle: Object exposing ``X``, ``Y`` and ``value(w)``.
            Presumably ``X`` is (n_samples, n_features) and ``Y`` holds one
            label per sample -- TODO confirm against ``make_oracle``.

    Returns:
        Whatever ``oracle.value`` returns for the fitted weight column.
    """
    # NOTE(review): newer scikit-learn releases spell this option
    # ``penalty=None``; confirm against the installed version.
    reference_model = LogisticRegression(
        fit_intercept=False,
        penalty="none",
        tol=1e-8,
        max_iter=10000,
        n_jobs=-1,
    )
    flat_labels = oracle.Y.ravel()
    reference_model.fit(oracle.X, flat_labels)

    # The oracle is handed a column vector, so coef_ is reshaped to (-1, 1).
    weight_column = reference_model.coef_.reshape(-1, 1)
    return oracle.value(weight_column)

In [None]:
# Size of the synthetic problem.
num_points = 4000
num_features = 50

# Random weight column handed to make_oracle, then the sklearn reference value.
w = np.random.uniform(-1, 1, size=(num_features, 1))
oracle = make_oracle(size=num_points, w=w)
true_ans = optimize_sklearn(oracle)

# Every optimizer run starts from the zero vector.
w0 = np.zeros((oracle.m, 1))

optimization_methods = ["gradient_descent", "newton", "conjugate_gradient"]
linesearch_methods = ["golden_section", "brent", "dbrent", "armijo", "wolfe"]

# (c1, c2) pair passed to optimize() for each line search; any method that
# is not listed here receives the fallback pair.
line_search_constants = {
    "armijo": (0.25, None),
    "wolfe": (1e-4, 0.9),
    "nesterov": (2.0, 2.0),
}
fallback_constants = (0.25, None)

to_df = []
for opt in optimization_methods:
    # "nesterov" is only tried together with gradient descent.
    to_iter = (
        linesearch_methods + ["nesterov"]
        if opt == "gradient_descent"
        else linesearch_methods
    )

    for ls in to_iter:
        c1, c2 = line_search_constants.get(ls, fallback_constants)
        _, _, log = optimize(
            w0,
            oracle,
            opt,
            ls,
            output_log=True,
            c1=c1,
            c2=c2,
            tol=1e-30,
        )
        # Only the log's `best` entry of each run goes into the summary.
        to_df.append(log.best)

# Stack the per-run summaries and render them as a GitHub-style table,
# with the column names as the header row.
df = pd.concat(to_df, ignore_index=True)
table = [df.columns.values.tolist(), *df.values.tolist()]
print(f"- {num_points} x {num_features} {true_ans}-")
# Presumably one float format per column of the summary -- TODO confirm
# against the columns of `log.best`.
column_formats = ["", "", ".0e", ".2e", ".2e", "", ".2e", "", ".4f", ".1e"]
print(tabulate(table, headers="firstrow", tablefmt="github", floatfmt=column_formats))

Случайный датасет: $4000$ точек и $50$ признаков. Значение энтропии, полученное с помощью **sklearn**: 3.98039543e-12

| OptMethod          | LineSearch     |   tol |       c1 |       c2 |                entropy |   num_iter |   oracle_calls |     time |      rk |
|--------------------|----------------|-------|----------|----------|------------------------|------------|----------------|----------|---------|
| gradient_descent   | golden_section | 1e-30 | 2.50e-01 |          |  0.0009587512255371034 |      10000 |         465006 |  72.4897 | 2.3e-09 |
| gradient_descent   | brent          | 1e-30 | 2.50e-01 |          |  0.0009580380514109347 |      10000 |         232481 |  40.0178 | 2.3e-09 |
| gradient_descent   | dbrent         | 1e-30 | 2.50e-01 |          |  0.0009587523801053996 |      10000 |         319433 | 191.1798 | 2.3e-09 |
| gradient_descent   | armijo         | 1e-30 | 2.50e-01 |          |  0.0002495345717689766 |      10000 |          87420 |  22.2659 | 2.1e-10 |
| gradient_descent   | wolfe          | 1e-30 | 1.00e-04 | 9.00e-01 |  0.0006462985712436903 |      10000 |         214522 |  70.0297 | 6.1e-10 |
| gradient_descent   | nesterov       | 1e-30 | 2.00e+00 | 2.00e+00 |  0.000677562148132333  |      10000 |          39987 |  10.8523 | 7.7e-10 |
| newton             | golden_section | 1e-30 | 2.50e-01 |          | -9.951572343953082e-13 |         39 |            115 |   0.1067 | 7.0e-31 |
| newton             | brent          | 1e-30 | 2.50e-01 |          | -9.951572343953082e-13 |         39 |            115 |   0.1102 | 7.0e-31 |
| newton             | dbrent         | 1e-30 | 2.50e-01 |          | -9.951572343953082e-13 |         39 |            115 |   0.1292 | 7.0e-31 |
| newton             | armijo         | 1e-30 | 2.50e-01 |          | -9.951572343953082e-13 |         39 |            115 |   0.1127 | 7.0e-31 |
| newton             | wolfe          | 1e-30 | 1.00e-04 | 9.00e-01 | -9.951572343953082e-13 |         39 |            115 |   0.1072 | 7.0e-31 |
| conjugate_gradient | golden_section | 1e-30 | 2.50e-01 |          |  3.073073741203821e-10 |         28 |             82 |   0.2319 | 3.8e-32 |
| conjugate_gradient | brent          | 1e-30 | 2.50e-01 |          |  3.073073741203821e-10 |         28 |             82 |   0.2349 | 3.8e-32 |
| conjugate_gradient | dbrent         | 1e-30 | 2.50e-01 |          |  3.073073741203821e-10 |         28 |             82 |   0.2399 | 3.8e-32 |
| conjugate_gradient | armijo         | 1e-30 | 2.50e-01 |          |  3.073073741203821e-10 |         28 |             82 |   0.2573 | 3.8e-32 |
| conjugate_gradient | wolfe          | 1e-30 | 1.00e-04 | 9.00e-01 |  3.073073741203821e-10 |         28 |             82 |   0.2748 | 3.8e-32 |

В среднем сложно что-то сказать: градиентный спуск не сошелся за $10000$ итераций, сопряженные градиенты сколько-то приблизились к правильному ответу, а Ньютон сошел с ума и упал ниже подвала.