In [1]:
from LinearModels import MyLinearRegression, MyLogisticRegression
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso
from LinearModels import MySVMClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.datasets import make_regression, make_classification
from ModelEvaluation import ModelEvaluation
from Metrics import *

In [2]:
def main_regression(random_state=42):
    """Compare the hand-written linear regressors with their scikit-learn counterparts.

    A synthetic regression problem is generated once and every model is scored
    on it through the same ``ModelEvaluation`` instance with MSE and R2. The
    resulting comparison table is printed.

    Args:
        random_state: Seed used for the synthetic dataset and for scikit-learn's
            stochastic ``sag`` Ridge solver, so that repeated runs of the cell
            print the same numbers. Defaults to 42 (the value the dataset was
            originally generated with).

    Returns:
        None. The table returned by ``ModelEvaluation.get_result()`` is printed.
    """
    metrics = [MSE(), R2()]
    X, y = make_regression(
        n_samples=10_000,
        n_features=10,
        n_informative=4,
        noise=50,
        random_state=random_state,
        coef=False,
    )

    def decaying_lr(x):
        # Exponentially decaying step size shared by all custom models;
        # `x` is presumably the iteration index -- confirm in LinearModels.
        return 0.8 * (0.95**x)

    # Named `evaluation` rather than `eval` to avoid shadowing the builtin.
    evaluation = ModelEvaluation(X, y)

    evaluation.evaluate_model(
        MyLinearRegression(learning_rate=decaying_lr),
        model_name="My OLS",
        metrics=metrics,
    )
    evaluation.evaluate_model(LinearRegression(), model_name="Sklearn OLS", metrics=metrics)

    evaluation.evaluate_model(
        MyLinearRegression(learning_rate=decaying_lr, penalty="L1", alpha=1),
        model_name="My Lasso",
        metrics=metrics,
    )
    evaluation.evaluate_model(Lasso(alpha=1), model_name="Sklearn Lasso", metrics=metrics)

    evaluation.evaluate_model(
        MyLinearRegression(learning_rate=decaying_lr, penalty="L2", alpha=50),
        model_name="My Ridge",
        metrics=metrics,
    )
    # The "sag" solver shuffles the data; without a seed the Ridge row changes
    # slightly from run to run, which makes the comparison non-reproducible.
    evaluation.evaluate_model(
        Ridge(alpha=50, solver="sag", random_state=random_state),
        model_name="Sklearn Ridge",
        metrics=metrics,
    )

    print(evaluation.get_result())

# Run the regression comparison when this code executes as the main module
# (the log lines and results table recorded below this cell come from this call).
if  __name__ == "__main__":
    main_regression()

2024-12-03 18:43:33 PM - model training - INFO - Алгоритм сошелся. Кол-во итераций: 15
2024-12-03 18:43:33 PM - model training - INFO - Алгоритм сошелся. Кол-во итераций: 171
2024-12-03 18:43:33 PM - model training - INFO - Алгоритм сошелся. Кол-во итераций: 15


           model          MSE        R2
0         My OLS  2462.036216  0.824928
1    Sklearn OLS  2462.036489  0.824928
2       My Lasso  2459.356273  0.825119
3  Sklearn Lasso  2459.356262  0.825119
4       My Ridge  2461.210863  0.824987
5  Sklearn Ridge  2461.213350  0.824986


In [3]:
# Regularization is not implemented for LogReg because it would be identical to the LinReg implementation.
# With a dynamic (scheduled) learning rate the result does not always converge to sklearn's, probably because of the solver.
def main_classification(random_state=42):
    """Compare the hand-written classifiers with their scikit-learn counterparts.

    A synthetic binary classification problem is generated once and every model
    is scored on it through the same ``ModelEvaluation`` instance. The logistic
    regressions are evaluated with Accuracy and ROC AUC (``predict_proba=True``
    is passed to the evaluator); the SVM-style classifiers are evaluated with
    Accuracy only. The resulting comparison table is printed.

    Args:
        random_state: Seed used for the synthetic dataset and for the
            scikit-learn estimators that shuffle the data (``sag`` logistic
            regression and ``SGDClassifier``), so that repeated runs of the
            cell print the same numbers. Defaults to 42 (the value the dataset
            was originally generated with).

    Returns:
        None. The table returned by ``ModelEvaluation.get_result()`` is printed.
    """
    proba_metrics = [Accuracy(), ROCAUC()]
    predict_proba = True
    X, y = make_classification(
        n_samples=10_000,
        n_features=10,
        n_informative=4,
        n_redundant=0,
        n_classes=2,
        random_state=random_state,
        flip_y=0.2,
    )

    # Named `evaluation` rather than `eval` to avoid shadowing the builtin.
    evaluation = ModelEvaluation(X, y)

    evaluation.evaluate_model(
        MyLogisticRegression(learning_rate=0.1),
        model_name="My LogReg",
        metrics=proba_metrics,
        predict_proba=predict_proba,
    )
    # "sag" shuffles the data, so it is seeded to keep the row reproducible.
    evaluation.evaluate_model(
        LogisticRegression(penalty=None, solver="sag", random_state=random_state),
        model_name="Sklearn LogReg",
        metrics=proba_metrics,
        predict_proba=predict_proba,
    )

    evaluation.evaluate_model(
        MySVMClassifier(learning_rate=0.001, alpha=1, verbose=False),
        model_name='My SVC',
        metrics=[Accuracy()],
    )
    # SGDClassifier with hinge loss (its default, spelled out here) is a linear
    # SVM trained by SGD; it shuffles the data, so it is seeded as well.
    evaluation.evaluate_model(
        SGDClassifier(loss="hinge", alpha=1, random_state=random_state),
        model_name='Sklearn SVC',
        metrics=[Accuracy()],
    )

    print(evaluation.get_result())

# Run the classification comparison when this code executes as the main module
# (the log lines and results table recorded below this cell come from this call).
if  __name__ == "__main__":
    main_classification()

2024-12-03 18:43:38 PM - model training - INFO - Алгоритм сошелся. Кол-во итераций: 475
2024-12-03 18:43:39 PM - model training - INFO - Алгоритм сошелся. Кол-во итераций: 13


            model  Accuracy   ROC AUC
0       My LogReg  0.680333  0.756508
1  Sklearn LogReg  0.681000  0.756502
2          My SVC  0.705333       NaN
3     Sklearn SVC  0.701667       NaN
