In [52]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_diabetes, load_breast_cancer

In [5]:
import sys
import os

# Make the parent of the current working directory importable (presumably
# where the project-local `utils` package imported later lives — confirm).
# The membership check keeps this cell idempotent: re-running it does not
# append duplicate entries to sys.path.
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [79]:
# Diabetes dataset: feature matrix X and target vector y used by the
# regression cells.
data = load_diabetes()
X, y = data.data, data.target

In [80]:
# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# Reference estimators: unpenalised, L1-penalised and L2-penalised.
linear_model = LinearRegression()
lasso_model = Lasso(alpha=1)
ridge_model = Ridge(alpha=10)

# Fit each estimator on the training split and report its test-set MSE.
for model in (linear_model, lasso_model, ridge_model):
    y_pred = model.fit(X_train, y_train).predict(X_test)

    print(f'Model: {model}. MSE: {mean_squared_error(y_test, y_pred)}')

Model: LinearRegression(). MSE: 2724.258079598755
Model: Lasso(alpha=1). MSE: 3981.380656013544
Model: Ridge(alpha=10). MSE: 5326.446992922956


In [81]:
# Evict any cached copy of the module so the import below executes it again
# instead of reusing the entry already held in sys.modules.
sys.modules.pop('utils.MyRegression', None)

from utils.MyRegression import MyRegression

In [82]:
# Same 80/20 split and seed as the scikit-learn comparison cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# Three MyRegression configurations: no penalty arguments, l1_ratio=1, l2_ratio=10.
my_linear_model = MyRegression(early_stopping_rounds=1000)
my_lasso_model = MyRegression(l1_ratio=1, early_stopping_rounds=1000)
my_ridge_model = MyRegression(l2_ratio=10, early_stopping_rounds=1000)

# Fit each configuration and report its test-set MSE next to its penalty settings.
for model in (my_linear_model, my_lasso_model, my_ridge_model):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f'L1: {model.l1_ratio}, L2: {model.l2_ratio}. MSE: {mean_squared_error(y_test, y_pred)}')

L1: 0, L2: 0. MSE: 2787.589524543088
L1: 1, L2: 0. MSE: 3991.9906719007695
L1: 0, L2: 10. MSE: 6307.611462977863


In [93]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, confusion_matrix, roc_auc_score

In [83]:
# Breast-cancer dataset for the classification comparison.
# NOTE: this rebinds X and y, which held the diabetes data before this cell;
# re-run the diabetes loading cell before re-running the regression cells.
classification_data = load_breast_cancer()
clssification_data = classification_data  # original misspelled name, kept as an alias
X = classification_data.data
y = classification_data.target

In [84]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# Fit the scaler on the training split only, then apply that same fitted
# transform to the test split.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [105]:
# scikit-learn baseline: fit on the scaled training features and keep the
# second predict_proba column as the score for the held-out split.
model = LogisticRegression().fit(X_train_scaled, y_train)
y_pred = model.predict_proba(X_test_scaled)[:, 1]

# confusion_matrix is not reported: y_pred holds probabilities here, not labels.
# Print log loss first, then ROC AUC.
for score_fn in (log_loss, roc_auc_score):
    print(score_fn(y_test, y_pred))

0.07436755510664023
0.9913130638155696


In [106]:
# Custom implementation in logistic mode, trained on the same scaled split.
model = MyRegression(type='logistic')
model.fit(X_train_scaled, y_train)

# predict() output is passed straight to log_loss / roc_auc_score, so it is
# presumably a probability-like score rather than a hard label — confirm
# against MyRegression.
y_pred = model.predict(X_test_scaled)

# Print log loss first, then ROC AUC.
for score_fn in (log_loss, roc_auc_score):
    print(score_fn(y_test, y_pred))

0.09282052518591602
0.9909789508853992
