# Linear regression

In [1]:
import sys

sys.path.insert(1, "..")

from classic.model.linear import (
    LinearRegression,
    RidgeRegression,
    SGDRegression,
    LogisticRegression,
)
from classic.util.scheduler import StaticScheduler
from sklearn.datasets import load_diabetes, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn import linear_model

In [2]:
# Load the diabetes regression dataset and hold out 30% of it for evaluation.
# The split is seeded so that a fresh "Restart & Run All" reproduces the same
# train/test partition; without a seed every metric in the regression section
# changes from run to run and cannot be compared across sessions.
dataset = load_diabetes()
X, y = dataset["data"], dataset["target"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

## 1. Simple Linear Regression

In [3]:
# Side-by-side check: fit the project's LinearRegression and scikit-learn's
# on the same training split, then score both on the same test split.

# Project implementation.
lr = LinearRegression()
lr.fit(X_train, y_train)
pred = lr.predict(X_test)
mse = mean_squared_error(y_test, pred)

# scikit-learn reference (its fit() returns the estimator, so it can be chained).
lr_sklearn = linear_model.LinearRegression().fit(X_train, y_train)
pred_sklearn = lr_sklearn.predict(X_test)
mse_sklearn = mean_squared_error(y_test, pred_sklearn)

print(f"My MSE={mse}, sklearn MSE={mse_sklearn}")

My MSE=3300.469378319162, sklearn MSE=3300.4693783191583


## 2. Ridge Regression

In [4]:
# Fit the project's RidgeRegression with alpha=0.05 and report its MSE on the
# held-out test split.
# NOTE(review): alpha is presumably the L2 penalty strength — confirm against
# classic.model.linear.
ridge = RidgeRegression(alpha=0.05)
ridge.fit(X_train, y_train)
pred = ridge.predict(X_test)
ridge_mse = mean_squared_error(y_test, pred)
print(f"Ridge mse = {ridge_mse}")

Ridge mse = 3276.2339721547014


## 3. SGDRegression

In [5]:
# Train the project's SGDRegression with a StaticScheduler built with
# learning_rate=1e-2, then report its MSE on the held-out test split.
scheduler = StaticScheduler(learning_rate=1e-2)
sgd = SGDRegression(
    n_epochs=50_000,
    batch_size=32,
    alpha=0.0005,
    lr_scheduler=scheduler,
)
sgd.fit(X_train, y_train)
pred = sgd.predict(X_test)
sgd_mse = mean_squared_error(y_test, pred)
print(f"SGD Regression mse = {sgd_mse}")

SGD Regression mse = 3239.904604924718


In [6]:
# scikit-learn reference for the SGD regressor. SGDRegressor shuffles the
# training data each epoch using its own RNG, so random_state is pinned to make
# the reported score reproducible across kernel restarts.
# NOTE(review): the remaining hyperparameters are scikit-learn defaults and are
# not matched to the project SGDRegression settings used elsewhere — confirm
# whether a like-for-like comparison is intended.
sgd_sklearn = linear_model.SGDRegressor(max_iter=50000, random_state=42)
sgd_sklearn.fit(X_train, y_train)
pred = sgd_sklearn.predict(X_test)
print(f"Sklearn SGD Regression mse = {mean_squared_error(y_test, pred)}")

Sklearn SGD Regression mse = 3249.4643914031963


# Linear classification

In [7]:
# Load the breast-cancer classification dataset and hold out 30% of it for
# evaluation.
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target
# Recode class 0 as -1 so the labels are {-1, +1}. `y` is the same array object
# as `dataset.target`, so this relabelling is visible through `dataset` as well.
y[y == 0] = -1
# Seed the split so accuracy figures are reproducible on a fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

## 1. Logistic Regression

In [8]:
# Train the project's LogisticRegression with a StaticScheduler built with
# learning_rate=1e-2, then report accuracy on the held-out test split.
scheduler = StaticScheduler(learning_rate=1e-2)
logreg = LogisticRegression(
    n_epochs=5_000,
    batch_size=32,
    alpha=0.0005,
    lr_scheduler=scheduler,
)
logreg.fit(X_train, y_train)
pred = logreg.predict(X_test)
logreg_accuracy = accuracy_score(y_test, pred)
print(f"Accuracy = {logreg_accuracy}")

Accuracy = 0.8888888888888888


In [9]:
# scikit-learn reference classifier, fitted on the same training split and
# scored on the same test split.
logreg_sklearn = linear_model.LogisticRegression(max_iter=5000).fit(
    X_train, y_train
)
pred = logreg_sklearn.predict(X_test)
sklearn_accuracy = accuracy_score(y_test, pred)
print(f"Accuracy (Sklearn) = {sklearn_accuracy}")

Accuracy (Sklearn) = 0.9532163742690059
