# Linear regression

In [1]:
from classic.model.linear import LinearRegression, RidgeRegression, SGDRegression, LogisticRegression
from classic.util.scheduler import StaticScheduler
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn import linear_model

In [2]:
# Load scikit-learn's bundled diabetes dataset and list the fields of the
# returned container (the feature matrix and target are pulled out next).
dataset = load_diabetes()
dataset.keys()

dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename', 'data_module'])

In [3]:
# Pull the feature matrix and the regression target out of the loaded
# container, then peek at the first two target values.
X = dataset.data
y = dataset.target
y[:2]

array([151.,  75.])

In [4]:
# Hold out 30% of the samples for evaluation. The split is seeded so that
# Restart & Run All reproduces the same partition (and therefore the same
# metrics); without a seed every run shuffles the rows differently.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

## 1. Simple Linear Regression

In [5]:
# Fit the project's own (`classic`) linear regression on the training split.
lr = LinearRegression()
lr.fit(X_train, y_train)

In [6]:
# Predict on the held-out split; `pred` is read by the MSE comparison cell
# further down.
pred = lr.predict(X_test)

In [7]:
# Reference run: scikit-learn's LinearRegression on the same split.
# `fit` returns the estimator itself, so construction and fitting chain.
lr_sklearn = linear_model.LinearRegression().fit(X_train, y_train)
pred_sklearn = lr_sklearn.predict(X_test)

In [8]:
# Score both sets of test predictions with the same metric and print the
# two values side by side.
mse, mse_sklearn = (
    mean_squared_error(y_test, predictions)
    for predictions in (pred, pred_sklearn)
)
print(f"My MSE={mse}, sklearn MSE={mse_sklearn}")

My MSE=2861.200880556817, sklearn MSE=2861.2008805568184


## 2. Ridge Regression

In [9]:
# Fit the project's ridge regression and report its test MSE.
# The predictions get their own name: rebinding `pred` here would silently
# change what the "My MSE" comparison cell reports if that cell is re-run
# after this one.
ridge = RidgeRegression(alpha=0.05)
ridge.fit(X_train, y_train)
pred_ridge = ridge.predict(X_test)
print(f"Ridge mse = {mean_squared_error(y_test, pred_ridge)}")

Ridge mse = 2780.8287336334374


## 3. SGD Regression

In [10]:
# Train the project's SGD regressor with a fixed learning rate and report
# its test MSE.
# Predictions are stored under a model-specific name so the `pred` read by
# the linear-regression MSE cell is not overwritten by this cell.
scheduler = StaticScheduler(learning_rate=1e-2)
sgd = SGDRegression(n_epochs=50_000, batch_size=32, alpha=0.0005, lr_scheduler=scheduler)
sgd.fit(X_train, y_train)
pred_sgd = sgd.predict(X_test)
print(f"SGD Regression mse = {mean_squared_error(y_test, pred_sgd)}")

SGD Regression mse = 2741.327356741339


In [11]:
# Reference run: scikit-learn's SGDRegressor on the same split.
# SGD shuffles the training data, so the estimator is seeded to make the
# reported MSE reproducible across Restart & Run All.
# NOTE(review): only `max_iter` is set; the regularisation strength and
# learning-rate schedule stay at scikit-learn's defaults, which are not the
# values passed to the custom SGDRegression cell. Confirm whether the
# comparison is meant to use matching settings.
sgd_sklearn = linear_model.SGDRegressor(max_iter=50000, random_state=42)
sgd_sklearn.fit(X_train, y_train)
# Model-specific name: avoids overwriting the `pred` read by the
# linear-regression MSE cell.
pred_sgd_sklearn = sgd_sklearn.predict(X_test)
print(f"Sklearn SGD Regression mse = {mean_squared_error(y_test, pred_sgd_sklearn)}")

Sklearn SGD Regression mse = 2717.438528301624


# Linear classification

In [12]:
# Switch to the breast-cancer classification dataset and show which label
# values occur in the target.
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target
np.unique(y)

array([0, 1])

In [13]:
# Recode class 0 as -1 so the labels are {-1, 1}
# (presumably the label convention the custom LogisticRegression expects;
# confirm against its implementation).
# np.where builds a new array instead of writing into `dataset.target`, so
# the label values displayed by the previous cell stay accurate and this
# cell gives the same result however often it is re-run.
y = np.where(y == 0, -1, y)
# Seeded 70/30 split so the reported accuracies are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

## 1. Logistic Regression

In [26]:
# Train the project's logistic regression with a fixed learning rate and
# report its accuracy on the held-out split.
# Predictions are stored under a model-specific name so this cell does not
# overwrite the `pred` that the linear-regression MSE cell reads.
scheduler = StaticScheduler(learning_rate=1e-2)
logreg = LogisticRegression(n_epochs=5_000, batch_size=32, alpha=0.0005, lr_scheduler=scheduler)
logreg.fit(X_train, y_train)
pred_logreg = logreg.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, pred_logreg)}")

Accuracy = 0.9181286549707602


In [17]:
# Reference run: scikit-learn's LogisticRegression on the same split.
# `fit` returns the estimator itself, so construction and fitting chain.
logreg_sklearn = linear_model.LogisticRegression(max_iter=5000).fit(X_train, y_train)
pred = logreg_sklearn.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, pred)}")

Accuracy = 0.9649122807017544
