In [1]:
import sys 
# Put the parent directory on the module search path. The later
# `from src.linear_models import ...` cells presumably rely on this to find
# the project's `src` package -- confirm against the repository layout.
sys.path.append('../')

import numpy as np
from tqdm import tqdm

## Polynomial Regression

In [2]:
# Training set for the polynomial demo: 500 standard-normal inputs (one
# feature) and a quadratic response with unit-variance Gaussian noise,
# stored as a column vector.
X = np.random.randn(500, 1)
y = (np.square(X[:, 0]) + np.random.normal(size=500)).reshape(-1, 1)

In [3]:
from src.linear_models import PolynomialRegression

# Fit the project's own PolynomialRegression (degree 3) on the training data.
# `lr` is presumably the learning rate of an iterative optimiser -- confirm
# against src.linear_models.
my_PR = PolynomialRegression(degree=3)
my_PR.fit(X, y, lr=0.01)

 10%|█         | 1017/10000 [00:00<00:00, 15295.27it/s]

Converged after 1017 iterations





In [4]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Reference implementation: expand X into polynomial terms up to degree 3,
# then fit ordinary least squares on the expanded design matrix.
sklearn_poly = PolynomialFeatures(degree=3)
X_poly = sklearn_poly.fit(X).transform(X)

sklearn_PR = LinearRegression()
sklearn_PR.fit(X_poly, y)

In [5]:
from sklearn.metrics import mean_squared_error

# Held-out set drawn from the same process as the training data:
# 500 standard-normal inputs and a noisy quadratic response (column vector).
X_test = np.random.randn(500, 1)
y_test = (np.square(X_test[:, 0]) + np.random.normal(size=500)).reshape(-1, 1)

# Compare test-set mean squared error of the scikit-learn pipeline and the
# project's PolynomialRegression.
print("sklearn PolynomialRegression:")
print("----------------------------")
# The feature expander was already fitted on the training data, so the test
# inputs are only transformed -- a preprocessing step should never be
# re-fitted on held-out data.
y_hat_sklearn = sklearn_PR.predict(sklearn_poly.transform(X_test))
print(f"MSE: {mean_squared_error(y_test, y_hat_sklearn)}")

print("\nmy PolynomialRegression:")
print("----------------------------")
# my_PR receives the raw inputs; it presumably builds the polynomial terms
# internally (it was fitted on raw X as well).
y_hat = my_PR.predict(X_test)
print(f"MSE: {mean_squared_error(y_test, y_hat)}")

sklearn PolynomialRegression:
----------------------------
MSE: 0.9787802642517783

my PolynomialRegression:
----------------------------
MSE: 0.9787732665855997


## Logistic Regression

In [2]:
from sklearn.preprocessing import StandardScaler

# Toy binary classification problem: 50 samples with 3 integer features in
# [0, 10); the label is 1 exactly when the second feature exceeds 5.
X = np.random.randint(0, 10, (50, 3))
y = np.where(X[:,1]>5, 1, 0)
# Keep the fitted scaler instead of discarding it, so that held-out data can
# be standardised with the training-set mean and variance.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [None]:
from src.linear_models import LogisticRegression

# Train the project's LogisticRegression on the standardised features and
# print the learned intercept and coefficients.
# NOTE(review): max_iter is passed as the float 1e4 rather than an int; the
# recorded run completed 10000 iterations, so the implementation evidently
# accepts it -- confirm this is intended.
my_LR = LogisticRegression()
my_LR.fit(X, y, max_iter=1e4)
print(my_LR.intercept_, my_LR.coef_)

100%|██████████| 10000/10000 [00:00<00:00, 32756.18it/s]

[-1.19719468] [[-0.02067661]
 [ 3.84482445]
 [-0.31935842]]





In [9]:
# NOTE: this import rebinds the name `LogisticRegression`, which an earlier
# cell bound to the project's class, so cell execution order matters.
from sklearn.linear_model import LogisticRegression

# Reference model with scikit-learn's default settings, fitted on the same
# standardised data. scikit-learn regularises by default, which is a likely
# reason its coefficients differ from my_LR's.
sklearn_LR = LogisticRegression().fit(X, y)
# Last expression of the cell: displays (intercept, coefficients).
(sklearn_LR.intercept_, sklearn_LR.coef_)

(array([-1.06713176]), array([[-0.0391883 ,  2.83963717, -0.18521102]]))

In [10]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Held-out classification set: 500 samples generated with the same rule as
# the training data (label is 1 when the second feature exceeds 5).
X_test = np.random.randint(low=0, high=10, size=(500, 3))
y_test = (X_test[:, 1] > 5).astype(int).reshape(-1, 1)
# NOTE(review): a fresh StandardScaler is fitted on the test set here, so the
# test features are standardised with test statistics rather than with the
# training-set mean/variance the models were fitted under.
X_test = StandardScaler().fit_transform(X_test)

# Report accuracy and confusion matrix on the held-out set, first for the
# scikit-learn model and then for the project's implementation.
print("sklearn LogisticRegression:", "----------------------------", sep="\n")
y_hat_sklearn = sklearn_LR.predict(X_test)
print(f"accuracy: {accuracy_score(y_test, y_hat_sklearn)}")
print(f"confusion matrix:\n {confusion_matrix(y_test, y_hat_sklearn)}")

print("\nmy LogisticRegression:", "----------------------------", sep="\n")
y_hat = my_LR.predict(X_test)
print(f"accuracy: {accuracy_score(y_test, y_hat)}")
print(f"confusion matrix:\n {confusion_matrix(y_test, y_hat)}")

sklearn LogisticRegression:
----------------------------
accuracy: 1.0
confusion matrix:
 [[314   0]
 [  0 186]]

my LogisticRegression:
----------------------------
accuracy: 0.986
confusion matrix:
 [[307   7]
 [  0 186]]


## Poisson Regression

In [60]:
# Training data for the Poisson-regression demo: 50 rows of 3 integer
# features in [0, 10) and a column of counts drawn from Poisson(5).
# The counts are sampled independently of the features.
X = np.random.randint(low=0, high=10, size=(50, 3))
y = np.random.poisson(5, size=50)[:, np.newaxis]

In [None]:
from src.linear_models import PoissonRegression
# Fit the project's PoissonRegression and print the learned parameters.
# NOTE: `my_PR` previously held the polynomial model; it is rebound here.
# `lr`, `max_iter` and `tol` are presumably the optimiser's step size,
# iteration cap and convergence tolerance -- confirm against
# src.linear_models. max_iter is passed as a float (5e5).
my_PR = PoissonRegression()
my_PR.fit(X, y, lr=1e-5, max_iter=5e5, tol=1e-6)
print(my_PR.intercept_, my_PR.coef_)

  3%|▎         | 16962/500000 [00:00<00:10, 44542.06it/s]

Converged after 16962 iterations
[1.74256309] [[-0.00744019]
 [ 0.00910656]
 [-0.00163493]]





In [66]:
from sklearn.linear_model import PoissonRegressor
# Reference model: scikit-learn's PoissonRegressor with default settings,
# fitted on the same training data.
sklearn_PR = PoissonRegressor()
# y is stored as an (n, 1) column vector, but scikit-learn expects a 1-D
# target; flattening it avoids the `column_or_1d` conversion warning.
sklearn_PR.fit(X, y.ravel())
print(sklearn_PR.intercept_, sklearn_PR.coef_)

1.7446243567044708 [-0.00746065  0.00870162 -0.00160346]


  y = column_or_1d(y, warn=True)


In [67]:
from sklearn.metrics import mean_squared_error

# Held-out set: 500 rows of 3 integer features in [0, 10). Each count is
# drawn from a Poisson distribution whose rate is that row's second feature.
X_test = np.random.randint(0, 10, (500, 3))
# One vectorised draw (np.random.poisson accepts an array of rates) replaces
# the per-row Python loop.
# NOTE(review): here the rate depends on the features, whereas the training
# targets are drawn with a constant rate of 5 independent of X -- confirm
# this train/test mismatch is intended, since it inflates the reported MSE.
y_test = np.random.poisson(lam=X_test[:, 1]).reshape(-1, 1)

# Test-set mean squared error, first for scikit-learn's PoissonRegressor and
# then for the project's PoissonRegression.
print("sklearn PoissonRegression:", "----------------------------", sep="\n")
y_hat_sklearn = sklearn_PR.predict(X_test)
print(f"MSE: {mean_squared_error(y_test, y_hat_sklearn)}")

print("\nmy PoissonRegression:", "----------------------------", sep="\n")
y_hat = my_PR.predict(X_test)
print(f"MSE: {mean_squared_error(y_test, y_hat)}")

sklearn PoissonRegression:
----------------------------
MSE: 12.776912326778337

my PoissonRegression:
----------------------------
MSE: 12.737270137057592


In [68]:
# Show the first ten test rows side by side:
# true count, scikit-learn prediction, project-model prediction.
for row in zip(y_test[:10], y_hat_sklearn[:10], y_hat[:10]):
    print(*row)

[8] 5.878168335204672 [5.88032483]
[2] 6.063752119213472 [6.06806115]
[8] 5.8532172989004065 [5.85585301]
[6] 5.8459581350097665 [5.84610314]
[1] 5.476930347926387 [5.46767694]
[2] 5.4275111700975565 [5.41828156]
[7] 5.709477935042317 [5.70754147]
[4] 5.706300652480878 [5.703351]
[3] 5.615677154716526 [5.6196412]
[8] 5.753116694251083 [5.76029812]


## Linear Regression

In [7]:
# Training data for the linear-regression demo: 50 rows of 3 integer features
# in [0, 10); the response is the first feature plus unit-variance Gaussian
# noise, stored as a column vector.
X = np.random.randint(low=0, high=10, size=(50, 3))
y = (X[:, 0] + np.random.normal(size=50)).reshape(-1, 1)

In [None]:
# NOTE: this import rebinds `LinearRegression` (an earlier cell imports
# scikit-learn's class under the same name), and `my_LR` previously held the
# logistic model -- cell execution order matters.
from src.linear_models import LinearRegression
# Fit the project's LinearRegression and print the learned parameters.
# `lr`, `max_iter` and `tol` are presumably the optimiser's step size,
# iteration cap and convergence tolerance -- confirm against
# src.linear_models. max_iter is passed as a float (5e5).
my_LR = LinearRegression()
my_LR.fit(X, y, lr=1e-5, max_iter=5e5, tol=1e-6)
print(my_LR.intercept_, my_LR.coef_)

  8%|▊         | 38787/500000 [00:01<00:14, 32767.08it/s]

Converged after 38787 iterations
[0.0962216] [[ 1.00276225]
 [-0.00801914]
 [ 0.01859633]]





In [10]:
# NOTE: rebinds `LinearRegression` from the project class back to
# scikit-learn's implementation.
from sklearn.linear_model import LinearRegression
# Closed-form least-squares reference fit on the same training data.
sklearn_LR = LinearRegression().fit(X, y)
print(sklearn_LR.intercept_, sklearn_LR.coef_)

[0.55860379] [[ 0.97744814 -0.04122052 -0.01641225]]


In [11]:
from sklearn.metrics import mean_squared_error

# Held-out set from the same process as the training data: 500 rows, with the
# response equal to the first feature plus unit-variance Gaussian noise.
X_test = np.random.randint(low=0, high=10, size=(500, 3))
y_test = (X_test[:, 0] + np.random.normal(size=500)).reshape(-1, 1)

# Test-set mean squared error for the two linear-regression models.
# The headers now name LinearRegression; they previously said
# "PoissonRegression", copied over from the Poisson section.
print("sklearn LinearRegression:")
print("----------------------------")
y_hat_sklearn = sklearn_LR.predict(X_test)
print(f"MSE: {mean_squared_error(y_test, y_hat_sklearn)}")

print("\nmy LinearRegression:")
print("----------------------------")
y_hat = my_LR.predict(X_test)
print(f"MSE: {mean_squared_error(y_test, y_hat)}")

sklearn PoissonRegression:
----------------------------
MSE: 1.0423050742739506

my PoissonRegression:
----------------------------
MSE: 1.003154421152537


In [13]:
# Side-by-side preview of the first ten test rows:
# true response, scikit-learn prediction, project-model prediction.
print("True, sklearn, mine")
for row in zip(y_test[:10], y_hat_sklearn[:10], y_hat[:10]):
    print(*row)

True, sklearn, mine
[0.50129658] [1.33872516] [1.21311991]
[4.93237506] [6.01948345] [6.07781555]
[8.44279345] [8.16444988] [8.251052]
[2.99358331] [2.2833488] [2.25307482]
[7.63646569] [6.08551224] [6.11245015]
[7.64277733] [9.19113476] [9.19802525]
[7.16335419] [5.90459772] [6.20798988]
[8.09732175] [8.09002506] [8.17120559]
[8.54445288] [9.2403715] [9.14223625]
[8.0074246] [7.93353901] [8.18434085]
