# Linear Regression



## Example of one feature(x1)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Synthetic one-feature regression data: y = 20*x + 10 plus Gaussian noise.
n = 100  # number of samples
x = np.random.randn(n)
y = (x * 20 + 10) + np.random.randn(n) * 10  # true w=20, b=10; noise std 10

plt.scatter(x, y)

In [None]:
# Fit y ~ w*x + b with batch gradient descent on the mean squared error.
w, b = np.random.randn(), np.random.randn()  # random starting parameters

lr = 0.05  # learning rate
n_epoch = 200
lossHistory = []

for epoch in range(n_epoch):
    y_pred = w * x + b
    residual = y_pred - y
    loss = (residual ** 2).mean()

    # MSE gradient formulas: dL/dw = 2*mean(residual*x), dL/db = 2*mean(residual)
    w -= lr * 2 * (residual * x).mean()
    b -= lr * 2 * residual.mean()
    lossHistory.append(loss)
    if epoch % 10 == 0:
        # progress report every 10th epoch
        print('epoch=', epoch, 'loss=', loss, 'w=', w,'b=',b)
print('-----------------------------------')
print('epoch=', epoch, 'loss=', loss,'w=',w,'b=',b)


In [None]:
# Figure 1: the samples with the fitted line drawn over x in [-5, 5].
plt.figure(figsize=(4, 4))
plt.scatter(x, y)

xx = np.linspace(-5, 5, 100)
yy = w * xx + b
plt.plot(xx, yy, color='r')
plt.show()

# Figure 2: loss recorded at every epoch of the training loop.
fig = plt.figure()
plt.plot(np.arange(n_epoch), lossHistory)
fig.suptitle("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()

## Training two features(x1,x2)

In [None]:
import numpy as np
import pandas as pd

n = 100  # number of samples
x1 = np.random.randn(n)
x2 = np.random.randn(n)

# Ground truth: y = 30*x1 + 40*x2 + 50, plus Gaussian noise (std 20).
y = x1*30 + x2*40 + 50
y = y + np.random.randn(n)*20

# Random starting parameters.
w1 = np.random.rand()
w2 = np.random.rand()
b = np.random.rand()

lr = 0.02  # learning rate
n_epoch = 200
lossHistory = []

for epoch in range(n_epoch):
    y_pred = w1*x1 + w2*x2 + b
    residual = y_pred - y
    error = (residual**2).mean()  # mean squared error of this epoch

    # Exact MSE gradient, including the factor 2 that the one-feature loop
    # also uses; without it the effective learning rate is halved and the
    # parameters are still noticeably short of the optimum after n_epoch steps.
    w1 -= lr*2*(residual*x1).mean()
    w2 -= lr*2*(residual*x2).mean()
    b -= lr*2*residual.mean()
    lossHistory.append(error)

print('----------------------------')
print('epoch=', epoch, 'error=', error, 'w1=', w1.round(2), 'w2=', w2.round(2), 'b=', b.round(2))


In [None]:
# Left panel: 3D scatter of the samples with a red line lying on the fitted plane.
plt.figure(figsize=(8, 4))
ax1 = plt.subplot(1, 2, 1, projection='3d')
ax1.scatter3D(x1, x2, y)

xx = np.linspace(-3, 3, 100)
yy = np.linspace(-2, 2, 100)
zz = w1*x1 + w2*x2 + b  # fitted values at the training points (not plotted)
ax1.plot(xx, yy, w1*xx + w2*yy + b, color='r')

# Right panel: loss per epoch.
ax2 = plt.subplot(1, 2, 2)
ax2.plot(np.arange(n_epoch), lossHistory)
ax2.set_title("Training Loss")
ax2.set_xlabel("Epoch #")
ax2.set_ylabel("Loss")

plt.subplots_adjust(wspace=0.5)
plt.show()

## Using regression function(Linear Regression)

In [None]:
from sklearn.linear_model import LinearRegression, SGDRegressor

# Same synthetic two-feature problem, solved with scikit-learn's LinearRegression.
n = 100
x1, x2 = np.random.randn(n), np.random.randn(n)
y = x1*30+x2*40+50 + np.random.randn(n)*20
X = np.column_stack([x1, x2])  # (n, 2) design matrix, one column per feature

model = LinearRegression().fit(X, y)

print("score: ", model.score(X,y))
print('w1=', model.coef_[0], 'w2=', model.coef_[1], 'b=', model.intercept_)

In [None]:
# One new sample, wrapped in a list so it is passed as a one-row batch.
new_X = [1, 3]
single_prediction = model.predict([new_X])
print(single_prediction)

# A batch of 10 random two-feature samples.
z = np.random.rand(10, 2)
model.predict(z)

In [None]:
# Pull the fitted parameters out of the model and draw them over the data in 3D.
w1 = model.coef_[0]
w2 = model.coef_[1]
b = model.intercept_
print("w1 = {}, w2 = {} , b = {}".format(w1,w2,b))

fig = plt.figure()
ax = plt.axes(projection='3d')
ax.scatter3D(x1, x2, y)

xx = np.linspace(-3, 3, 100)
yy = np.linspace(-2, 2, 100)
zz = w1*x1 + w2*x2 + b  # fitted values at the training points (not plotted)
ax.plot(xx, yy, w1*xx + w2*yy + b, color='r')

In [None]:
# Same regression problem, fitted with stochastic gradient descent.
model = SGDRegressor().fit(X, y)
print("score= ", model.score(X,y))
w1 = model.coef_[0]
w2 = model.coef_[1]
b = model.intercept_
print("w1 = {}, w2 = {}, b = {}".format(w1,w2,b))

In [None]:
# Closed-form least squares: append a column of ones for the bias, then solve
# the normal equations (X2^T X2) w = X2^T y.
X2 = np.concatenate([X, np.ones(len(X)).reshape(-1,1)], axis=1)
# np.linalg.solve is cheaper and numerically more accurate than forming the
# explicit inverse of X2^T X2 and multiplying by it.
w_ols = np.linalg.solve(X2.T.dot(X2), X2.T.dot(y))
w_ols

## Use make_regression function for training data

In [None]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
# 2000 samples, 2 features, generated by make_regression and then standardized.
X, y = make_regression(
    n_samples=2000,
    n_features=2,
    noise=2.5,
    random_state=1,
)
scaler = StandardScaler()
X = scaler.fit_transform(X)
print(X[:5], y[:5])

In [None]:
# Fit ordinary least squares and show the score on the training data.
model = LinearRegression().fit(X, y)
model.score(X, y)

# Classification(선형분류)

## Linear classification(use Cross Entropy as loss function)

- also called log loss(logistic regression)
- Logistic Regression
- Classification by Calculating parameters one by one

In [None]:
from sklearn.datasets import make_blobs

# Two overlapping 2-D clusters for binary classification, coloured by class.
N = 500
X, y = make_blobs(
    n_samples=N,
    n_features=2,
    centers=2,
    cluster_std=3.0,
    random_state=17,
)
x1 = X[:, 0]
x2 = X[:, 1]
plt.scatter(x1, x2, c=y)

In [None]:
# Random starting point for the two weights and the bias.
w1, w2, b = np.random.randn(), np.random.randn(), np.random.randn()

def sigmoid_activation(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise.

    The value is computed so that exp() is only ever applied to non-positive
    numbers; large-magnitude inputs therefore cannot overflow.

    z: real scalar or array-like.
    Returns a NumPy scalar for scalar input, otherwise an array shaped like z.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)
    e = np.exp(-np.abs(z))  # always in (0, 1]
    # z >= 0: 1/(1+e^-z);  z < 0: e^z/(1+e^z) -- the same function, no overflow.
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    return out[()]  # unwrap 0-d results to a scalar; arrays pass through

lossHistory = []
epochs = 300
alpha = 0.01  # learning rate
eps = 1e-9    # keeps predictions strictly inside (0, 1)

# Batch gradient descent on the mean binary cross-entropy.
for epoch in np.arange(epochs):
    preds = sigmoid_activation(w1*x1 + w2*x2 + b)       # prediction
    # Clip on BOTH sides: the loss takes log(preds) and log(1 - preds), so a
    # saturated prediction of exactly 0 or 1 would give log(0) and a nan/inf loss.
    preds = np.clip(preds, eps, 1. - eps)

    loss = -((y*np.log(preds) + (1-y)*np.log(1-preds))).mean()  # loss = cross entropy
    lossHistory.append(loss)

    dloss_dz = preds - y
    w1_deriv = dloss_dz * x1        # d(loss)/dw1 = d(loss)/dz * dz/dw1
    w2_deriv = dloss_dz * x2
    b_deriv = dloss_dz * 1

    # Step along the negative gradient averaged over all samples.
    w1 = w1 - (alpha * w1_deriv).mean()
    w2 = w2 - (alpha * w2_deriv).mean()
    b  = b  - (alpha * b_deriv).mean()

print(w1, w2, b)

In [None]:
# Training accuracy: threshold the predicted probability at 0.5 and compare with y.
predicted_positive = sigmoid_activation(w1*x1 + w2*x2 + b) > 0.5
accuracy = (predicted_positive == y).sum() / N
print(accuracy)

In [None]:
# Samples with the decision boundary w1*x + w2*y + b = 0, solved for y.
plt.scatter(X[:, 0], X[:, 1], c=y)
xx = np.linspace(-15, 10, 100)
yy = -w1/w2 * xx - b/w2
plt.plot(xx, yy)
plt.show()

# Cross-entropy loss recorded at every epoch.
fig = plt.figure()
plt.plot(np.arange(epochs), lossHistory)
fig.suptitle("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()

## Use library function: SGDClassifier()

- default: hinge loss (used in SVM)

In [None]:
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.datasets import make_blobs
# Regenerate the two-cluster data set used in the hand-written example above.
N = 500
X, y = make_blobs(
    n_samples=N,
    n_features=2,
    centers=2,
    cluster_std=3.0,
    random_state=17,
)

In [None]:
# Display the shapes of the feature matrix and the label vector.
X.shape, y.shape

In [None]:
# Fit both linear classifiers on the same data and report training accuracy.
clf = SGDClassifier().fit(X, y)
print("SGDClassifier: ", clf.score(X[:,:2],y))
print(clf.coef_, clf.intercept_)

log = LogisticRegression().fit(X, y)
print("Logistic Regression: ", log.score(X[:,:2],y))
print(log.coef_, log.intercept_)

plt.ylim(-15., 5.)
plt.scatter(X[:, 0], X[:, 1], c=y)

# Decision boundaries (w1*x + w2*y + b = 0 solved for y): SGD in red ...
xx = np.linspace(-15, 5, 100)
w1, w2, b = clf.coef_[0, 0], clf.coef_[0, 1], clf.intercept_
yy = -w1/w2 * xx - b/w2
plt.plot(xx, yy, color='r')

# ... and logistic regression in blue.
xx = np.linspace(-15, 5, 100)
w1, w2, b = log.coef_[0, 0], log.coef_[0, 1], log.intercept_
yy = -w1/w2 * xx - b/w2
plt.plot(xx, yy, color='b')

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
# Confusion matrix of the logistic-regression predictions on the training data.
y_pred = log.predict(X)
confusion_matrix(y_true=y, y_pred=y_pred)

In [None]:
# Text report produced by classification_report for the same predictions.
report = classification_report(y_true=y, y_pred=y_pred)
print(report)

## Hinge Loss

In [None]:
# Two-cluster data; the hinge loss needs labels in {-1, +1}, so 0 is mapped to -1.
N = 500
X, y_org = make_blobs(
    n_samples=N,
    n_features=2,
    centers=2,
    cluster_std=2.0,
    random_state=17,
)
x1 = X[:, 0]
x2 = X[:, 1]
y = np.where(y_org == 0, -1, y_org)  # new array; y_org keeps the 0/1 labels
X[:5],y[:5], y_org[:5]

In [None]:
w1,w2,b = np.random.randn(), np.random.randn(), np.random.randn()
lossHistory = []
epochs = 300
alpha = 0.01  # learning rate

# Batch sub-gradient descent on the mean hinge loss max(0, 1 - y*(w.x + b)).
N = len(x1)
for epoch in np.arange(epochs):
    # Margin of every sample at once (replaces the per-sample Python loop).
    score = y*(w1*x1 + w2*x2 + b)
    active = score <= 1  # only these samples contribute to loss and gradient

    # Sub-gradient averaged over all N samples.
    w1_deriv = -(x1*y)[active].sum() / float(N)
    w2_deriv = -(x2*y)[active].sum() / float(N)
    b_deriv = -y[active].sum() / float(N)
    # Averaged like the gradients, so the recorded value is the objective
    # actually being minimized (the sum would be N times larger).
    loss = (1 - score)[active].sum() / float(N)

    w1 -= alpha*w1_deriv
    w2 -= alpha*w2_deriv
    b -= alpha*b_deriv

    lossHistory.append(loss)

print(w1,w2,b)

# Training accuracy against the original 0/1 labels.
accuracy = ((w1*x1 + w2*x2 + b > 0) == y_org).sum()/N
print(accuracy)
plt.plot(lossHistory)

In [None]:
# Samples (coloured by the -1/+1 label) with the learned decision boundary in red.
plt.ylim(-15., 5.)
plt.scatter(X[:, 0], X[:, 1], c=y)

xx = np.linspace(-15, 5, 100)
yy = -w1/w2 * xx - b/w2
plt.plot(xx, yy, color='r')

# Normal Equation

In [None]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor

# Small two-feature regression problem for the normal-equation demo.
X, y = make_regression(n_samples=100, n_features=2, noise=10, random_state=1)

# Target plotted against the first feature column.
# NOTE(review): the axis label says "Index 1" while column 0 is plotted -- confirm intent.
plt.subplots(figsize=(8, 5))
plt.scatter(X[:, 0], y, marker='o')
plt.xlabel("Feature at Index 1")
plt.ylabel('Target')

In [None]:
# Display the shapes of the feature matrix and the target vector.
X.shape, y.shape

In [None]:
# Reference solution: standardize the features, then fit scikit-learn's LinearRegression.
scaler = StandardScaler()
X_sc = scaler.fit_transform(X)

model = LinearRegression().fit(X_sc, y)
print("coefficients and bias: ", model.coef_, model.intercept_)
model.score(X_sc, y)

# Predictions for the first five samples.
model.predict(X_sc[:5])

- normal equation:
    - y=XW
    - $W^{*} = (X^{T}X)^{-1}X^{T}y$

In [None]:
# Normal equation: find W such that (X_b^T X_b) W = X_b^T y.
X = X_sc # scaled (scaling is not actually required for the closed-form solution)
X_b = np.concatenate([X, np.ones((X.shape[0],1))],axis=1)  # extra column of ones for the bias
# np.linalg.solve gives the same W as inv(X_b^T X_b) @ X_b^T @ y without forming
# the explicit inverse, which is cheaper and numerically more accurate.
w_best = np.linalg.solve(X_b.T.dot(X_b), X_b.T.dot(y))
print("weight: ", w_best[:-1], "bias: ", w_best[-1])

# prediction for the first 5 (y = XW)
Xt = X[:5]
Xt_b = np.concatenate([Xt,np.ones((Xt.shape[0],1))],axis=1)
Xt_b.dot(w_best)

# Locally weighted Regression

In [None]:
import numpy as np
np.random.seed(8)  # fix NumPy's global RNG seed so the generated data is repeatable

def train_gen(n_samples):
    """Generate a noisy cubic data set.

    Draws X from a standard normal and sets y = 2*X**3 + 10 plus Gaussian
    noise scaled by 4.6.

    n_samples: number of rows to generate.
    Returns (X, y), both arrays of shape (n_samples, 1).
    """
    shape = (n_samples, 1)
    X = np.random.randn(*shape)
    noise = np.random.randn(*shape)
    y = 2 * (X ** 3) + 10 + 4.6 * noise
    return X, y

In [None]:
# weighted matrix
def wm(point,X,tau):
    """Build the diagonal weight matrix for locally weighted regression.

    Training example x(i) receives the Gaussian-kernel weight
    exp(-||x(i) - point||^2 / (2 * tau^2)).

    point: the x where the prediction is wanted (scalar, or one value per feature).
    X: training data, one example per row (a 1-D array is treated as one feature).
    tau: kernel bandwidth.

    Returns an (m, m) np.matrix, m = X.shape[0], with the weights on the
    diagonal and zeros elsewhere.
    """
    m = X.shape[0]  # number of training examples

    rows = np.asarray(X, dtype=float)
    if rows.ndim == 1:
        rows = rows[:, np.newaxis]
    # Squared distance of every example to the query point, all rows at once.
    diff = rows - np.ravel(np.asarray(point, dtype=float))
    sq_dist = np.sum(diff * diff, axis=1)

    weights = np.exp(sq_dist / (-2 * tau * tau))
    # np.asmatrix rather than the np.mat alias, which NumPy 2.0 removed; the
    # result is still an np.matrix.
    return np.asmatrix(np.diag(weights).reshape(m, m))

# Exercise

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression, Ridge, Lasso, SGDClassifier
from sklearn.datasets import make_regression, make_blobs
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

In [None]:
def show_plot(model,X,i):
    """Draw a bar chart of a fitted model's coefficients in panel i of a 1x3 grid.

    model: fitted estimator exposing ``coef_`` (expected to hold one value
        per column of X).
    X: training data; used only to label the bars by column index.
    i: 1-based position of the panel within the current figure.
    """
    df = pd.DataFrame(X)
    coef = pd.Series(model.coef_, df.columns)  # index the weights by feature column
    plt.subplot(1,3,i)
    coef.plot(kind='bar', title='model coefficients')  # title typo fixed

In [None]:
# Ten-feature regression problem, standardized before fitting.
X, y = make_regression(n_samples=100, n_features=10, noise=30, random_state=1)
X = StandardScaler().fit_transform(X)

# Compare the coefficients of Lasso, plain least squares and Ridge side by side.
clf1 = Lasso(alpha=30)
clf2 = LinearRegression()
clf3 = Ridge(alpha=100)
clfs = [clf1, clf2, clf3]
plt.figure(figsize=(12, 4))
for i, clf in enumerate(clfs, 1):
    clf.fit(X, y)
    show_plot(clf, X, i)

In [None]:
# Two-cluster data, drawn once per class with its own marker ...
X, y = make_blobs(n_samples=100, n_features=2, centers=2, cluster_std=2.0, random_state=17)
markers = ['o','+']
for i in range(2):
    in_class = y == i
    xs = X[:, 0][in_class]
    ys = X[:, 1][in_class]
    plt.scatter(xs, ys, marker=markers[i])
# ... and once more with every point coloured by its label.
plt.scatter(X[:, 0], X[:, 1], c=y)

# Fit a linear classifier and report training accuracy and parameters.
clf = SGDClassifier().fit(X, y)
print(clf.score(X,y))
print(clf.coef_,clf.intercept_)

# Decision boundary w1*x + w2*y + b = 0, solved for y.
w1 = clf.coef_[0, 0]
w2 = clf.coef_[0, 1]
b = clf.intercept_[0]
xx = np.linspace(-10, 2, 100)
yy = -w1/w2 * xx - b/w2
plt.plot(xx, yy)