In [46]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import math

In [2]:
# Path of the input CSV (relative to the notebook's working directory);
# it is the file handed to pd.read_csv when the data is loaded.
filename = "data_2d_line.csv"

In [168]:
# Load the raw table. header=None tells pandas not to treat the first row as
# column names, so the columns get the integer labels 0 and 1.
df = pd.read_csv(
    filename,
    header=None,
)
df.head(n=5)

Unnamed: 0,0,1
0,6.1101,17.592
1,5.5277,9.1302
2,8.5186,13.662
3,7.0032,11.854
4,5.8598,6.8233


In [4]:
# Work on a plain 2-D NumPy array from here on: one row per data point,
# column 0 = x, column 1 = y.
pts = df.to_numpy()
# Preview the first five rows only.
pts[:5]

array([[ 6.1101, 17.592 ],
       [ 5.5277,  9.1302],
       [ 8.5186, 13.662 ],
       [ 7.0032, 11.854 ],
       [ 5.8598,  6.8233]])

### Least Squares

In [172]:
# Pull each coordinate out as an (n, 1) column vector. Indexing with a
# one-element column list keeps the second axis and returns a copy.
x = pts[:, [0]]
y = pts[:, [1]]

In [176]:
# Column means, kept as shape-(1,) arrays so they broadcast against the
# (n, 1) column vectors in the slope/intercept formulas.
x_mean = np.mean(x, axis=0)
y_mean = np.mean(y, axis=0)

# Integer bounds enclosing the x data; they define the x-range over which the
# fitted lines are drawn. Reduce over the whole array so math.floor/math.ceil
# receive a NumPy scalar: passing the shape-(1,) result of np.min(x, axis=0)
# relies on the implicit array-to-scalar conversion deprecated in NumPy 1.25.
x_min = math.floor(x.min())
x_max = math.ceil(x.max())

In [183]:
# Closed-form least-squares slope:
#   sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean) ** 2)
# Both sums are written as (1, n) @ (n, 1) products, so m has shape (1, 1).
x_centered = x - x_mean
y_centered = y - y_mean
m = (x_centered.T @ y_centered) / (x_centered.T @ x_centered)
m

array([[1.19303364]])

In [184]:
# Intercept of the least-squares line: the line passes through the point of
# means, so b = y_mean - m * x_mean. Broadcasting with the (1, 1) slope makes
# b a (1, 1) array as well.
b = y_mean - m * x_mean
b

array([[-3.89578088]])

### Gradient Descent

In [159]:
theta = np.array([np.zeros(2)]).T
alpha = 0.01
num_iters = 1500
m = pts.shape[0]
X = np.array([np.ones(m),pts[:,0]]).T
y = np.array([pts[:,1]]).T

In [166]:
#check dimension
print('Dimensions of X, y, theta: ' + str(X.shape) + ", " + str(y.shape) + ", " + str(theta.shape))

Dimensions of X, y, theta: (97, 2), (97, 1), (2, 1)


In [163]:
for i in range(num_iters):
    err = np.dot(X,theta) - y
    delta = np.dot(np.transpose(X), err) / m
    theta = theta - alpha * delta

In [167]:
# Parameters after gradient descent: row 0 is the intercept (the weight of the
# all-ones column of X), row 1 is the slope (the weight of the x column).
theta

array([[-3.63029144],
       [ 1.16636235]])

### Plot points and best fit lines 

In [196]:
# Side-by-side comparison: least-squares (intercept, slope) followed by the
# gradient-descent (intercept, slope). Unpacking the (2, 1) theta yields its
# two rows, i.e. theta[0] and theta[1].
(b[0], m[0], *theta)

(array([-3.89578088]),
 array([1.19303364]),
 array([-3.63029144]),
 array([1.16636235]))

In [198]:
def _line_trace(xs, ys, color, dash, name):
    """Return a thin plotly line trace through (xs, ys) with the given styling."""
    return go.Scatter(
        x=xs,
        y=ys,
        mode='lines',
        line={
            'width': 1,
            'color': color,
            'dash': dash,
        },
        name=name,
    )


fig = go.Figure()

# points
scatter = go.Scatter(
    x=pts[:, 0],
    y=pts[:, 1],
    mode="markers",
    marker={
        'size': 4,
        'color': 'red',
    },
    name="points",
)
fig.add_trace(scatter)

# Both fitted lines are evaluated on the same x grid spanning the data range.
best_x = np.linspace(x_min, x_max, 10)

# best line (least squares): y = b + m * x
best_y = b[0] + m[0] * best_x
scatter_best_line1 = _line_trace(
    best_x, best_y, 'blue', 'solid', "Best Line(Least Square)"
)
fig.add_trace(scatter_best_line1)

# best line (gradient descent): y = theta[0] + theta[1] * x
best_y = theta[0] + theta[1] * best_x
scatter_best_line2 = _line_trace(
    best_x, best_y, 'green', 'dot', "Best Line(Gradient Descent)"
)
fig.add_trace(scatter_best_line2)

# Label the figure so it can be read on its own.
fig.update_layout(
    title="Best-fit lines: least squares vs. gradient descent",
    xaxis_title="x",
    yaxis_title="y",
)

fig.show()