In [5]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import math

In [6]:
# CSV file holding the 2-D sample points; it is read by the following cell.
filename = "data_2d_line.csv"

In [7]:
# header=None makes pandas treat the first row as data and label the
# columns with integers (0 and 1 in the preview below).
df = pd.read_csv(filename, header=None)
# Preview the first rows to sanity-check the load.
df.head()

Unnamed: 0,0,1
0,6.1101,17.592
1,5.5277,9.1302
2,8.5186,13.662
3,7.0032,11.854
4,5.8598,6.8233


In [8]:
# Convert the DataFrame to a NumPy array with one row per point:
# column 0 is used as x and column 1 as y by the fitting functions below.
pts = df.to_numpy()
# Show the first five rows of the array.
pts[0:5]

array([[ 6.1101, 17.592 ],
       [ 5.5277,  9.1302],
       [ 8.5186, 13.662 ],
       [ 7.0032, 11.854 ],
       [ 5.8598,  6.8233]])

### Least Squares

In [20]:
def lineLeastSquare(points):
    """Fit the line ``y = b + m * x`` to 2-D points by ordinary least squares.

    Uses the closed-form solution::

        m = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean) ** 2)
        b = y_mean - m * x_mean

    Parameters
    ----------
    points : array-like of shape (n, 2+)
        One row per point; column 0 is x and column 1 is y. Any further
        columns are ignored. Nested lists are accepted as well as arrays.

    Returns
    -------
    tuple
        ``(b, m)`` -- the intercept and the slope of the fitted line.

    Raises
    ------
    ValueError
        If ``points`` is not a non-empty 2-D array with at least two
        columns, or if every x value is identical (the slope is undefined
        because the best-fit line would be vertical).
    """
    data = np.asarray(points, dtype=float)
    if data.ndim != 2 or data.shape[0] == 0 or data.shape[1] < 2:
        raise ValueError("points must be a non-empty array of shape (n, 2)")

    x = data[:, 0]
    y = data[:, 1]

    # Compare min/max rather than testing the sum of squares against zero:
    # rounding in the mean can leave tiny non-zero deviations even when all
    # x values are equal, which would yield a meaningless slope.
    if x.max() == x.min():
        raise ValueError("cannot fit a line: all x values are identical")

    x_mean = x.mean()
    y_mean = y.mean()
    dx = x - x_mean
    dy = y - y_mean

    m = np.dot(dx, dy) / np.dot(dx, dx)
    b = y_mean - x_mean * m

    return b, m

### Gradient Descent

In [21]:
def lineGradientDescent(points, alpha=0.01, num_iters=1500):
    """Fit the line ``y = b + m * x`` to 2-D points by batch gradient descent.

    Minimises the mean squared error, starting from ``b = m = 0`` and
    applying ``theta <- theta - alpha * X^T (X theta - y) / n`` for a fixed
    number of iterations. No convergence check is performed.

    Parameters
    ----------
    points : array-like of shape (n, 2+)
        One row per point; column 0 is x and column 1 is y. Any further
        columns are ignored. Nested lists are accepted as well as arrays.
    alpha : float, optional
        Learning rate (step size). Defaults to 0.01.
    num_iters : int, optional
        Number of gradient steps to take. Defaults to 1500.

    Returns
    -------
    tuple
        ``(b, m)`` -- the intercept and the slope after ``num_iters`` steps.

    Raises
    ------
    ValueError
        If ``points`` is not a non-empty 2-D array with at least two columns.
    """
    data = np.asarray(points, dtype=float)
    if data.ndim != 2 or data.shape[0] == 0 or data.shape[1] < 2:
        raise ValueError("points must be a non-empty array of shape (n, 2)")

    n_samples = data.shape[0]

    # Design matrix: a column of ones (intercept term) next to the x values.
    X = np.column_stack((np.ones(n_samples), data[:, 0]))
    y = data[:, 1].reshape(-1, 1)

    # theta[0] is the intercept, theta[1] is the slope.
    theta = np.zeros((2, 1))

    for _ in range(num_iters):
        residual = np.dot(X, theta) - y
        gradient = np.dot(X.T, residual) / n_samples
        theta = theta - alpha * gradient

    return theta[0, 0], theta[1, 0]

### Plot points and best-fit lines

In [22]:
# Fit the same points with both methods; each call returns (intercept, slope).
b1, m1 = lineLeastSquare(pts)
b2, m2 = lineGradientDescent(pts)
# Display the four parameters together so the two fits can be compared.
b1, m1, b2, m2

(-3.8957808783118537, 1.193033644189594, -3.63029143940436, 1.166362350335582)

In [30]:
fig = go.Figure()

# Raw data points.
scatter = go.Scatter(
    x=pts[:, 0],
    y=pts[:, 1],
    mode="markers",
    marker={
        'size': 4,
        'color': 'red'},
    name="points"
)
fig.add_trace(scatter)

# Evaluate both fitted lines at the integer bounds that enclose the data's
# x-range; two endpoints are enough to draw a straight line.
x_min = math.floor(np.min(pts[:, 0], axis=0))
x_max = math.ceil(np.max(pts[:, 0], axis=0))
ls_y_min = b1 + m1 * x_min
ls_y_max = b1 + m1 * x_max
gd_y_min = b2 + m2 * x_min
gd_y_max = b2 + m2 * x_max

# Draw the fits as named line traces (rather than layout shapes) so each one
# gets a legend entry and the two methods can be told apart.
fig.add_trace(go.Scatter(
    x=[x_min, x_max],
    y=[ls_y_min, ls_y_max],
    mode="lines",
    line=dict(color="RoyalBlue", width=2),
    name="least squares"
))

fig.add_trace(go.Scatter(
    x=[x_min, x_max],
    y=[gd_y_min, gd_y_max],
    mode="lines",
    line=dict(
        color="green",
        width=2,
        dash="dashdot"),
    name="gradient descent"
))

fig.update_layout(
    title="Least squares vs. gradient descent line fit",
    xaxis_title="x",
    yaxis_title="y"
)

# Lock the aspect ratio so one unit on x equals one unit on y.
fig.update_yaxes(scaleanchor="x", scaleratio=1)

fig.show()

In [24]:
# Show the x bounds and the endpoint y values of both fitted lines used in the plot.
x_min, x_max, ls_y_min, ls_y_max, gd_y_min, gd_y_max

(5,
 23,
 2.069387342636116,
 23.543992938048806,
 2.20152031227355,
 23.196042618314028)