In [63]:
import plotly.graph_objects as go
import numpy as np
import pandas as pd

In [64]:
pd_dataframe = pd.read_csv('ex1.csv')
x_col = pd_dataframe['x']
y_col = pd_dataframe['y']

In [65]:
fig = go.Figure()
fig.add_trace(go.Scatter( x= x_col,
                          y= y_col, 
                          mode= 'markers', 
                          marker= dict(symbol= 'x')))
fig.update_xaxes(title = 'x')
fig.update_yaxes(title = 'y', tickangle= 0)
fig.update_layout(width = 600, height= 400, title_text= 'Visualize data of ex1.csv file', showlegend= False)
fig.show()

In [66]:
x_col = pd_dataframe['x'].values.reshape(-1, 1)
ones_array = np.ones_like(x_col).reshape(-1, 1)

x_col = np.hstack((ones_array, x_col))
y_col = pd_dataframe['y'].values.reshape(-1, 1)

In [67]:
theta = np.array([[np.random.random()], [np.random.random()]])
theta

array([[0.79081349],
       [0.12231412]])

In [68]:
def predict(*, theta: np.array, X: np.array) -> np.ndarray:
    H = np.matmul(X, theta)
    return H

In [69]:
def compute_cost_function(*, H: np.ndarray, Y: np.ndarray) -> np.ndarray:
    m = len(H)
    E = H - Y
    J = np.dot(E.T, E) / (2*m)
    return J[0, 0]

In [70]:
def update(*, theta: np.ndarray, X: np.ndarray, H: np.ndarray, Y: np.ndarray, learning_rate: float) -> np.ndarray:
    E = H - Y
    m = len(X)
    theta_updated = theta - (learning_rate / m) * np.matmul(X.T, E) 
    return theta_updated

In [71]:
def update_params(*, iter: int, theta: np.ndarray, X: np.ndarray, Y: np.ndarray, learning_rate: float):
    J = []
    for i in range(iter):
        H = predict(theta= theta, X= x_col)
        J.append(compute_cost_function(H= H, Y= y_col))
        theta = update(theta= theta, X= x_col, H= predict(theta= theta, X= x_col), Y= y_col, learning_rate= learning_rate)

    return theta, J

theta_updated, J = update_params(iter= 1000, theta= theta, X= x_col, Y= y_col, learning_rate= 0.0001)

In [72]:
learning_ = [0.000001, 0.000001, 0.0000025, 0.0000034]
x_range = np.linspace(1, 1000, 1000)

list_J = np.zeros((1000, 4))

fig = go.Figure()
for i, value in enumerate(learning_):
    _ , J_ = update_params(iter= 1000, theta= theta, X= x_col, Y= y_col, learning_rate= value)
    list_J[:, i] = J_
    fig.add_trace(go.Scatter(x=x_range, y=list_J[:, i], mode='lines', name= f'Regression Line {i} - Learning rate: {value}'))

fig.update_xaxes(title = 'x')
fig.update_yaxes(title = 'y', tickangle= 0)
fig.update_layout(width = 1000, height= 500, title_text= 'Visualize data of ex1.csv file', showlegend= True)
fig.show()

In [73]:
x_range = np.linspace(x_col.min(), x_col.max(), 100)
y_range = theta_updated[0] + theta_updated[1] * x_range


fig = go.Figure()
fig.add_trace(go.Scatter(x= pd_dataframe['x'], y= pd_dataframe['y'], mode= 'markers', marker= dict(symbol= 'x'), name= 'Data Points'))
fig.add_trace(go.Scatter(x=x_range, y=y_range, mode='lines', name= 'Regression Line'))
fig.update_xaxes(title = 'x')
fig.update_yaxes(title = 'y', tickangle= 0)
fig.update_layout(width = 800, height= 500, title_text= 'Visualize data of ex1.csv file', showlegend= True)
fig.show()