In [1]:
import plotly.graph_objects as go
import numpy as np
import pandas as pd

In [2]:
# Load the exercise data set from the working directory.
pd_dataframe = pd.read_csv('ex1.csv')
# Keep the 'x' and 'y' columns as separate Series for the cells that follow.
x_col, y_col = pd_dataframe['x'], pd_dataframe['y']

In [3]:
# Scatter trace of the raw samples, drawn with 'x'-shaped markers.
raw_points = go.Scatter(
    x=x_col,
    y=y_col,
    mode='markers',
    marker={'symbol': 'x'},
)

# Build the figure straight from the trace, then label and size it.
fig = go.Figure(data=[raw_points])
fig.update_xaxes(title='x')
fig.update_yaxes(title='y', tickangle=0)  # keep the y tick labels horizontal
fig.update_layout(
    title_text='Visualize data of ex1.csv file',
    width=600,
    height=400,
    showlegend=False,  # a single trace does not need a legend
)
fig.show()

In [4]:
# Raw feature values as an (m, 1) column.
x_column = pd_dataframe['x'].values.reshape(-1, 1)

# Flat 1-D version of the same feature values.
x_test = x_column.reshape(-1)

# Column of ones (same dtype as x) that multiplies the intercept term.
ones_array = np.ones_like(x_column).reshape(-1, 1)

# Design matrix [1, x]: the intercept column first, the feature column second.
x_col = np.hstack((ones_array, x_column))

# Targets as an (m, 1) column so they line up with the predictions X @ theta.
y_col = pd_dataframe['y'].values.reshape(-1, 1)

In [5]:
# Random starting parameters as a (2, 1) column: row 0 is the intercept, row 1 the slope.
# Both entries are drawn uniformly from [0, 1) in a single call.
theta = np.random.random((2, 1))

In [6]:
def predict(*, theta: np.ndarray, X: np.ndarray) -> np.ndarray:
    """Evaluate the linear hypothesis for every row of ``X``.

    Args:
        theta: Parameter array. In this notebook it is a (2, 1) column holding
            the intercept and the slope.
        X: Design matrix whose columns line up with the rows of ``theta``.

    Returns:
        The matrix product ``X @ theta`` (one prediction per row of ``X``).
    """
    # np.matmul raises ValueError if the inner dimensions do not agree.
    H = np.matmul(X, theta)
    return H

In [7]:
def compute_cost_function(*, H: np.ndarray, Y: np.ndarray) -> float:
    """Return the halved mean squared error between predictions and targets.

    Computes ``J = sum((H - Y) ** 2) / (2 * m)`` where ``m`` is the number of
    predictions.

    Both inputs are treated as vectors and reshaped to (m, 1) columns before
    subtracting. This lets 1-D inputs work and stops an (m, 1) array and an
    (m,) array from silently broadcasting to an (m, m) error matrix.

    Args:
        H: Predicted values, as an (m, 1) column or a flat length-m array.
        Y: Target values, in either of the same two layouts.

    Returns:
        The scalar cost (a NumPy floating-point scalar).
    """
    predictions = np.asarray(H).reshape(-1, 1)
    targets = np.asarray(Y).reshape(-1, 1)

    m = len(predictions)  # Number of training examples
    E = predictions - targets  # Per-example error, shape (m, 1)

    # E.T @ E is a (1, 1) matrix holding the sum of squared errors.
    J = np.dot(E.T, E) / (2 * m)
    return J[0, 0]

In [8]:
def update(*, theta: np.ndarray, X: np.ndarray, H: np.ndarray, Y: np.ndarray, learning_rate: float) -> np.ndarray:
    """Take one batch gradient-descent step and return the new parameters.

    Args:
        theta: Current parameters.
        X: Design matrix; its length is used as the number of examples.
        H: Predictions that correspond to ``theta`` and ``X``.
        Y: Target values, with the same layout as ``H``.
        learning_rate: Step size applied to the averaged gradient.

    Returns:
        ``theta - (learning_rate / m) * X.T @ (H - Y)``. The input ``theta``
        is left untouched.
    """
    residual = H - Y
    n_examples = len(X)
    step = learning_rate / n_examples
    # X.T @ residual accumulates each feature's contribution to the error.
    gradient_sum = X.T @ residual
    return theta - step * gradient_sum

In [9]:
def update_params(*, iter: int, theta: np.ndarray, X: np.ndarray, Y: np.ndarray, learning_rate: float):
    """Run batch gradient descent for a fixed number of iterations.

    Args:
        iter: Number of gradient-descent steps to perform.
        theta: Starting parameters. The caller's array is not modified,
            because each step rebinds the local name to a new array.
        X: Design matrix passed to ``predict`` and ``update``.
        Y: Target values passed to ``compute_cost_function`` and ``update``.
        learning_rate: Step size used for every update.

    Returns:
        A ``(theta, J)`` tuple. ``theta`` holds the parameters after the last
        step. ``J`` is a list with one cost per iteration, each recorded
        *before* that iteration's update.
    """
    J = []  # Cost history, one entry per iteration
    for _ in range(iter):
        # Use the X and Y passed in, not the notebook globals, so the function
        # trains on whatever data the caller supplies.
        H = predict(theta=theta, X=X)
        J.append(compute_cost_function(H=H, Y=Y))
        # Reuse H: theta has not changed since it was computed.
        theta = update(theta=theta, X=X, H=H, Y=Y, learning_rate=learning_rate)

    return theta, J

# Seeded generator so that Restart & Run All reproduces the same starting
# point, and therefore the same cost curves and fitted line.
SEED = 42
rng = np.random.default_rng(SEED)

# Random starting parameters as a (2, 1) column, drawn uniformly from [0, 1).
theta = rng.random((2, 1))

# Fit the model: 1000 gradient-descent steps with a learning rate of 1e-4.
theta_updated, J = update_params(iter=1000, theta=theta, X=x_col, Y=y_col, learning_rate=0.0001)

In [10]:
# Learning rates to compare during gradient descent.
# NOTE(review): 0.000001 is listed twice, so two of the curves coincide exactly
# and share a legend label -- confirm whether a different value was intended.
learning_ = [0.000001, 0.000001, 0.0000025, 0.0000034]

# Number of gradient-descent steps per run. The x-axis values and the
# cost-history array below are both sized from this.
num_iters = 1000

# Iteration numbers 1..num_iters for the x-axis.
x_range = np.linspace(1, num_iters, num_iters)

# One column of cost values per learning rate. The shape is derived from the
# inputs so that editing `learning_` or `num_iters` cannot leave it out of sync.
list_J = np.zeros((num_iters, len(learning_)))

fig = go.Figure()

for i, value in enumerate(learning_):
    # Every run starts from the same `theta`, so the curves are directly comparable.
    _, J_ = update_params(iter=num_iters, theta=theta, X=x_col, Y=y_col, learning_rate=value)
    list_J[:, i] = J_

    # One line per learning rate: cost against iteration number.
    fig.add_trace(
        go.Scatter(
            x=x_range,
            y=list_J[:, i],
            mode='lines',
            name=f'Learning rate: {value}'
        )
    )

fig.update_xaxes(title='iter')
fig.update_yaxes(title='cost value', tickangle=0)  # horizontal tick labels
fig.update_layout(
    width=1000,
    height=500,
    title_text='Cost Vs Num of Iterations',
    showlegend=True
)

fig.show()


In [12]:
# Span the curves over the observed feature values. The raw 'x' column is used
# because `x_col` now holds the [1, x] design matrix, so its min/max would also
# take the column of ones into account.
x_observed = pd_dataframe['x'].values
y_observed = pd_dataframe['y'].values
x_range = np.linspace(x_observed.min(), x_observed.max(), 100)

# Straight line from gradient descent: y = theta_0 + theta_1 * x
theta_0, theta_1 = np.ravel(theta_updated)
y_range = theta_0 + theta_1 * x_range

# NOTE(review): `theta_true` is not defined in any visible cell (the execution
# counter skips In [11]), so a fresh kernel used to fail here with NameError.
# If it is missing, fall back to a least-squares quadratic fit of the data;
# confirm whether that matches the intended reference curve.
if 'theta_true' not in globals():
    # np.polyfit returns the highest power first; reverse to [c0, c1, c2].
    theta_true = np.polyfit(x_observed, y_observed, deg=2)[::-1]

# Quadratic reference curve: y = c0 + c1 * x + c2 * x^2
quad_0, quad_1, quad_2 = np.ravel(theta_true)
y_range_2 = quad_0 + quad_1 * x_range + quad_2 * x_range**2

fig = go.Figure()

# Original data points from the CSV file.
fig.add_trace(
    go.Scatter(
        x=pd_dataframe['x'],
        y=pd_dataframe['y'],
        mode='markers',
        marker=dict(symbol='x'),
        name='Data Points'
    )
)

# Straight line fitted by gradient descent.
fig.add_trace(
    go.Scatter(
        x=x_range,
        y=y_range,
        mode='lines',
        name='Regression Line'
    )
)

# Quadratic curve, given its own legend label so the two lines can be told apart.
fig.add_trace(
    go.Scatter(
        x=x_range,
        y=y_range_2,
        mode='lines',
        name='Quadratic Curve'
    )
)

fig.update_xaxes(title='x')
fig.update_yaxes(title='y', tickangle=0)  # horizontal tick labels
fig.update_layout(
    width=800,
    height=500,
    title_text='Visualize data of ex1.csv file',
    showlegend=True
)

fig.show()
