In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from IPython.display import clear_output
import time

In [2]:
def function(*, input: np.ndarray, theta: np.ndarray) -> np.ndarray:
    """
        Evaluate the linear model for every row of `input`.

        A column of ones is prepended to `input` (for the intercept), each
        column is scaled by the matching entry of the flattened `theta`, and
        the row sums are returned as a 1-D array.
    """
    bias_column = np.ones((len(input), 1))
    design = np.hstack((bias_column, input))
    weights = theta.reshape(-1)
    return (design * weights).sum(axis=1)

def plot_diagram(*, name_1: str, name_2: str, mode_1: str, mode_2: str,
                  x_1: np.array, y_1: np.array, x_2: np.array, y_2: np.array):
    """
        Show two scatter traces in one Plotly figure.

        Trace 1 uses (x_1, y_1, mode_1, name_1), trace 2 uses
        (x_2, y_2, mode_2, name_2); they are added in that order.
    """
    series = (
        (x_1, y_1, mode_1, name_1),
        (x_2, y_2, mode_2, name_2),
    )
    fig = go.Figure()
    for xs, ys, draw_mode, label in series:
        fig.add_trace(go.Scatter(x=xs, y=ys, mode=draw_mode, name=label))
    fig.show()

def normalized(vector: np.ndarray) -> np.ndarray:
    """
        Standardise `vector`: subtract its mean, then divide by its
        (population) standard deviation.
    """
    centred = vector - np.mean(vector)
    return centred / np.std(vector)

def convert_to_new_order(*, input: np.ndarray, order: int) -> np.ndarray:
    """
        Expand a vector x into the matrix [x, x^2, ..., x^order].

        `input` is flattened first; the result is a float array with one row
        per element and one column per power.
    """
    flat = input.reshape(-1)
    powers = np.empty((flat.shape[0], order))
    for column, exponent in enumerate(range(1, order + 1)):
        powers[:, column] = flat ** exponent
    return powers

def true_value_theta(*, input: np.ndarray, output: np.ndarray) -> np.ndarray:
    """
        Compute the least-squares theta from the normal equation
        (X.T * X) * theta = X.T * Y

        A column of ones is prepended to `input` for the intercept, so the
        result has shape (n_features + 1, 1) with the intercept first.

        The system is solved directly instead of forming (X.T * X)^-1:
        solving is cheaper and loses less precision than an explicit inverse.
        A singular X.T * X still raises numpy.linalg.LinAlgError.
    """
    ones = np.ones((len(input), 1))
    x_add = np.hstack((ones, input))
    XT_X = np.dot(x_add.T, x_add)
    XT_Y = np.dot(x_add.T, output.reshape(-1, 1))
    theta_true = np.linalg.solve(XT_X, XT_Y)
    return theta_true

def convert_theta_nomr_to_theta(*, theta_normalized: np.ndarray, input: np.ndarray, output: np.ndarray) -> np.ndarray:
    """
        Map parameters learned on standardised data back to the original scale.

        NOTE(review): the per-column statistics of `input` are sliced with
        [1:], i.e. the first column is skipped. That presumably means `input`
        is expected to already contain the leading ones column - confirm
        against callers. The method of the same name on
        Linear_Regression_Multivariables does not slice.
    """
    feature_mean = np.mean(input, axis=0)[1:]
    feature_std = np.std(input, axis=0)[1:]
    target_mean = np.mean(output, axis=0)
    target_std = np.std(output, axis=0)

    slopes = theta_normalized[1:]

    theta = np.zeros_like(theta_normalized)
    # Slopes are rescaled by std_y / std_x of the matching column.
    theta[1:] = target_std * slopes / (feature_std.reshape(-1, 1))
    # The intercept absorbs the shift introduced by centring the columns.
    theta[0] = (target_mean + target_std * theta_normalized[0]
                - np.dot(target_std * feature_mean / feature_std, slopes))
    return theta

In [3]:
class Linear_Regression_Multivariables:
    """
        Multivariate linear regression fitted with batch gradient descent.

        Features and target are standardised before training, and the learned
        parameters are mapped back to the original scale before being returned.
    """

    def __init__(self, *, number_of_feature: int) -> None:
        """Store the number of feature columns the model expects."""
        self.number_of_features = number_of_feature

    def compute_normalized_vector(self, vector: np.ndarray) -> np.ndarray:
        """
            Standardise one vector: subtract the mean, divide by the
            (population) standard deviation.

            A vector whose entries are all equal has no spread; it is mapped
            to zeros instead of dividing by zero and producing NaN.
        """
        centred = vector - np.mean(vector)
        # min == max is an exact test for "constant", unlike std == 0 which can
        # miss by a rounding error.
        if np.size(vector) and np.min(vector) == np.max(vector):
            return np.zeros_like(centred)
        return centred / np.std(vector)

    def apply_normalize_matrix(self, *, input: np.ndarray, output: np.ndarray) -> tuple:
        """
            Standardise every column of `input` and `output`.

            Returns (normalized_input, normalized_output) with shapes
            (-1, number_of_features) and (-1, 1).
        """
        normalized_input = np.apply_along_axis(func1d= self.compute_normalized_vector, arr= input, axis= 0)
        normalized_input = normalized_input.reshape(-1, self.number_of_features)

        normalized_output = np.apply_along_axis(func1d= self.compute_normalized_vector, arr= output, axis= 0)
        normalized_output = normalized_output.reshape(-1, 1)
        return normalized_input, normalized_output

    def add_ones_columns(self, *, normalized_input: np.ndarray) -> np.ndarray:
        """Prepend a column of ones (the intercept term) to a 2-D matrix."""
        ones = np.ones((len(normalized_input), 1))
        return np.hstack((ones, normalized_input))

    def predict(self, *, theta: np.ndarray, normalized_input: np.ndarray) -> np.ndarray:
        """Return normalized_input @ theta; the ones column must already be present."""
        return np.matmul(normalized_input, theta)

    def compute_loss_function(self, *, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        """Half mean squared error: sum((y_pred - y_true)^2) / (2 * m)."""
        m = len(y_true)
        E = y_pred - y_true
        return np.sum(E ** 2) / (2 * m)

    def update_params(self, *, theta: np.ndarray, lr: float, y_pred: np.ndarray,
                      y_true: np.ndarray, normalized_input: np.ndarray) -> np.ndarray:
        """One gradient-descent step: theta - lr * X.T @ (y_pred - y_true) / m."""
        m = len(y_true)
        E = y_pred - y_true
        dJ_dtheta = np.dot(normalized_input.T, E) / m
        return theta - lr * dJ_dtheta

    def convert_theta_nomr_to_theta(self, *, theta_normalized: np.ndarray, input: np.ndarray, output: np.ndarray) -> np.ndarray:
        """
            Map parameters learned on standardised data back to the original scale.

            `input` is the raw feature matrix (no ones column) and `output` the
            raw target. `theta_normalized` holds the intercept in row 0 followed
            by one row per feature.

            A constant feature column was trained as all zeros, so its
            normalised weight never moved from its initial value; its weight is
            reported as 0 and its standard deviation is treated as 1 to avoid
            dividing by zero.
        """
        theta = np.zeros_like(theta_normalized)

        mean_x = np.mean(input, axis= 0)
        std_x = np.std(input, axis= 0)

        mean_y = np.mean(output, axis= 0)
        std_y = np.std(output, axis= 0)

        if np.size(input):
            constant = np.asarray(np.min(input, axis= 0) == np.max(input, axis= 0))
        else:
            constant = np.zeros(np.shape(std_x), dtype=bool)
        safe_std_x = np.where(constant, 1.0, std_x)
        slopes = np.where(constant.reshape(-1, 1), 0.0, theta_normalized[1:])

        theta[1:] = std_y * slopes / (safe_std_x.reshape(-1, 1))
        theta[0] = mean_y + std_y * theta_normalized[0] - np.dot(std_y * mean_x / safe_std_x, slopes)

        return theta

    def train(self, *, epoch: int, theta: np.ndarray, input: np.ndarray,
              output: np.ndarray, lr: float) -> tuple:
        """
            Run `epoch` gradient-descent steps on the standardised data.

            `theta` is the starting point with shape (number_of_features + 1, 1);
            it is not modified. Returns (J_array, theta): the loss recorded
            before each update, and the final parameters on the original scale.
        """
        normalized_input, normalized_ouput = self.apply_normalize_matrix(input= input, output= output)
        normalized_input_add_ones = self.add_ones_columns(normalized_input= normalized_input)

        # Preallocated: np.append in the loop would copy the whole history on
        # every epoch, which is quadratic in the number of epochs.
        J_array = np.empty(max(epoch, 0))
        for i in range(epoch):
            y_pred = self.predict(theta= theta,
                                  normalized_input= normalized_input_add_ones)
            J_array[i] = self.compute_loss_function(y_true= normalized_ouput,
                                                    y_pred= y_pred)
            theta = self.update_params(theta= theta, lr= lr, y_pred= y_pred,
                                       y_true= normalized_ouput, normalized_input= normalized_input_add_ones)

        theta = self.convert_theta_nomr_to_theta(theta_normalized= theta, input= input, output= output)

        return J_array, theta



### ex2

In [4]:
# Load the ex2 dataset; the path is relative to the notebook's working directory.
pd_ex2 = pd.read_csv('ex2.csv')

# Every column except the last is treated as a feature; the last column is the target.
X_cols = pd_ex2.columns[:-1]
Y_col = pd_ex2.columns[-1]

In [5]:
# Pull the feature matrix (2-D) and the target vector (1-D) out of the frame as NumPy arrays.
X = pd_ex2[X_cols].values
Y = pd_ex2[Y_col].values

In [6]:
# One intercept plus one weight per feature, drawn from a standard normal.
# The generator is seeded so that Restart & Run All starts from the same point every time.
theta_init = np.random.default_rng(seed=0).standard_normal((len(X_cols) + 1, 1))
theta_init.shape

(9, 1)

In [7]:
### 2. Training
# The feature count is read from X instead of being hard-coded, so the cell
# keeps working if the columns of ex2.csv change.
lrm = Linear_Regression_Multivariables(number_of_feature= X.shape[1])
J_array_ex_2, theta_ex_2 = lrm.train(
    epoch= 500000, theta= theta_init, input= X,
    output= Y, lr= 0.001
)

In [8]:
# Closed-form least-squares parameters, used as the reference for the gradient-descent result.
theta_real_ = true_value_theta(input= X, output= Y)
# Raw (un-normalised) X with a leading ones column; only the commented-out plot further down reads it.
X_add = lrm.add_ones_columns(normalized_input= X)

In [9]:
# Shape check: one intercept row plus one row per feature, as a column vector.
theta_real_.shape

(9, 1)

In [10]:
# plot_diagram(name_1= 'a', name_2= 'b', mode_1= 'markers', mode_2= 'markers',
#              x_1= X.reshape(-1, ), y_1= Y.reshape(-1, ), 
#              x_2= X.reshape(-1, ), y_2= np.dot(X_add, theta_ex_2).reshape(-1, ))

In [11]:
# Element-wise relative difference between the closed-form and gradient-descent parameters, rounded to 2 decimals.
np.round((theta_real_ - theta_ex_2) / theta_real_, 2)

array([[ 0.  ],
       [ 0.  ],
       [ 0.  ],
       [ 0.  ],
       [ 0.  ],
       [ 0.  ],
       [-0.  ],
       [-0.01],
       [ 0.  ]])

### ex3

In [12]:
# Load the ex3 dataset; the path is relative to the notebook's working directory.
pd_ex3 = pd.read_csv('ex3.csv')

# Every column except the last is treated as a feature; the last column is the target.
# NOTE: this rebinds X_cols and Y_col, which held the ex2 column names in earlier cells.
X_cols = pd_ex3.columns[:-1]
Y_col = pd_ex3.columns[-1]

In [13]:
# Pull the raw x values (2-D) and y values (1-D) out of the frame as NumPy arrays.
x_value = pd_ex3[X_cols].values
y_value = pd_ex3[Y_col].values

In [14]:
# Highest power of x used as a feature.
order= 4

# Columns of X are x, x^2, ..., x^order; Y becomes a column vector.
# NOTE: this rebinds X and Y, which held the ex2 data in earlier cells.
X = convert_to_new_order(input= x_value, order= order)
Y = y_value.reshape(-1, 1)

In [15]:
# One intercept plus one weight per polynomial term, drawn from a standard normal.
# The generator is seeded so that Restart & Run All starts from the same point every time.
theta_init = np.random.default_rng(seed=0).standard_normal((order + 1, 1))
theta_init.shape

(5, 1)

In [16]:
### 2. Training
# Fit the polynomial model; `lrm` is rebound here and no longer refers to the ex2 model.
lrm = Linear_Regression_Multivariables(number_of_feature= order)
J_array_ex_3, theta_ex_3 = lrm.train(
    epoch= 100000, theta= theta_init, input= X,
    output= Y, lr= 0.01
)

In [17]:
# Trace 'a': the raw data as markers. Trace 'b': the gradient-descent polynomial evaluated at the same x, as a line.
plot_diagram(name_1= 'a', name_2= 'b', mode_1= 'markers', mode_2= 'lines',
             x_1= x_value.reshape(-1, ), y_1= y_value.reshape(-1, ), 
             x_2= x_value.reshape(-1, ), y_2= function(input= X, theta= theta_ex_3))

In [18]:
# Closed-form least-squares parameters for the polynomial features, for comparison with theta_ex_3.
theta_true = true_value_theta(input= X, output= Y)

In [19]:
# Same plot as for the gradient-descent fit, but trace 'b' uses the closed-form parameters theta_true.
plot_diagram(name_1= 'a', name_2= 'b', mode_1= 'markers', mode_2= 'lines',
             x_1= x_value.reshape(-1, ), y_1= y_value.reshape(-1, ), 
             x_2= x_value.reshape(-1, ), y_2= function(input= X, theta= theta_true))

In [20]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt

def predict_output(*, X: np.ndarray, theta: np.ndarray) -> np.ndarray:
    """
    Evaluates the linear model for every row of X.

    Args:
        X (np.ndarray): 2-D feature matrix without the intercept column.
        theta (np.ndarray): Parameters, intercept first; flattened before use.

    Returns:
        np.ndarray: 1-D array with one prediction per row of X.
    """
    intercept_column = np.ones((len(X), 1))
    design = np.hstack((intercept_column, X))
    weights = theta.reshape(-1)
    # Scale each column by its weight, then sum across the row.
    return (design * weights).sum(axis=1)

def plot_graph(*, title_1: str, title_2: str, style_1: str, style_2: str,
               x1: np.ndarray, y1: np.ndarray, x2: np.ndarray, y2: np.ndarray):
    """
    Shows two scatter traces in a single Plotly figure.

    Args:
        title_1 (str): Legend name of the first trace.
        title_2 (str): Legend name of the second trace.
        style_1 (str): Plotly mode of the first trace (e.g., 'markers').
        style_2 (str): Plotly mode of the second trace (e.g., 'lines').
        x1, y1, x2, y2 (np.ndarray): Coordinates of the two traces.
    """
    traces = (
        (x1, y1, style_1, title_1),
        (x2, y2, style_2, title_2),
    )
    fig = go.Figure()
    for xs, ys, draw_mode, label in traces:
        fig.add_trace(go.Scatter(x=xs, y=ys, mode=draw_mode, name=label))
    fig.show()

def normalize_vector(vector: np.ndarray) -> np.ndarray:
    """
    Standardises a vector: subtracts its mean and divides by its
    (population) standard deviation.

    Args:
        vector (np.ndarray): Input values.

    Returns:
        np.ndarray: The standardised values, same shape as the input.
    """
    centred = vector - np.mean(vector)
    return centred / np.std(vector)

def create_polynomial_features(*, X: np.ndarray, degree: int) -> np.ndarray:
    """
    Expands a vector x into the matrix [x, x^2, ..., x^degree].

    Args:
        X (np.ndarray): Input values; flattened before use.
        degree (int): Highest power to generate.

    Returns:
        np.ndarray: Float matrix with one row per value and one column per power.
    """
    flat = X.reshape(-1)
    features = np.empty((flat.shape[0], degree))
    for column, exponent in enumerate(range(1, degree + 1)):
        features[:, column] = flat ** exponent
    return features

def compute_true_theta(*, X: np.ndarray, Y: np.ndarray) -> np.ndarray:
    """
    Computes the least-squares theta from the normal equation
    (X.T X) theta = X.T Y.

    The system is solved directly instead of forming (X.T X)^-1: solving is
    cheaper and loses less precision than an explicit inverse.

    Args:
        X (np.ndarray): 2-D feature matrix without the intercept column.
        Y (np.ndarray): Output values; reshaped to a column vector.

    Returns:
        np.ndarray: Parameters of shape (n_features + 1, 1), intercept first.

    Raises:
        numpy.linalg.LinAlgError: If X.T X is singular.
    """
    ones = np.ones((len(X), 1))
    X_with_ones = np.hstack((ones, X))  # Add a column of ones for the intercept term
    XT_X = np.dot(X_with_ones.T, X_with_ones)  # Compute (X.T * X)
    XT_Y = np.dot(X_with_ones.T, Y.reshape(-1, 1))  # Compute (X.T * Y)
    theta_true = np.linalg.solve(XT_X, XT_Y)  # Solve for theta
    return theta_true

def denormalize_theta(*, theta_norm: np.ndarray, X: np.ndarray, Y: np.ndarray) -> np.ndarray:
    """
    Converts theta learned on standardised data back to the original scale.

    A constant feature column has zero standard deviation. Its weight is
    reported as 0 and its standard deviation is treated as 1, so the result
    stays finite instead of dividing by zero.

    Args:
        theta_norm (np.ndarray): Parameters on the standardised scale,
            intercept in row 0 followed by one row per feature.
        X (np.ndarray): Raw input features (no intercept column).
        Y (np.ndarray): Raw output values.

    Returns:
        np.ndarray: Parameters on the original scale, same shape as theta_norm.
    """
    theta = np.zeros_like(theta_norm)

    mean_X = np.mean(X, axis=0)
    std_X = np.std(X, axis=0)

    mean_Y = np.mean(Y, axis=0)
    std_Y = np.std(Y, axis=0)

    # min == max is an exact test for a constant column, unlike std == 0,
    # which can miss by a rounding error.
    if np.size(X):
        constant = np.asarray(np.min(X, axis=0) == np.max(X, axis=0))
    else:
        constant = np.zeros(np.shape(std_X), dtype=bool)
    safe_std_X = np.where(constant, 1.0, std_X)
    slopes = np.where(constant.reshape(-1, 1), 0.0, theta_norm[1:])

    theta[1:] = std_Y * slopes / safe_std_X.reshape(-1, 1)
    theta[0] = mean_Y + std_Y * theta_norm[0] - np.dot(std_Y * mean_X / safe_std_X, slopes)
    return theta

class LinearRegressionMulti:
    """
    Linear regression with multiple variables, fitted by batch gradient
    descent on standardised features and target.
    """
    def __init__(self, *, num_features: int) -> None:
        """
        Initializes the Linear Regression model.

        Args:
            num_features (int): Number of features in the input data.
        """
        self.num_features = num_features

    def normalize_vector(self, vector: np.ndarray) -> np.ndarray:
        """
        Standardises a vector (zero mean, unit population standard deviation).

        A vector whose entries are all equal is mapped to zeros instead of
        dividing by zero and producing NaN.

        Args:
            vector (np.ndarray): Input vector.

        Returns:
            np.ndarray: Standardised vector.
        """
        centred = vector - np.mean(vector)
        if np.size(vector) and np.min(vector) == np.max(vector):
            return np.zeros_like(centred)
        return centred / np.std(vector)

    def normalize_input_output(self, *, X: np.ndarray, Y: np.ndarray) -> tuple:
        """
        Standardises every column of the input features and output values.

        Args:
            X (np.ndarray): Input features.
            Y (np.ndarray): Output values.

        Returns:
            tuple: (norm_X, norm_Y) with shapes (-1, num_features) and (-1, 1).
        """
        norm_X = np.apply_along_axis(self.normalize_vector, arr=X, axis=0).reshape(-1, self.num_features)
        norm_Y = np.apply_along_axis(self.normalize_vector, arr=Y, axis=0).reshape(-1, 1)
        return norm_X, norm_Y

    def add_intercept_column(self, *, X: np.ndarray) -> np.ndarray:
        """
        Adds a column of ones to the input features for the intercept term.

        Args:
            X (np.ndarray): 2-D input features.

        Returns:
            np.ndarray: Input features with a leading column of ones.
        """
        ones = np.ones((len(X), 1))
        return np.hstack((ones, X))

    def predict(self, *, theta: np.ndarray, X: np.ndarray) -> np.ndarray:
        """
        Predicts the output using the model parameters.

        Args:
            theta (np.ndarray): Model parameters.
            X (np.ndarray): Input features, intercept column already included.

        Returns:
            np.ndarray: X @ theta.
        """
        return np.matmul(X, theta)

    def compute_loss(self, *, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """
        Computes the loss: half the mean squared error.

        Args:
            y_true (np.ndarray): True output values.
            y_pred (np.ndarray): Predicted output values.

        Returns:
            float: sum((y_pred - y_true)^2) / (2 * m).
        """
        m = len(y_true)
        error = y_pred - y_true
        return np.sum(error ** 2) / (2 * m)

    def update_parameters(self, *, theta: np.ndarray, lr: float, y_pred: np.ndarray, 
                          y_true: np.ndarray, X: np.ndarray) -> np.ndarray:
        """
        Performs one gradient-descent step.

        Args:
            theta (np.ndarray): Current model parameters.
            lr (float): Learning rate for parameter updates.
            y_pred (np.ndarray): Predicted output values.
            y_true (np.ndarray): True output values.
            X (np.ndarray): Input features, intercept column already included.

        Returns:
            np.ndarray: theta - lr * X.T @ (y_pred - y_true) / m.
        """
        m = len(y_true)
        error = y_pred - y_true
        gradient = np.dot(X.T, error) / m
        return theta - lr * gradient

    def denormalize_theta(self, *, theta_norm: np.ndarray, X: np.ndarray, Y: np.ndarray) -> np.ndarray:
        """
        Converts theta back to the original scale; delegates to the
        module-level denormalize_theta.

        Args:
            theta_norm (np.ndarray): Parameters on the standardised scale.
            X (np.ndarray): Raw input features.
            Y (np.ndarray): Raw output values.

        Returns:
            np.ndarray: Parameters on the original scale.
        """
        return denormalize_theta(theta_norm=theta_norm, X=X, Y=Y)

    def train(self, *, epochs: int, theta: np.ndarray, X: np.ndarray, 
              Y: np.ndarray, lr: float) -> tuple:
        """
        Trains the Linear Regression model using gradient descent.

        Args:
            epochs (int): Number of training iterations.
            theta (np.ndarray): Initial parameters, shape (num_features + 1, 1);
                not modified.
            X (np.ndarray): Raw input features.
            Y (np.ndarray): Raw output values.
            lr (float): Learning rate for parameter updates.

        Returns:
            tuple: (loss_history, theta) - the loss recorded before each update
            and the final parameters on the original scale.
        """
        norm_X, norm_Y = self.normalize_input_output(X=X, Y=Y)
        X_with_ones = self.add_intercept_column(X=norm_X)

        # Preallocated: np.append in the loop would copy the whole history on
        # every epoch, which is quadratic in the number of epochs.
        loss_history = np.empty(max(epochs, 0))
        for step in range(epochs):
            y_pred = self.predict(theta=theta, X=X_with_ones)
            loss_history[step] = self.compute_loss(y_true=norm_Y, y_pred=y_pred)
            theta = self.update_parameters(theta=theta, lr=lr, y_pred=y_pred, 
                                           y_true=norm_Y, X=X_with_ones)

        theta = self.denormalize_theta(theta_norm=theta, X=X, Y=Y)
        return loss_history, theta

# Example 2: Linear Regression with Multiple Variables
# Read CSV file 'ex2.csv' (path is relative to the notebook's working directory)
data_ex2 = pd.read_csv('ex2.csv')

# Get columns of the file
X_columns = data_ex2.columns[:-1]  # Feature columns
Y_column = data_ex2.columns[-1]    # Target column

# Get input and output vectors
X = data_ex2[X_columns].values
Y = data_ex2[Y_column].values

# Initialize theta from a seeded generator so a fresh Run All starts from the same point
theta_initial = np.random.default_rng(seed=0).standard_normal((len(X_columns) + 1, 1))

# Train the model; the feature count is read from X instead of being hard-coded
model = LinearRegressionMulti(num_features=X.shape[1])
loss_history_ex2, theta_ex2 = model.train(
    epochs=500000, theta=theta_initial, X=X, Y=Y, lr=0.001
)

# Compute the real theta using the normal equation
theta_real = compute_true_theta(X=X, Y=Y)
X_with_ones = model.add_intercept_column(X=X)

# Plot the comparison
# plot_graph(title_1='Actual', title_2='Predicted', style_1='markers', style_2='markers',
#            x1=X.reshape(-1,), y1=Y.reshape(-1,),
#            x2=X.reshape(-1,), y2=np.dot(X_with_ones, theta_ex2).reshape(-1,))

# Compare the estimated theta with the true theta.
# Uses theta_ex2 from the training call above; the previous name theta_ex_2
# only existed as leftover state from an earlier cell's model.
theta_difference = np.round((theta_real - theta_ex2) / theta_real, 2)

# Example 3: Polynomial Regression
# Read CSV file 'ex3.csv' (path is relative to the notebook's working directory)
data_ex3 = pd.read_csv('ex3.csv')

# Get columns of the file
X_columns = data_ex3.columns[:-1]  # Feature columns
Y_column = data_ex3.columns[-1]    # Target column

# Get input and output vectors
x_values = data_ex3[X_columns].values
y_values = data_ex3[Y_column].values

# Set the order of the polynomial
degree = 4

# Create polynomial features
X_poly = create_polynomial_features(X=x_values, degree=degree)
Y_poly = y_values.reshape(-1, 1)

# Initialize theta (seeded, for the same reason as above)
theta_initial = np.random.default_rng(seed=0).standard_normal((degree + 1, 1))

# Train the model
model_poly = LinearRegressionMulti(num_features=degree)
loss_history_ex3, theta_ex3 = model_poly.train(
    epochs=100000, theta=theta_initial, X=X_poly, Y=Y_poly, lr=0.01
)

# Plot the results
plot_graph(title_1='Actual', title_2='Predicted', style_1='markers', style_2='lines',
           x1=x_values.reshape(-1,), y1=y_values.reshape(-1,),
           x2=x_values.reshape(-1,), y2=predict_output(X=X_poly, theta=theta_ex3))

# Compute the true theta for polynomial regression
theta_true = compute_true_theta(X=X_poly, Y=Y_poly)

# Plot the true vs. predicted values
plot_graph(title_1='Actual', title_2='True Prediction', style_1='markers', style_2='lines',
           x1=x_values.reshape(-1,), y1=y_values.reshape(-1,),
           x2=x_values.reshape(-1,), y2=predict_output(X=X_poly, theta=theta_true))


In [21]:
# Scratch value; the animation cell further down rebinds `a` to a linspace before using it.
a = np.array([1, 2, 3])

In [38]:
import plotly.graph_objects as go
import numpy as np


# Flip-book animation: draws a**i for i = 0..9, replacing the previous figure about once per second.
# Relies on `clear_output` and `time`, which are imported in the notebook's first cell.
# Example array; replace 'a' with your actual data
a = np.linspace(1, 10, 100)

for i in range(10):
    clear_output(wait=True)  # Clear the previous output before showing the new figure
    fig = go.Figure()  # Create a new figure for each iteration
    fig.add_trace(go.Scatter(x=a, y=a**i, mode='lines', name=f'a^({i})'))  # Add a trace to the figure
    fig.show()  # Display the figure
    time.sleep(1)  # Wait for 1 second before clearing and showing the next figure
