In [26]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [27]:
def function(*, input: np.ndarray, theta: np.ndarray) -> np.ndarray:
    # Create zeros matrix 
    y = np.zeros_like(input, dtype= float)
    # Multiply matrix X with theta matrix
    y = np.sum(np.dot(input, theta), axis= 1)
    return y

def normalized(vector: np.ndarray) -> np.ndarray:
    mean = np.mean(vector)  # Mean of input vector
    standard_deviation = np.std(vector) # std of input vector
    normalized_vector = (vector - mean) / standard_deviation # linear transform vector by using gauss
    return normalized_vector

def convert_data(*, input: np.ndarray, order: int) -> np.ndarray:
    """
        Convert vector x into matrix x with multiple order
    """
    ones = np.ones((len(input), order+1))
    for i in range(1, order + 1):
        ones[:, i] = input**i
    return ones

def true_value(*, x_normalized: np.ndarray, y_normalized: np.ndarray) -> np.ndarray:
    """
        Compute true value of theta 
        theta =   (X.T*X)^-1 * (X.T * Y)
    """
    inv_XT_X = np.linalg.inv(np.dot(x_normalized.T, x_normalized))
    XT_Y = np.dot(x_normalized.T, y_normalized)
    theta_true = np.dot(inv_XT_X, XT_Y)
    return theta_true

def inverse_normalized(*, theta_normalized: np.ndarray, input: np.ndarray, output: np.ndarray) -> np.ndarray:
    theta = np.zeros_like(theta_normalized)

    mean_x = np.mean(input,  axis= 0)
    std_x = np.std(input,  axis= 0)

    mean_y = np.mean(output, axis= 0)
    std_y = np.std(output, axis= 0)

    theta[1:] = std_y*theta_normalized[1:]/(std_x[1:].reshape(-1, 1))
    theta[0] = mean_y + std_y*theta_normalized[0] - np.dot(std_y*mean_x[1:]/std_x[1:], theta_normalized[1:])

    return theta

In [29]:
class Linear_Regression_Multivariables:
    def __init__(self,) -> None:
        pass

    def predict(self, *, theta: np.ndarray, normalized_input: np.ndarray) -> np.ndarray:
        y_pred = np.matmul(normalized_input, theta)
        return y_pred
    
    def compute_loss_function(self, *, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        m = len(y_true)
        E = y_pred - y_true
        J = np.sum((E)**2)/ (2*m)
        return J
    
    def update_params(self, *, theta: np.ndarray, lr: float, y_pred: np.ndarray, 
                      y_true: np.ndarray, normalized_input: np.ndarray) -> np.ndarray:
        m = len(y_true)
        E = y_pred - y_true
        dJ_dtheta = np.dot(normalized_input.T, E) / (m)
        theta_updated = theta - lr*dJ_dtheta
        return theta_updated
    
    def train(self, *, epoch: int, theta: np.ndarray, normalized_input: np.ndarray, 
              y_true: np.ndarray, lr: float) -> np.ndarray:
        m = len(y_true)
        J_array = []
        for i in range(epoch):
            y_pred = self.predict(theta= theta, 
                                  normalized_input= normalized_input)
            J = self.compute_loss_function(y_true= y_true, 
                                           y_pred= y_pred)
            theta = self.update_params(theta= theta, lr= lr, y_pred= y_pred, 
                                       y_true= y_true, normalized_input= normalized_input)
            J_array.append(J)

        return J_array, theta



### 1. Normalize data

In [30]:
# Read csv file ex2.csv
pd_ex2 = pd.read_csv('ex2.csv')

# Get collumns of file 
X_cols = pd_ex2.columns[:-1]
Y_col = pd_ex2.columns[-1]

In [31]:
# Get vector input and output
X = pd_ex2[X_cols].values
Y = pd_ex2[Y_col].values

In [32]:
# Normalize 
X_norm = np.apply_along_axis(normalized, 0, X)
Y_norm = np.apply_along_axis(normalized, 0, Y)

In [33]:
### 2. Training 


**Raw**

In [34]:
order = 2

In [35]:
df = pd.read_csv('ex3.csv')

x_cols = df.columns[:len(df.columns) - 1]
y_col = df.columns[-1]

In [36]:
x_value = df[x_cols].to_numpy().reshape(-1, )
y_value = df[y_col].to_numpy().reshape(-1, )

# print_all(x_value, y_value )

In [37]:
x_value = convert_data(input= x_value, order= order)
y_value = y_value.reshape(-1, 1)

In [38]:
lrm_order = Linear_Regression_Multivariables()
theta_init = np.random.randn(order + 1, 1)

In [39]:
theta_train = 0
for i in range(10000):
    pred = lrm_order.predict(theta= theta_init, normalized_input= x_value)
    cost = lrm_order.compute_loss_function(y_pred= pred, y_true= y_value)
    theta_init = lrm_order.update_params(theta= theta_init, lr= 0.0000000001, y_pred= pred,
                                y_true= y_value, normalized_input= x_value)
    theta_train = theta_init
    # print(theta_init)
    print(cost)

414517906.7679166
412860333.2251204
411209396.9778978
409565071.44903064
407927330.167723
406296146.76917326
404671494.9941508
403053348.68857276
401441681.80308276
399836468.39263225
398237682.61606246
396645298.73568803
395059291.11688334
393479634.2276696
391906302.63830376
390339271.02086896
388778514.148867
387224006.89681256
385675724.23982805
384133641.2532413
382597733.11218387
381067975.09119195
379544342.5638077
378026811.00218344
376515355.97668636
375009953.15550524
373510578.30425906
372017207.28560656
370529816.05885804
369048380.6795877
367572877.299249
366103282.16479003
364639571.61827135
363181722.0964856
361729710.1305775
360283512.34566635
358843105.4604697
357408466.2869286
355979571.72983426
354556398.7864564
353138924.5461725
351727126.1900997
350320980.99072665
348920466.31154853
347525559.60670185
346136238.4206016
344752480.3875804
343374263.2315277
342001564.76553166
340634362.89152175
339272635.59991294
337916360.96925175
336565517.16586286
335220082.4434981

In [40]:
theta_train

array([[0.19987715],
       [2.25921397],
       [6.56829022]])

In [41]:
y_range = function(input= x_value, theta= theta_train)

# Create a new Plotly figure to visualize the data points and the regression line
fig = go.Figure()

# Add a scatter plot for the original data points from the CSV file
fig.add_trace(
    go.Scatter(
        x=x_value[:, 1],  # X values from the 'x' column
        y=y_value.reshape(-1,),  # Y values from the 'y' column
        mode='markers',  # Plot style as markers (points)
        marker=dict(symbol='x'),  # Marker style
        name='Data Points'  # Name for the legend
    )
)

fig.add_trace(
    go.Scatter(
        x=x_value[:, 1],  # X values for the regression line
        y=y_range.reshape(-1,),  # Y values for the regression line
        mode='lines',  # Plot style as a line
        name='Regression Line'  # Name for the legend
    )
)

**Function**

In [42]:
lrm_order = Linear_Regression_Multivariables()

J_array, theta_train_2 = lrm_order.train(
    epoch= 10000,
    theta= theta_init,
    normalized_input= x_value,
    y_true= y_value, 
    lr= 0.0000000001
)

In [43]:
theta_train_2

array([[0.19990628],
       [2.25918991],
       [6.56829053]])

In [44]:
y_range = function(input= x_value, theta= theta_train_2)

# Create a new Plotly figure to visualize the data points and the regression line
fig = go.Figure()

# Add a scatter plot for the original data points from the CSV file
fig.add_trace(
    go.Scatter(
        x=x_value[:, 1],  # X values from the 'x' column
        y=y_value.reshape(-1,),  # Y values from the 'y' column
        mode='markers',  # Plot style as markers (points)
        marker=dict(symbol='x'),  # Marker style
        name='Data Points'  # Name for the legend
    )
)

fig.add_trace(
    go.Scatter(
        x=x_value[:, 1],  # X values for the regression line
        y=y_range.reshape(-1,),  # Y values for the regression line
        mode='lines',  # Plot style as a line
        name='Regression Line'  # Name for the legend
    )
)

**Normalize**

In [45]:
order = 3
theta_init = np.random.randn(order + 1, 1)

x_value = df[x_cols].to_numpy().reshape(-1, )
y_value = df[y_col].to_numpy().reshape(-1, )

# print_all(x_value, y_value )
x_normalized = convert_data(input= x_value, order= order)
y_normalized = y_value.reshape(-1, 1)

In [46]:
x_normalized[:, 1:] = np.apply_along_axis(func1d= normalized, arr= x_normalized[:, 1:], axis= 0)
y_normalized = np.apply_along_axis(func1d= normalized, arr= y_normalized.reshape(-1, 1), axis= 0)

In [47]:
J_array, theta_train_3 = lrm_order.train(
    epoch= 1000000,
    theta= theta_init,
    normalized_input= x_normalized,
    y_true= y_normalized, 
    lr= 0.001
)
theta_train_3

array([[1.80361622e-16],
       [4.34826534e-02],
       [8.68934906e-01],
       [8.87134648e-02]])

In [48]:
y_range = function(input= x_normalized, theta= theta_train_3)

fig = go.Figure()

# Add a scatter plot for the original data points from the CSV file
fig.add_trace(
    go.Scatter(
        x=x_normalized[:, 1],  # X values from the 'x' column
        y=y_normalized.reshape(-1,),  # Y values from the 'y' column
        mode='markers',  # Plot style as markers (points)
        marker=dict(symbol='x'),  # Marker style
        name='Data Points'  # Name for the legend
    )
)

fig.add_trace(
    go.Scatter(
        x=x_normalized[:, 1],  # X values for the regression line
        y=y_range.reshape(-1,),  # Y values for the regression line
        mode='lines',  # Plot style as a line
        name='Regression Line'  # Name for the legend
    )
)

**Unnormalize**

In [49]:
input_x = convert_data(input= x_value, order= order)
output_y = y_value.reshape(-1, 1)

In [53]:
theta_unorm = inverse_normalized(theta_normalized= theta_train_3, input= input_x, output= output_y)
y_range = function(input= input_x, theta= theta_unorm)

# Create a new Plotly figure to visualize the data points and the regression line
fig = go.Figure()

# Add a scatter plot for the original data points from the CSV file
fig.add_trace(
    go.Scatter(
        x=input_x[:, 1],  # X values from the 'x' column
        y=y_value.reshape(-1,),  # Y values from the 'y' column
        mode='markers',  # Plot style as markers (points)
        marker=dict(symbol='x'),  # Marker style
        name='Data Points'  # Name for the legend
    )
)

fig.add_trace(
    go.Scatter(
        x=input_x[:, 1],  # X values for the regression line
        y=y_range.reshape(-1,),  # Y values for the regression line
        mode='lines',  # Plot style as a line
        name='Regression Line'  # Name for the legend
    )
)

$
{X}_{norm} \enspace {\theta}_{norm} = {Y}_{norm} \\
[m \times n]\![n \times 1] \enspace[m \times 1] \\
\! \\
$


\begin{multline}
    first part of the equation \\
    = second part of the equation
\end{multline}

In [59]:
import matplotlib.pyplot as plt                                                 
import sympy                                                                    

x = sympy.symbols('x')                                                          
y = 1 + sympy.sin(sympy.sqrt(x**2 + 20))                                         
lat = sympy.latex(y)                                                                                       