In [None]:
import numpy as np
import pandas as pd

In [None]:
def import_data(file_path):
    """Load a CSV file and return standardized features and target.

    Every column except the last is treated as a feature; the last column is
    the target. Each feature column and the target are standardized to zero
    mean and unit (population) standard deviation.

    A column whose standard deviation is zero (a constant column) would
    otherwise trigger a 0/0 division and fill the result with NaN. Such
    columns are divided by 1 instead, so they come out as all zeros.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    A : numpy.ndarray
        Standardized feature matrix, one row per CSV row.
    b : numpy.ndarray
        Standardized target vector.
    """
    data = pd.read_csv(file_path)
    A = data.iloc[:, :-1].values  # all columns except the last
    b = data.iloc[:, -1].values   # last column

    # Standardize the features column by column; constant columns get a unit
    # divisor so they become 0 rather than NaN.
    A_std = np.std(A, axis=0)
    A_std = np.where(A_std == 0, 1.0, A_std)
    A = (A - np.mean(A, axis=0)) / A_std

    # Same guard for a constant target.
    b_std = np.std(b)
    if b_std == 0:
        b_std = 1.0
    b = (b - np.mean(b)) / b_std

    return A, b

## Gradient descent

In [None]:
def initialize_vector(n):
    """Return a random starting vector of length ``n``.

    Entries are drawn by ``np.random.rand``, i.e. uniformly from [0, 1) using
    NumPy's global random state.
    """
    starting_point = np.random.rand(n)
    return starting_point

In [None]:
def calculate_loss(a, v, b):
    """Return the mean squared error between ``b`` and the prediction ``a . v``.

    Parameters
    ----------
    a : array-like
        Feature matrix.
    v : array-like
        Coefficient vector.
    b : array-like
        Target values.
    """
    residual = b - np.dot(a, v)
    return np.mean(np.square(residual))

In [None]:
def calculate_gradient(a, v, b):
    """Return the gradient of the mean squared error with respect to ``v``.

    Computes ``2 * a.T . (a . v - b) / len(b)``.

    Parameters
    ----------
    a : numpy.ndarray
        Feature matrix.
    v : array-like
        Current coefficient vector.
    b : array-like
        Target values; its length is the averaging denominator.
    """
    residual = np.dot(a, v) - b
    n_samples = len(b)
    return 2 * np.dot(a.T, residual) / n_samples

In [None]:
def update_parameters(v, learning_rate, gradient):
    """Return the vector obtained by one gradient-descent step from ``v``.

    The result is a new array, ``v - learning_rate * gradient``; the input
    ``v`` is left untouched. Updating in place would hand the caller back the
    very object it passed in, so a "new versus old" comparison would compare
    an array with itself. An in-place update also fails for an integer ``v``
    because the float step cannot be cast back into it.

    Parameters
    ----------
    v : numpy.ndarray
        Current coefficient vector.
    learning_rate : float
        Step size.
    gradient : numpy.ndarray
        Gradient of the loss at ``v``.

    Returns
    -------
    numpy.ndarray
        The updated coefficient vector.
    """
    return v - learning_rate * gradient

In [None]:
def gradient_descent(a, b, learning_rate=0.01, max_iterations=1000, tolerance=1e-6):
    """Minimize the mean squared error of ``a . v`` against ``b``.

    Starts from the vector produced by ``initialize_vector`` and repeatedly
    steps against the gradient. Iteration stops once the L2 norm of a step
    drops below ``tolerance`` or after ``max_iterations`` steps.

    Parameters
    ----------
    a : numpy.ndarray
        Feature matrix; ``a.shape[1]`` sets the length of ``v``.
    b : numpy.ndarray
        Target vector.
    learning_rate : float, optional
        Step size applied to the gradient.
    max_iterations : int, optional
        Upper bound on the number of update steps.
    tolerance : float, optional
        Convergence threshold on the L2 norm of the change in ``v``.

    Returns
    -------
    numpy.ndarray
        The most recent iterate of ``v``.
    """
    v = initialize_vector(a.shape[1])
    for _ in range(max_iterations):
        gradient = calculate_gradient(a, v, b)

        # Snapshot the current iterate BEFORE updating. If update_parameters
        # modifies its argument in place, the returned array is the same
        # object as the one passed in; comparing the two would always give a
        # zero step and stop the loop after a single iteration.
        v_previous = v.copy()
        v = update_parameters(v, learning_rate, gradient)

        # Check for convergence: the step has become negligibly small.
        if np.linalg.norm(v - v_previous, ord=2) < tolerance:
            break

    return v

In [None]:
# NOTE(review): this is an absolute path at the filesystem root — confirm that
# is where the CSV actually lives in the intended environment.
filename = '/data1.csv'
# import_data returns the normalized feature matrix and the normalized target.
a, b = import_data(filename)

# Fit the coefficient vector using gradient_descent's default settings.
v = gradient_descent(a, b)
print("Optimal vector v:")
print(v)


In [None]:
# Mean squared error of the fitted vector v on the normalized data (a, b).
final_loss = calculate_loss(a, v, b)
print(f"Final loss: {final_loss:.6f}")

In [None]:
import time

def compute_gradient_descent_time(a, b):
    """Return the seconds taken by one ``gradient_descent(a, b)`` run.

    The interval is measured with ``time.perf_counter``, a monotonic
    high-resolution clock. The wall clock (``time.time``) can be adjusted
    while the code runs, which may produce a wrong or even negative duration.

    Parameters
    ----------
    a : numpy.ndarray
        Feature matrix forwarded to ``gradient_descent``.
    b : numpy.ndarray
        Target vector forwarded to ``gradient_descent``.

    Returns
    -------
    float
        Elapsed time in seconds. The result of the descent is discarded.
    """
    start_time = time.perf_counter()

    # Perform gradient descent; only its duration matters here.
    gradient_descent(a, b)

    return time.perf_counter() - start_time

In [None]:
# Time one complete gradient_descent run on (a, b). This is a fresh run that
# draws a new random starting vector; the `v` computed earlier is not reused.
time_taken = compute_gradient_descent_time(a, b)
print(f"Time taken for gradient descent: {time_taken:.6f} seconds")