In [None]:
from sympy import symbols, diff, sqrt

# Symbolic variable that the derivatives are taken with respect to
x = symbols('x')

# First derivative, entered directly: f'(x) = 7x / sqrt(7x^2 + 4)
f_prime = 7*x / sqrt(7*x**2 + 4)

# Second derivative: differentiate f'(x) symbolically once more with respect to x
f_double_prime = f_prime.diff(x)

# Last expression in the cell, so the simplified f''(x) is what gets displayed
f_double_prime.simplify()


### Libraries

In [None]:
import numpy as np
import pandas as pd

### Read data

In [None]:
# Load the real-estate CSV (path is relative to the notebook's working directory),
# print its (rows, columns) shape, and display the first rows as the cell output
csv_path = "../../data/Real_estate.csv"
real_estate_data = pd.read_csv(csv_path)
print(real_estate_data.shape)
real_estate_data.head()

### Splitting data into train and test sets

In [None]:
# Features: the columns at positions 1 through 6; target: the last column
X = real_estate_data.iloc[:, 1:7]
y = real_estate_data.iloc[:, -1]

# Min-max scaling: shift each feature column by its minimum and divide by its
# (max - min) range, so that every column lies in the [0, 1] interval
x_min = X.min()
x_max = X.max()
feature_range = x_max - x_min
X_normalized = X.sub(x_min, axis="columns").div(feature_range, axis="columns")

### 2. a)

In [None]:
import matplotlib.pyplot as plt
# Batch gradient descent for a linear model y ~ X_normalized . w + b,
# minimising the mean squared error over the whole data set.

# Initial parameters and hyperparameters
w = np.ones(X_normalized.shape[1])  # one weight per feature column, all starting at 1
b = 10
learning_rate = 0.001
steps = 4140
n_points = len(y)
loss_values = []  # mean squared error recorded at every step (used for the loss plot)

for step in range(steps):
    # Predictions and residuals on the full data set with the current parameters
    y_pred = X_normalized.dot(w) + b
    residuals = y - y_pred

    # Mean squared error at this step
    loss = np.sum(residuals ** 2) / n_points
    loss_values.append(loss)

    # Gradients of the mean squared error with respect to w and b
    gradient_w = -2 * (X_normalized.T.dot(residuals) / n_points)
    gradient_b = -2 * (np.sum(residuals) / n_points)

    # Step against the gradient
    w = w - (learning_rate * gradient_w)
    b = b - (learning_rate * gradient_b)

# Re-evaluate the predictions with the FINAL parameters. The y_pred left over
# from the loop was computed before the last update of w and b, so using it
# would report an R^2 that does not belong to the weights printed below
# (and would fail outright if steps were 0).
y_pred = X_normalized.dot(w) + b

# Calculate R-squared using final predictions
SSR = ((y - y_pred) ** 2).sum()
SST = ((y - y.mean()) ** 2).sum()
r2 = 1 - (SSR / SST)

print("Weights: \n", w, "\n")
print("Bias: ", b, "\n")
print("R^2: ", r2)

In [None]:
# Plot the loss recorded at every gradient-descent step in `loss_values`.
# The recorded quantity is the sum of squared residuals divided by len(y),
# i.e. the mean squared error, so the legend is labelled MSE rather than RSS.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(loss_values, label='Loss (MSE)')
ax.set_xlabel('Iteration')
ax.set_ylabel('Loss')
ax.set_title('Loss During Gradient Descent')
ax.legend()
plt.show()

### b)

#### With a sufficiently large number of steps, gradient descent is expected to converge to a solution that is not identical to the optimal one but is close to it. The loss graph supports this: the cost decreases rapidly at first and then stabilizes, indicating that the algorithm is approaching an optimal set of parameters.

### c)

In [None]:
# Hyperparameters and starting values for the per-sample updates
learning_rate = 0.001
steps = 4140
n_points = len(y)
w = np.ones(6)
b = 10
loss_values = []  # squared error of the single sample used at each step

# One parameter update per step, each based on a single sample.
# Samples are visited in row order and wrap around after the last row;
# no random shuffling is performed.
for step in range(steps):
    i = step % n_points
    x_i, y_i = X_normalized.iloc[i], y.iloc[i]

    # Prediction and residual for this one sample
    y_pred_i = np.dot(w, x_i) + b
    residual_i = y_i - y_pred_i

    loss = residual_i ** 2
    loss_values.append(loss)

    # Gradients of the single-sample squared error with respect to w and b
    gradient_w_i = -2 * x_i * residual_i
    gradient_b_i = -2 * residual_i

    # Step against the gradient
    w = w - learning_rate * gradient_w_i
    b = b - learning_rate * gradient_b_i

# Evaluate the fitted model on the whole data set using the final w and b
final_y_pred = X_normalized.dot(w) + b

# R-squared: 1 - (residual sum of squares / total sum of squares)
SSR = ((y - final_y_pred) ** 2).sum()
SST = ((y - y.mean()) ** 2).sum()
r2 = 1 - (SSR / SST)

print("Weights: \n", w, "\n")
print("Bias: ", b, "\n")
print("R^2: ", r2)
