In [None]:
%pip install pandas
%pip install numpy
%pip install matplotlib

Collecting pandas
  Downloading pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting numpy>=1.26.0 (from pandas)
  Downloading numpy-2.3.1-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl (10.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.7/10.7 MB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hDownloading numpy-2.3.1-cp313-cp313-macosx_14_0_arm64.whl (5.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hUsing cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, n

In [None]:
# Generate Dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Fix the global NumPy seed so the synthetic dataset is identical on every run.
np.random.seed(42)

# Ground-truth linear model used to synthesize prices: price = true_w * size + true_b + noise
num_samples = 100
true_w, true_b = 100, 50000

# Draw order matters for reproducibility: sizes are sampled first, then the noise.
house_sizes = np.random.uniform(low=800, high=3500, size=num_samples)
noise = np.random.normal(loc=0, scale=15000, size=num_samples)
house_prices = (true_w * house_sizes + true_b) + noise

# Assemble the two columns into a frame and persist it (without the index) for the training cell.
data = pd.DataFrame(dict(size_sqft=house_sizes, price_usd=house_prices))
data.to_csv('house_prices.csv', index=False)

# Show the first five rows as a quick sanity check.
print(data.head())

     size_sqft      price_usd
0  1811.258321  232431.538112
1  3366.928627  382207.752474
2  2776.383643  329014.775937
3  2416.377907  261824.257014
4  1221.250329  168829.954602


Linear Regression

In [None]:
# Load Data
# Read the saved CSV back in and pull each column out as its own float64 NumPy array:
# x holds the house sizes (feature), y holds the prices (target).
data = pd.read_csv('house_prices.csv')
x, y = (
    data[column].values.astype(np.float64)
    for column in ('size_sqft', 'price_usd')
)

# Standardize the feature before running gradient descent.
# House sizes are in the hundreds-to-thousands range, so mean(x**2) is in the
# millions. With learning_rate = 0.01 each update on the raw feature overshoots
# by orders of magnitude and w / b overflow to inf/nan within a few dozen epochs.
# On a zero-mean, unit-variance feature the same learning rate shrinks the
# error by a constant factor every step instead.
x_mean = x.mean()
x_std = x.std()
if x_std == 0:
  raise ValueError("Cannot fit a slope: every value in x is identical.")
x_scaled = (x - x_mean) / x_std

# Declare Initial Weight & Bias (small random values). These live in the
# standardized-feature space; they are mapped back to original units after training.
w_scaled = np.random.randn() * 0.01
b_scaled = np.random.randn() * 0.01

# Declare Learning Rate + number of times gradient descent will run
learning_rate = 0.01
epochs = 1000

# Cost recorded at every epoch; used later to plot results
costs = []

# Training Loop
for epoch in range(epochs):
  # Prediction (y is left unscaled, so predictions and cost stay in price units)
  f = w_scaled * x_scaled + b_scaled
  error = f - y

  # Cost (half of the mean squared error)
  j = np.mean(error**2)/2
  costs.append(j)

  # Gradient Descent: partial derivatives of the cost w.r.t. weight and bias
  dw = np.mean(error * x_scaled)
  db = np.mean(error)
  w_scaled = w_scaled - learning_rate * dw
  b_scaled = b_scaled - learning_rate * db

# Convert the learned parameters back to the original feature units:
#   w_scaled * (x - x_mean) / x_std + b_scaled  ==  w * x + b
w = w_scaled / x_std
b = b_scaled - w_scaled * x_mean / x_std

final_predictions = w * x + b

# Plotting Data
# Figure 1: the cost recorded at each epoch of the training loop.
cost_fig, cost_ax = plt.subplots()
cost_ax.plot(range(epochs), costs)
cost_ax.set_xlabel('Epoch')
cost_ax.set_ylabel('Cost (J)')
cost_ax.set_title('Cost Function Convergence')
plt.show()

# Figure 2: the raw data points with the model's predictions drawn over them.
fit_fig, fit_ax = plt.subplots()
fit_ax.scatter(x, y, color='blue', label='Data points')
fit_ax.plot(x, final_predictions, color='red', label='Fitted line')
fit_ax.set_xlabel('House Size (sq ft)')
fit_ax.set_ylabel('Price (USD)')
fit_ax.set_title('Linear Regression Fit')
fit_ax.legend()
plt.show()

# Report the parameters produced by training
print(f"Learned w: {w}, Learned b: {b}")