# Lab01: Linear Regression 线性回归

## 1 - 依赖包

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import copy
import math
%matplotlib inline

## 2 - 问题陈述

假设你是一家连锁餐厅的首席执行官，正在考虑在不同的城市开设一家新店。

- 你想把业务扩展到可能给你的餐馆带来更高利润的城市。
- 该连锁店已经在各个城市开设了餐厅，你也有来自这些城市的利润和人口数据。
- 你还可以获得候选餐厅所在城市的数据。
  - 对于这些城市，你有城市人口总数。

你能利用这些数据来帮助你识别哪些城市可能会给你的企业带来更高的利润吗？

## 3 - 数据集

我们将从加载数据集开始。

- 下面所示的 `load_data()` 函数将数据加载到变量 `x_train` 和 `y_train` 中。
  - `x_train` 是城市的人口总数（单位：万人）。
  - `y_train` 是该城市餐厅的利润（单位：万美元）。利润为负数表示亏损。
  - `x_train` 和 `y_train` 都是 NumPy 数组。

In [None]:
# Load the dataset

def load_data(path='data.txt'):
    """
    Loads the training set from a comma-separated text file.

    Args:
        path (str): File to read. Defaults to 'data.txt', resolved against
            the current working directory.

    Returns:
        x (ndarray): Shape (m,) First column of the file
        y (ndarray): Shape (m,) Second column of the file
    """
    # ndmin=2 keeps the result two-dimensional even when the file holds a
    # single row; without it loadtxt returns a 1-D array and the column
    # slices below raise IndexError.
    data = np.loadtxt(path, delimiter=',', ndmin=2)
    return data[:, 0], data[:, 1]


# Load the training inputs and labels once; the cells below reuse these arrays.
x_train, y_train = load_data()

In [None]:
# Scatter plot of the raw training examples, one red cross per data point

plt.scatter(x_train, y_train, c='r', marker='x')
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000')
plt.title('Profits vs. Population per city')
plt.show()

## 4 - Cost Function

In [None]:
def compute_cost(x, y, w, b):
    """
    Computes the squared-error cost for single-variable linear regression.

    The cost is sum((w * x + b - y) ** 2) / (2 * m), where m is the number
    of examples.

    Args:
        x (array_like): Shape (m,) Input to the model (population of cities)
        y (array_like): Shape (m,) Label (actual profits for the cities)
        w, b (scalar): Parameters of the model

    Returns:
        total_cost (float): The cost of using w,b as the parameters for linear regression to fit the data points in x and y

    Raises:
        ValueError: If x and y have different shapes or contain no examples.
    """

    # Accept lists/tuples as well as ndarrays
    x = np.asarray(x)
    y = np.asarray(y)

    if x.shape != y.shape:
        raise ValueError(f"x and y must have the same shape, got {x.shape} and {y.shape}")

    m = x.shape[0]
    if m == 0:
        # The cost is an average over examples and is undefined for none
        raise ValueError("x and y must contain at least one example")

    # Vectorized residuals replace the per-example Python loop
    residuals = w * x + b - y
    total_cost = np.sum(residuals ** 2) / (2 * m)

    return total_cost

## 5 - Gradient Descent

In [None]:
def compute_gradient(x, y, w, b):
    """
    Computes the gradient of the squared-error cost for linear regression.

    With err = w * x + b - y and m examples, the gradient is
    dj_dw = sum(err * x) / m and dj_db = sum(err) / m.

    Args:
        x (array_like): Shape (m,) Input to the model (population of cities)
        y (array_like): Shape (m,) Label (actual profits for the cities)
        w, b (scalar): Parameters of the model

    Returns:
        dj_dw (scalar): The gradient of the cost w.r.t. the parameter w
        dj_db (scalar): The gradient of the cost w.r.t. the parameter b

    Raises:
        ValueError: If x and y have different shapes or contain no examples.
    """

    # Accept lists/tuples as well as ndarrays
    x = np.asarray(x)
    y = np.asarray(y)

    if x.shape != y.shape:
        raise ValueError(f"x and y must have the same shape, got {x.shape} and {y.shape}")

    m = x.shape[0]
    if m == 0:
        # The gradient is an average over examples and is undefined for none
        raise ValueError("x and y must contain at least one example")

    # Prediction error for every example at once, replacing the Python loop
    err = w * x + b - y

    dj_dw = np.dot(err, x) / m
    dj_db = np.sum(err) / m

    return dj_dw, dj_db

## 6 - Learning Parameters Using Gradient Descent

In [None]:
def gradient_descent(x, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Performs batch gradient descent to learn w and b. Updates the parameters by taking num_iters gradient steps with learning rate alpha.

    Args:
        x (ndarray): Shape (m,)
        y (ndarray): Shape (m,)
        w_in, b_in (scalar): Initial values of parameters of the model
        cost_function: Function to compute cost, called as cost_function(x, y, w, b)
        gradient_function: Function to compute the gradient, called as
            gradient_function(x, y, w, b) and returning (dj_dw, dj_db)
        alpha (float): Learning rate
        num_iters (int): Number of iterations to run gradient descent

    Returns:
        w (scalar): Updated value of parameter w after running gradient descent
        b (scalar): Updated value of parameter b after running gradient descent
        J_history (list): Cost after each of the first 100000 iterations
        w_history (list): Value of w at each iteration where progress was printed
    """

    # Cap on the number of recorded costs, to prevent resource exhaustion
    max_recorded_costs = 100000

    J_history = []
    w_history = []
    w = copy.deepcopy(w_in)  # Avoid modifying the caller's w within function
    b = b_in

    # Report progress about 10 times over the run, or on every iteration
    # when num_iters < 10. Loop-invariant, so computed once up front.
    print_interval = math.ceil(num_iters / 10)

    for i in range(num_iters):
        # Calculate the gradient at the current parameters
        dj_dw, dj_db = gradient_function(x, y, w, b)

        # Update parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save cost J for this iteration while under the history cap
        cost = None
        if i < max_recorded_costs:
            cost = cost_function(x, y, w, b)
            J_history.append(cost)

        if i % print_interval == 0:
            if cost is None:
                # Past the history cap J_history[-1] is stale, so evaluate
                # the current cost instead of reporting an old one.
                cost = cost_function(x, y, w, b)
            w_history.append(w)
            print(f"Iteration {i:4}: Cost {float(cost):8.2f}")

    return w, b, J_history, w_history

In [None]:
# Both model parameters start at zero
initial_w = 0.
initial_b = 0.

# Hyperparameters: number of gradient steps and learning rate
iterations = 1500
alpha = 0.01

# Fit the model; the two history lists returned by gradient_descent are discarded
w, b, _, _ = gradient_descent(
    x_train,
    y_train,
    initial_w,
    initial_b,
    compute_cost,
    compute_gradient,
    alpha,
    iterations,
)
print('w,b found by gradient descent:', w, b)

In [None]:
# Plot the linear fit

m = x_train.shape[0]

# Model prediction for every training example, computed as one vectorized
# NumPy expression instead of filling the array element by element
predicted = w * x_train + b

# Fitted line in blue over the training data as red crosses
plt.plot(x_train, predicted, c='b')
plt.scatter(x_train, y_train, marker='x', c='r')
plt.title('Profits vs. Population per city')
plt.ylabel('Profit in $10,000')
plt.xlabel('Population of City in 10,000s')
plt.show()