## Chapter 3: Linear Regression

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## 3.1: Simple Linear Regression

#### 3.1.1: Estimating the Coefficients

- predicting a continuous value $Y$ on the basis of a single predictor variable $X$.
- assumes that there's approximately a linear relationship between $X$ and $Y$.
- $Y$ $\approx$ $\beta_0$ $+$ $\beta_1$ $X$ (approximately modeled as)
- $\beta_0$ and $\beta_1$ are unknown constants that represent the **intercept** and **slope**
- $\hat{y}_i = \hat{\beta}_0 + \hat{\beta}_1 x_i$ (Prediction for $Y$ based on the $i$th value of $X$)
- We want to find an intercept $\hat{\beta}_0$ and a slope $\hat{\beta}_1$ that result in a line as close as possible to the $n$ data points.
- $e_i = y_i - \hat{y}_i$ (Represents the $i$th residual)

In [11]:
# Toy data set with two observations: predictor values and their responses.
X, y = np.array([1, 2]), np.array([300, 500])


def compute_b1(X, y):
    """Estimates the slope (b1) of the least-squares regression line.

    The slope is the sum of the products of the centred X and y values
    divided by the sum of the squared centred X values.

    Args:
        X (array-like): Independent variable
        y (array-like): Dependent variable

    Returns:
        float: The slope of the regression line
    """
    # Centre both variables around their means (taken along axis 0).
    x_centered = X - np.mean(X, axis=0)
    y_centered = y - np.mean(y, axis=0)

    cross_deviation = np.sum(x_centered * y_centered)
    squared_deviation = np.sum(x_centered ** 2)
    return cross_deviation / squared_deviation


def compute_b0(b1, X, y):
    """Computes the intercept (b0)

    Uses the least-squares identity b0 = mean(y) - b1 * mean(X).

    Args:
        b1 (float): Slope
        X (array-like): Independent variable
        y (array-like): Dependent variable

    Returns:
        float: The intercept of the regression line
    """
    X_mean = np.mean(X, axis=0)
    y_mean = np.mean(y, axis=0)
    # Use the slope passed in by the caller rather than a notebook-level
    # global, so the function works on a fresh kernel and for any slope.
    return y_mean - b1 * X_mean


# Fit the slope first; the intercept is computed from it.
b_1 = compute_b1(X, y)
b_0 = compute_b0(b1=b_1, X=X, y=y)
print(b_1, b_0)

200.0 100.0


In [12]:
def compute_rss(y, y_hat):
    """Computes the residual sum of squares (RSS)

    Args:
        y (array-like): Observed values
        y_hat (array-like): Predicted values

    Returns:
        number: The sum of the squared residuals (y - y_hat)
    """
    # np.subtract accepts plain Python lists/tuples as well as arrays, whereas
    # the bare `-` operator raises TypeError when both inputs are lists.
    residuals = np.subtract(y, y_hat)
    return np.sum(np.square(residuals))

In [20]:
# Fitted values from the estimated line: intercept plus slope times each x.
y_hat = b_0 + b_1 * X
rss = compute_rss(y, y_hat)
print(f"y_hat: {y_hat}")
print(f"RSS: {rss}")

y_hat: [300. 500.]
RSS: 0.0


#### 3.1.2: Assessing the Accuracy of the Coefficient Estimates