<a href="https://colab.research.google.com/github/akhan126/AdamEmilyZannie/blob/main/Base_Variance_Tradeoff_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Applied Machine Learning

## Interactive demonstration of the Bias-Variance Tradeoff

You can adjust the polynomial degree of a model and see how the model's fit,
bias, variance, and total error change in real time.

## 1. Imports and Setup


In [None]:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import ipywidgets as widgets
from IPython.display import display

import warnings
# Silence all warnings so library messages do not clutter the notebook output.
# NOTE: this is a blanket filter, so it also hides warnings that may matter.
warnings.filterwarnings("ignore")

# Matplotlib style for a cleaner look. The seaborn style sheets were renamed
# with a "v0_8" prefix in Matplotlib 3.6, so use whichever name this version
# ships and keep the default style if neither is available.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break



## Generate Synthetic Data
Create a non-linear dataset based on a sine wave with some random noise.
This mimics a real-world scenario where there's an underlying pattern
obscured by randomness.

In [None]:
# Experiment settings.
N_SAMPLES = 100    # number of (x, y) points in the synthetic dataset
NOISE_LEVEL = 0.3  # scale factor for the random noise added to the labels

# Seed NumPy's global random generator so reruns draw the same numbers.
np.random.seed(42)

def true_function(x):
    """Return the noise-free target sin(2*pi*x) that generates the data.

    Works element-wise, so ``x`` may be a scalar or a NumPy array.
    """
    angular_frequency = 2 * np.pi
    return np.sin(angular_frequency * x)

# Features: N_SAMPLES uniform draws from [0, 1), stored in ascending order.
X = np.random.rand(N_SAMPLES)
X.sort()
# Labels: the noise-free signal plus standard-normal noise scaled by NOISE_LEVEL.
y = true_function(X) + NOISE_LEVEL * np.random.randn(N_SAMPLES)

# Evenly spaced grid on [0, 1] used to draw the noise-free curve smoothly.
X_true = np.linspace(0.0, 1.0, num=100)
y_true = true_function(X_true)



## Calculate Bias, Variance, and Error
To plot the error curves, we need to estimate bias and variance for various
model complexities. The true definitions require averaging over many datasets.
We simulate this using __bootstrapping__: repeatedly fitting models to different
random resamples of our main training data, each drawn with replacement.


In [None]:
MAX_DEGREE = 15     # highest polynomial degree evaluated
N_BOOTSTRAPS = 100  # models fitted per degree, one per bootstrap resample

# The three result lists get one entry per degree, in the order of `degrees`.
degrees = np.arange(1, MAX_DEGREE + 1)
all_biases = []
all_variances = []
all_total_errors = []

# Evaluation grid with noise-free targets: every prediction below is compared
# with the true function itself, not with noisy observations.
X_test = np.linspace(0.0, 1.0, num=100)
y_test = true_function(X_test)

for degree in degrees:
    # Polynomial feature expansion followed by ordinary least squares. The same
    # pipeline object is refitted for every resample of this degree.
    model = Pipeline(steps=[
        ('poly', PolynomialFeatures(degree=degree)),
        ('linear', LinearRegression()),
    ])

    # Row i holds the test-grid predictions of the model fitted on resample i.
    bootstrap_preds = np.zeros((N_BOOTSTRAPS, X_test.size))

    for i in range(N_BOOTSTRAPS):
        # Draw N_SAMPLES training indices with replacement.
        bootstrap_indices = np.random.randint(low=0, high=N_SAMPLES, size=N_SAMPLES)
        X_bootstrap = X[bootstrap_indices]
        y_bootstrap = y[bootstrap_indices]

        # scikit-learn expects a 2-D feature matrix, hence the column reshape.
        model.fit(X_bootstrap.reshape(-1, 1), y_bootstrap)
        bootstrap_preds[i] = model.predict(X_test.reshape(-1, 1))

    # --- Bias and variance estimates ---
    # Point-wise average prediction over all bootstrap models.
    mean_predictions = bootstrap_preds.mean(axis=0)

    # Bias^2: squared gap between the average prediction and the true value,
    # averaged over the test grid.
    bias_squared = np.mean((mean_predictions - y_test) ** 2)

    # Variance: spread of the bootstrap predictions at each test point,
    # averaged over the test grid.
    variance = bootstrap_preds.var(axis=0).mean()

    # "Total error": MSE of ONE model fitted on the full original dataset,
    # measured against the noise-free targets. Because it comes from a single
    # fit rather than from the bootstrap ensemble, it is not the sum of the
    # two estimates above.
    model.fit(X.reshape(-1, 1), y)
    y_pred = model.predict(X_test.reshape(-1, 1))
    total_error = mean_squared_error(y_test, y_pred)

    all_biases.append(bias_squared)
    all_variances.append(variance)
    all_total_errors.append(total_error)

# Constant curve at NOISE_LEVEL**2 (the variance of the added noise), with one
# value per degree.
irreducible_error = np.full(degrees.shape, NOISE_LEVEL**2, dtype=float)


## Define the Interactive Plotting Function
This function is the core of the interactive notebook. It's called every time
the slider value changes. It creates two plots:
1. The data and the current model's fit.
2. The curves for Bias², Variance, and Total Error.

In [None]:
def plot_bias_variance(degree=1):
    """
    Plot the model fit and the bias-variance curves for one polynomial degree.

    Fits a polynomial regression of the given degree to the full training set
    (module-level ``X`` and ``y``) and draws it against the data and the true
    function. Next to it, draws the precomputed ``all_biases``,
    ``all_variances`` and ``all_total_errors`` curves with the selected degree
    marked. Finally prints that degree's metrics and a short interpretation.

    Args:
        degree: Polynomial degree to display. Must be one of the values in the
            module-level ``degrees`` array.

    Raises:
        IndexError: If ``degree`` is not present in ``degrees``. The lookup
            happens after the figure has been shown.
    """
    # Define the model for the selected degree
    model = Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('linear', LinearRegression())
    ])

    # Fit the model to the entire training set and evaluate it on the smooth grid
    model.fit(X[:, np.newaxis], y)
    y_pred_curve = model.predict(X_true[:, np.newaxis])

    # Create the figure and axes (the right-hand panel is wider than the left)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6), gridspec_kw={'width_ratios': [2, 3]})
    fig.suptitle(f'Bias-Variance Tradeoff Analysis (Model Complexity: Degree = {degree})', fontsize=16)

    # --- Plot 1: Model Fit vs. True Function ---
    ax1.scatter(X, y, facecolors='none', edgecolors='C0', label='Training Data')
    ax1.plot(X_true, y_true, 'k--', label='True Function (Underlying Pattern)')
    ax1.plot(X_true, y_pred_curve, 'r-', lw=2, label='Fitted Model')
    ax1.set_title(r'Model Fit on Data', fontsize=14)
    ax1.set_xlabel('Feature (X)')
    ax1.set_ylabel('Target (y)')
    ax1.legend(loc='best')
    # Fixed y-range keeps the panel comparable across degrees
    ax1.set_ylim(-1.5, 1.5)

    # --- Plot 2: Bias-Variance Decomposition ---
    ax2.plot(degrees, all_biases, 'b-.o', label=r'Bias$^2$')
    ax2.plot(degrees, all_variances, 'g-o', label='Variance')
    # Optional irreducible-error curve, currently disabled:
    #ax2.plot(degrees, irreducible_error, 'gray', linestyle=':', label=r'Irreducible Error ($\sigma^2$)')
    ax2.plot(degrees, all_total_errors, 'm-->', label='Total Error (MSE)')

    # Highlight the current degree
    ax2.axvline(x=degree, color='r', linestyle='--', lw=2, label=f'Current Degree ({degree})')

    ax2.set_title(r'Bias-Variance Decomposition vs. Model Complexity', fontsize=14)
    ax2.set_xlabel('Model Complexity (Polynomial Degree)')
    ax2.set_ylabel('Error')
    ax2.legend(loc='upper center')
    ax2.set_xticks(degrees)
    # Fixed y-range; values above 0.5 are clipped from view
    ax2.set_ylim(0, 0.5)

    plt.tight_layout()
    plt.show()

    # The metric lists are ordered like `degrees`, whose first entry is degree 1,
    # so the degree itself is not a valid list index. Indexing with `degree`
    # directly reported the metrics of degree + 1 and raised IndexError at the
    # largest degree. Look up the position of `degree` instead.
    idx = int(np.flatnonzero(degrees == degree)[0])

    # The stored value is the squared bias, so label it as such.
    print(f"Bias²:\t\t\t {all_biases[idx]:.5f}")
    print(f"Variance:\t\t {all_variances[idx]:.5f}")
    print(f"Total Error (MSE):\t {all_total_errors[idx]:.5f}\n")

    # --- Textual Explanation ---
    if degree == 1:
        print("Simple Model (Low Complexity): High Bias, Low Variance.")
        print("The model (red line) is too simple to capture the underlying sine wave pattern. It is 'underfitting'.")
        print("Notice on the right plot that Bias² is high, but Variance is very low.")
    elif degree <= 4:
        print("Balanced Model (Good Fit): Low Bias, Low Variance.")
        print("The model is complex enough to capture the pattern without fitting the noise. This is a good tradeoff spot.")
        print("Total error is near its minimum.")
    elif degree <= 8:
        print("Complex Model (Starting to Overfit): Low Bias, Increasing Variance.")
        print("The model fits the training data very well but is starting to become wiggly, influenced by random noise.")
        print("Bias² is low, but Variance is starting to increase, pushing the total error up.")
    else:
        print("Very Complex Model (High Complexity): Low Bias, High Variance.")
        print("The model is now 'overfitting'. It twists and turns to pass through as many data points as possible.")
        print("This model would perform poorly on new, unseen data. Notice how Variance is now the dominant source of error.")


# --- 5. Create and Display the Interactive Widget ---
# An IntSlider lets the user pick the polynomial degree (1 through MAX_DEGREE),
# and `widgets.interactive` links the slider to the plotting function.

degree_slider = widgets.IntSlider(
    min=1,
    max=MAX_DEGREE,
    step=1,
    value=1,
    description='Polynomial Degree:',
    continuous_update=False,  # redraw only once the slider is released
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='500px'),
)

interactive_plot = widgets.interactive(plot_bias_variance, degree=degree_slider)

# Render the slider together with the plot output.
display(interactive_plot)

interactive(children=(IntSlider(value=1, continuous_update=False, description='Polynomial Degree:', layout=Lay…