# Curve fitting
In this notebook:
- We discuss Lagrange's interpolation theorem on finding polynomials that interpolate a set of points in the plane.
- We discuss polynomial regression, which is a method of fitting a polynomial to a set of points in the plane.
- We discuss the bias-variance tradeoff in regression problems, and discuss the concept of over-fitting and under-fitting. 

In [52]:
import numpy as np
import pandas as pd
from itertools import combinations

def build_interpolants(X, Y):
    """
    Given two Series X and Y (each of length n), return a dictionary mapping
    each subset of points to the coefficients of the unique interpolating
    polynomial (degree m-1) through those points.
    
    Raises a ValueError if any two x-values are identical.
    
    Parameters:
      X, Y: pandas.Series (floats), both of length n
            Each row corresponds to a point (x,y).
    
    Returns:
      A dict: 
         Keys are tuples of indices (the chosen subset);
         Values are lists of polynomial coefficients (highest degree first).
    """
    
    # Convert to numpy arrays for faster indexing
    X_arr = X.to_numpy()
    Y_arr = Y.to_numpy()
    n = len(X_arr)
    
    # 1. Check for duplicate x-values
    if len(set(X_arr)) < n:
        raise ValueError("Duplicate x-values detected. Interpolation not possible.")
    
    # 2. Build a dictionary of interpolants
    interpolants = {}
    
    # 3. For each subset size m
    for m in range(1, n + 1):
        # 4. For each m-element subset
        for subset in combinations(range(n), m):
            xs = X_arr[list(subset)]
            ys = Y_arr[list(subset)]
            
            # 5. Compute coefficients of the degree (m-1) polynomial, round to 6 decimals
            coeffs = np.polyfit(xs, ys, m - 1)
            coeffs = np.round(coeffs, 6)
            # 6. Store them in the dictionary with tuple(subset) as the key
            interpolants[tuple(subset)] = list(coeffs)
    
    # 7. Return the dictionary
    return interpolants

In [66]:
def random_poly(degree):
    """Generate a random monic polynomial of given degree, with integer coefficients between -10 and 10.
    """
    coeffs = np.random.randint(-10, 11, size=degree+1)
    coeffs[0] = 1  # Ensure the leading coefficient is 1
    return np.poly1d(coeffs)

def random_points(n,f):
    """Generate n random x-values uniformly from [0,1) and compute corresponding y-values using the polynomial f. Return two Series of length n containing the x and y values.
    """
    X = np.random.rand(n)
    return pd.Series(X), pd.Series(f(X))

# generate a random test case
np.random.seed(42)
n = 5
f = random_poly(n-1)
X = pd.Series(np.random.rand(n))
Y = pd.Series(f(X))
result_dict = build_interpolants(X, Y)

In [54]:
for m in range(1, n + 1):
    print(f"m = {m}")
    # for each subset of size m, print the coefficients
    for subset in combinations(range(len(X)), m):
        print(subset, result_dict[subset])

m = 1
(0,) [np.float64(0.493002)]
(1,) [np.float64(-2.86786)]
(2,) [np.float64(-2.867907)]
(3,) [np.float64(-2.98473)]
(4,) [np.float64(6.412661)]
m = 2
(0, 1) [np.float64(7.59277), np.float64(-4.052474)]
(0, 2) [np.float64(7.592461), np.float64(-4.052289)]
(0, 3) [np.float64(6.433397), np.float64(-3.358405)]
(0, 4) [np.float64(22.128103), np.float64(-12.754174)]
(1, 2) [np.float64(1.920368), np.float64(-3.167473)]
(1, 3) [np.float64(1.193342), np.float64(-3.054044)]
(1, 4) [np.float64(13.068257), np.float64(-4.906752)]
(2, 3) [np.float64(1.193163), np.float64(-3.054033)]
(2, 4) [np.float64(13.067879), np.float64(-4.906424)]
(3, 4) [np.float64(11.629103), np.float64(-3.66019)]
m = 3
(0, 1, 2) [np.float64(12.81424), np.float64(-2.077844), np.float64(-2.8556)]
(0, 1, 3) [np.float64(11.83819), np.float64(-1.341241), np.float64(-2.946765)]
(0, 1, 4) [np.float64(20.46776), np.float64(-7.85378), np.float64(-2.140747)]
(0, 2, 3) [np.float64(11.83795), np.float64(-1.341084), np.float64(-2.9467

In [56]:
# check whether the coefficients for m=1 match Y
for i in range(n):
    assert np.isclose(result_dict[(i,)][0], Y[i]), f"Failed for subset {(i,)}"

In [55]:
# check whether the coefficients for m=1 match Y
for i in range(n):
    assert (result_dict[(i,)][0] == Y[i]), f"Failed for subset {(i,)}"

AssertionError: Failed for subset (0,)

In [57]:
# check whether the coefficients for m=5 match the original polynomial
assert np.all(np.isclose(result_dict[tuple(range(n))], f)), "Failed for subset of all points"

In [58]:
result_dict[tuple(range(n))]

[np.float64(1.0),
 np.float64(9.0),
 np.float64(4.0),
 np.float64(0.0),
 np.float64(-3.0)]

In [63]:
for i in range(n):
    print(result_dict[(i,)][0] - Y[i])

-3.3386923953271364e-07
2.1774736147506246e-07
-4.6278055920723205e-07
1.808186138596568e-07
2.643047523065434e-07


In [68]:
from manim import *
import numpy as np
import pandas as pd
from itertools import combinations

class InterpolationAnimation(Scene):
    def construct(self):
        # 1. Parameter validation
        n = 5  # Example value, modify as needed
        if not isinstance(n, int) or n <= 0:
            raise ValueError("n must be a positive integer")

        # 2. Generate random polynomial
        f = random_poly(n-1)
        
        # 3. Generate random points
        X, Y = random_points(n, f)
        
        # 4. Create and fade in axes
        axes = Axes(
            x_range=[-1,2],
            y_range=[-10, 10],
            axis_config={"color": BLUE},
        ).add_coordinates()
        
        self.play(FadeIn(axes))
        
        # 5. Create and fade in points
        dots = VGroup(*[
            Dot(axes.c2p(x, y), color=RED)
            for x, y in zip(X, Y)
        ])
        self.play(FadeIn(dots))
        
        # 6. Create degree equation
        degree_text = MathTex("\\text{degree} =").to_corner(UR)
        self.play(Write(degree_text))
        
        # 7. Get interpolation dictionary
        interp_dict = build_interpolants(X, Y)
        
        # 8-12. Main animation loop
        for m in range(1, n+1):
            # Update degree number
            degree_num = MathTex(str(m)).next_to(degree_text, RIGHT)
            self.play(Write(degree_num))
            
            # Get all m-element subsets
            subsets = list(combinations(range(n), m))
            time_per_poly = 3 / len(subsets)  # Total 3 seconds per m
            
            # Animate each polynomial for this m
            for subset in subsets:
                coeffs = interp_dict[tuple(subset)]
                p = np.poly1d(coeffs)
                
                # Create graph
                x_vals = np.linspace(-5, 5, 100)
                y_vals = p(x_vals)
                graph = axes.plot(
                    lambda x: p(x),
                    x_range=[-5, 5],
                    color=YELLOW
                )
                
                # Highlight selected points
                selected_dots = VGroup(*[
                    dots[i].copy().set_color(YELLOW)
                    for i in subset
                ])
                
                # Animate polynomial and point highlighting
                self.play(
                    Create(graph),
                    Transform(dots, selected_dots),
                    run_time=time_per_poly
                )
                
                # Remove polynomial
                self.remove(graph)
                
            # Reset points to original color
            self.play(
                dots.animate.set_color(RED)
            )
            
            # Pause for 2 seconds
            self.wait(2)
            
            # Remove degree number for next iteration
            self.remove(degree_num)

        # Final pause
        self.wait(1)

# To run the animation, use the following command in the terminal:
%manim -ql -v WARNING InterpolationAnimation

                                                                                               

                                                                                               