Here is a small function that visualizes polynomial fits of different degrees: the left graph shows the fitted curves over the data, and the right graph shows how the residual variance decreases as the degree grows. Our goal is to choose the degree after which the variance no longer decreases significantly compared with the previous degree (to avoid overfitting). Call the function with X and y and pass the highest polynomial degree you would like to see (the default is 1); every degree from 1 up to that value is fitted and plotted. The R^2 and RMSE metrics are also printed so that the different degrees can be compared.

In [1]:
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [2]:
def plot_poly(X, y, degree=1):
    """Fit and plot polynomial regressions of degree 1 through ``degree``.

    For every degree ``i`` in ``1..degree`` the function expands ``X`` with
    ``PolynomialFeatures(degree=i)``, fits a ``LinearRegression`` on the
    expanded features, and prints the R2 score, the RMSE and the residual
    variance of the in-sample predictions.

    Two subplots are drawn on a new 16x6 figure:

    * left: a scatter of the data with one fitted line per degree;
    * right: the residual variance plotted against the degree.

    Args:
        X: Feature matrix passed to ``PolynomialFeatures.fit_transform``.
            The plots use it as the x-axis, so a single feature column is
            the expected case (NOTE(review): multi-column input is not
            handled specially; confirm it is never passed).
        y: Target values, aligned with the rows of ``X``.
        degree: Highest polynomial degree to fit. Defaults to 1. For values
            below 1 nothing is fitted and only the empty figure is created.

    Returns:
        None. The results are printed and drawn on the current figure.
    """
    var_array = []
    i_array = []
    plt.figure(figsize=(16, 6))

    if degree < 1:
        # No degree to fit: leave the empty figure and skip the axes/legends.
        return

    ax_fit = plt.subplot(1, 2, 1)
    ax_var = plt.subplot(1, 2, 2)

    # The raw data does not depend on the degree, so draw it only once
    # instead of re-scattering it (in a new colour) on every iteration.
    ax_fit.scatter(X, y)

    # A line plot joins points in the order given. Sort by the (first)
    # feature column so each fit is drawn as a curve rather than a zig-zag
    # when the samples are not already ordered by X.
    order = np.argsort(np.asarray(X).reshape(len(X), -1)[:, 0], kind="stable")
    X_sorted = np.asarray(X)[order]

    for i in range(1, degree + 1):
        poly_features = PolynomialFeatures(degree=i)
        X_poly = poly_features.fit_transform(X)
        lm = LinearRegression()
        lm.fit(X_poly, y)
        y_pred = lm.predict(X_poly)

        # Residual sum of squares divided by the residual degrees of freedom
        # (n - i - 1). With too few samples that count is zero or negative
        # and the estimate is undefined, so report NaN instead of dividing
        # by zero or producing a negative "variance".
        dof = len(X) - i - 1
        if dof > 0:
            variance = (np.power(y - y_pred, 2).sum()) / dof
        else:
            variance = float("nan")
        var_array.append(variance)
        i_array.append(i)

        print(f"R2 {i} degree: {r2_score(y, y_pred)}")
        print(f"RMSE {i} degree: {np.sqrt(mean_squared_error(y, y_pred))}")
        print(f"Variance: {variance}")
        print("--------------------------------------")

        ax_fit.plot(X_sorted, np.asarray(y_pred)[order], label=f"degree {i}")
        ax_var.scatter(i, variance, label=f"degree={i}", s=70)

    # Everything below is independent of the individual degree, so it is
    # drawn once after the loop rather than stacked up on every iteration.
    ax_fit.legend()

    ax_var.plot(i_array, var_array, color="grey")
    ax_var.set_xlabel("degree, n", fontsize=18)
    ax_var.set_ylabel("variance", fontsize=18)
    ax_var.legend()

In [4]:
# Function call example: fit and plot polynomial degrees 1 through 4.
# NOTE(review): X and y are not defined in the cells shown here; presumably
# they are created in an earlier cell. Confirm before running.
plot_poly(X, y, degree=4)