In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
%matplotlib inline



In [None]:
# Load the advertising data, take the TV column as the single predictor and the
# Sales column as the target, then hold out 40% of the rows for testing.
filename = "Advertising.csv"
df = pd.read_csv(filename)

x = df["TV"].to_numpy()
y = df["Sales"].to_numpy()

# A fixed random_state keeps the train/test split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.6, random_state=66
)

# Preview goes last: only a cell's final expression is rendered, so a bare
# `df.head()` in the middle of the cell would be silently discarded.
df.head()

In [None]:
# Sweep the number of neighbours K over 1..70 and record, for each K, the
# test-set MSE and R² of a KNN regressor fitted on the training split.
Knns = np.arange(1, 71)

# The estimator wants a 2-D feature matrix (n_samples, 1). The reshape does not
# depend on K, so do it once instead of on every iteration.
x_train_2d = x_train.reshape(-1, 1)
x_test_2d = x_test.reshape(-1, 1)

# Maps K -> {"mse": ..., "r2e": ...}. Despite the name, it stores both metrics;
# the name and the keys are kept because later cells read them.
mses = {}
for k in Knns:
    knnmodel = KNeighborsRegressor(n_neighbors=int(k))
    # The target is passed 1-D; a column vector is unnecessary and only makes
    # the predictions come back 2-D.
    knnmodel.fit(x_train_2d, y_train)
    y_pred = knnmodel.predict(x_test_2d)
    mse = mean_squared_error(y_test, y_pred)
    r2e = r2_score(y_test, y_pred)
    mses[k] = {"mse": mse, "r2e": r2e}


In [None]:
# Tabulate the per-K metrics (index = K, columns = "mse" and "r2e") and draw
# each column in its own panel.
mse_df = pd.DataFrame.from_dict(mses, orient='index')

# Keeping the returned axes lets us label the x-axis and also stops their repr
# from being echoed as cell output.
metric_axes = mse_df.plot(figsize=(10, 8), subplots=True, grid=True)
metric_axes[-1].set_xlabel("Value of K (Neighbors)")

# Preview goes last: only a cell's final expression is rendered, so a bare
# `mse_df.head()` placed before the plot call would never be shown.
mse_df.head()

In [None]:
# Unpack the per-K metrics collected in `mses` into parallel lists for plotting.
k_values = list(mses.keys())
mse_values = [scores['mse'] for scores in mses.values()]
r2_values = [scores['r2e'] for scores in mses.values()]

# Best K per metric: lowest MSE and highest R². Each is marked on its own panel.
best_k_mse = k_values[np.argmin(mse_values)]
best_k_r2 = k_values[np.argmax(r2_values)]

# Two stacked panels (MSE on top, R² below) sharing the K axis.
fig, axes = plt.subplots(2, 1, figsize=(10, 8), sharex=True)

# One row per panel: (axes, y-values, y-label, line colour, best K, legend text).
# Driving both panels from one loop keeps their styling from drifting apart.
panels = [
    (axes[0], mse_values, "Mean Squared Error (MSE)", 'b',
     best_k_mse, f'Best K (MSE) = {best_k_mse}'),
    (axes[1], r2_values, "R² Score", 'r',
     best_k_r2, f'Best K (R²) = {best_k_r2}'),
]
for ax, values, ylabel, color, best_k, best_label in panels:
    ax.plot(k_values, values, marker='o', linestyle='-', color=color)
    ax.set_ylabel(ylabel)
    ax.grid(True)
    # Dashed vertical line at the best K; its label is what the legend shows.
    ax.axvline(x=best_k, color='gray', linestyle='--', label=best_label)
    ax.legend()

# Title only on the top panel and x-label only on the bottom one, since the
# x-axis is shared.
axes[0].set_title("Model Performance vs. K")
axes[1].set_xlabel("Value of K (Neighbors)")

plt.tight_layout()
plt.show()