In [23]:
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [24]:
# Load the dataset from a CSV file in the current working directory.
df = pd.read_csv(filepath_or_buffer="advertising.csv")

In [25]:
# Predictor columns and the regression target.
feature_columns = ['TV', 'Radio', 'Newspaper']
target_column = 'Sales'

X = df[feature_columns]
y = df[target_column]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [26]:
# Same neighbour count for every variant so that only the feature scaling differs.
N_NEIGHBORS = 5

# 1) Build the three estimators: no scaling, standardisation, min-max scaling.
#    The scalers live inside the pipelines, so they are fitted on the
#    training split only.
model_raw = KNeighborsRegressor(n_neighbors=N_NEIGHBORS)
model_normalised = make_pipeline(
    StandardScaler(),
    KNeighborsRegressor(n_neighbors=N_NEIGHBORS),
)
model_minmaxscaled = make_pipeline(
    MinMaxScaler(),
    KNeighborsRegressor(n_neighbors=N_NEIGHBORS),
)

# 2) Fit each estimator on the training split.
for estimator in (model_raw, model_normalised, model_minmaxscaled):
    estimator.fit(X_train, y_train)

# 3) Predict on the held-out split.
y_raw_pred = model_raw.predict(X_test)
y_normalised_pred = model_normalised.predict(X_test)
y_minmaxscaled_pred = model_minmaxscaled.predict(X_test)

In [27]:
def evaluate(y_true, y_pred):
    """Score a set of predictions against the observed targets.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values to compare with ``y_true``.

    Returns
    -------
    dict
        ``{"MSE": ..., "R^2": ...}`` holding the results of
        ``mean_squared_error`` and ``r2_score`` respectively.
    """
    scores = {}
    scores["MSE"] = mean_squared_error(y_true, y_pred)
    scores["R^2"] = r2_score(y_true, y_pred)
    return scores

# Map each row label to the test-set predictions of the matching model;
# insertion order determines the row order of the summary table.
predictions_by_label = {
    "kNN/raw": y_raw_pred,
    "kNN/standardized": y_normalised_pred,
    "kNN/minmaxscaled": y_minmaxscaled_pred,
}

# One record per model: its label followed by the metrics from evaluate().
results = pd.DataFrame([
    {"Model": label, **evaluate(y_test, y_pred)}
    for label, y_pred in predictions_by_label.items()
])

display(results)

Unnamed: 0,Model,MSE,R^2
0,kNN/raw,3.11553,0.899177
1,kNN/standardized,2.98003,0.903562
2,kNN/minmaxscaled,2.8054,0.909214
