In [8]:
import numpy as np
import pandas as pd

# Define the K-nearest neighbors algorithm
class KNNRegressor:
    def __init__(self, k):
        self.k = k
        
    def fit(self, X, y):
        self.X = X
        self.y = y
        
    def predict(self, X):
        y_pred = np.zeros(len(X))
        for i in range(len(X)):
            distances = np.sqrt(np.sum((self.X - X[i])**2, axis=1))
            idx = np.argsort(distances)[:self.k]
            y_pred[i] = np.mean(self.y[idx])
        return y_pred

In [9]:
# Load the data
data = pd.read_csv('Advertising_new.csv')
data

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,9.7
197,177.0,9.3,6.4,12.8
198,283.6,42.0,66.2,25.5


In [10]:
X = data.drop(columns=['Sales'])
y = data['Sales']

X

Unnamed: 0,TV,Radio,Newspaper
0,230.1,37.8,69.2
1,44.5,39.3,45.1
2,17.2,45.9,69.3
3,151.5,41.3,58.5
4,180.8,10.8,58.4
...,...,...,...
195,38.2,3.7,13.8
196,94.2,4.9,8.1
197,177.0,9.3,6.4
198,283.6,42.0,66.2


In [11]:
y

0      22.1
1      10.4
2       9.3
3      18.5
4      12.9
       ... 
195     7.6
196     9.7
197    12.8
198    25.5
199    13.4
Name: Sales, Length: 200, dtype: float64

In [7]:
def scale_data(X):
    """
    A function to scale the data using z-score normalization
    """
    X_mean = X.mean(axis=0)
    X_std = X.std(axis=0)
    X_scaled = (X - X_mean) / X_std
    return X_scaled

X_scaled = scale_data(X)
X_scaled

Unnamed: 0,TV,Radio,Newspaper
0,0.967425,0.979066,1.774493
1,-1.194379,1.080097,0.667903
2,-1.512360,1.524637,1.779084
3,0.051919,1.214806,1.283185
4,0.393196,-0.839507,1.278593
...,...,...,...
195,-1.267759,-1.317724,-0.769287
196,-0.615491,-1.236899,-1.031011
197,0.348934,-0.940539,-1.109069
198,1.590574,1.261955,1.636743


In [14]:
import numpy as np

def knn_regression(X_train, y_train, X_test, k):
    """
    A function to perform k-nearest neighbors regression
    """
    n_train = X_train.shape[0]
    n_test = X_test.shape[0]
    y_pred = np.zeros(n_test)
    for i in range(n_test):
        distances = np.linalg.norm(X_train - X_test[i], axis=1)
        nearest_neighbors = np.argsort(distances)[:k]
        y_pred[i] = np.mean(y_train[nearest_neighbors])
    return y_pred

In [18]:
import numpy as np
import pandas as pd

# Load the data
data = pd.read_csv('Advertising_new.csv')
X = data.iloc[:, :3].values
y = data.iloc[:, 3].values

def scale_data(X):
    """
    A function to scale the data using z-score normalization
    """
    X_mean = X.mean(axis=0)
    X_std = X.std(axis=0)
    X_scaled = (X - X_mean) / X_std
    return X_scaled

def knn_regression(X_train, y_train, X_test, k):
    """
    A function to perform k-nearest neighbors regression
    """
    n_train = X_train.shape[0]
    n_test = X_test.shape[0]
    y_pred = np.zeros(n_test)
    for i in range(n_test):
        distances = np.linalg.norm(X_train - X_test[i], axis=1)
        nearest_neighbors = np.argsort(distances)[:k]
        y_pred[i] = np.mean(y_train[nearest_neighbors])
    return y_pred

k = [] # set the number of nearest neighbors
X_train_scaled = scale_data(X)
y_pred = knn_regression(X_train_scaled, y, X_train_scaled, k)
print(y_pred)


TypeError: slice indices must be integers or None or have an __index__ method

In [19]:
import numpy as np

# Define the hyperparameters to search over
k_values = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21]
#weight_options = ['uniform', 'distance']

# Define a function to perform the grid search
def grid_search_knn(X_train, y_train, k_values, cv=5):
    """
    A function to perform a grid search over hyperparameters for k-nearest neighbors regression
    """
    n_train = X_train.shape[0]
    k_scores = {}
    for k in k_values:
        scores = np.zeros(cv)
        for i in range(cv):
            fold_start = int(i * n_train / cv)
            fold_end = int((i+1) * n_train / cv)
            X_fold_val = X_train[fold_start:fold_end]
            y_fold_val = y_train[fold_start:fold_end]
            X_fold_train = np.concatenate((X_train[:fold_start], X_train[fold_end:]), axis=0)
            y_fold_train = np.concatenate((y_train[:fold_start], y_train[fold_end:]), axis=0)
            y_pred = knn_regression(X_fold_train, y_fold_train, X_fold_val, k=k)
            scores[i] = np.mean((y_pred - y_fold_val)**2)
        k_scores[k] = np.mean(scores)
    return k_scores

In [None]:
# Call the function to perform the grid search
k_scores = grid_search_knn(X_train, y_train, k_values, cv=5)

In [None]:
# Find the best hyperparameter value based on the cross-validation scores
best_k = min(k_scores, key=k_scores.get)

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data[['x1', 'x2']], data['y'], test_size=0.2, random_state=42)

In [None]:
X_train

In [None]:
X_test

In [None]:
y_train

In [None]:
y_test

In [None]:
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Train the model
knn = KNNRegressor(k=5)
knn.fit(X_train, y_train)

In [None]:
# Evaluate the model using cross-validation
scores = []
for k in range(1, 21):
    knn = KNNRegressor(k=k)
    scores.append(np.mean(cross_val_score(knn, X_train, y_train, cv=5)))

In [None]:
# Select the best hyperparameter
best_k = np.argmax(scores) + 1
print('Best k:', best_k)

In [None]:
# Train the model with the best hyperparameter
knn = KNNRegressor(k=best_k)
knn.fit(X_train, y_train)

In [None]:
# Evaluate the model on the testing set
y_pred = knn.predict(X_test)
mse = np.mean((y_pred - y_test)**2)
print('MSE:', mse)
