# *Importing Modules*

In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error , mean_squared_error , median_absolute_error

# *Creating DataFrame*

In [4]:
# Seed NumPy's global generator so the synthetic data is identical on every run.
np.random.seed(0)

# Three predictor columns, each an independent draw of 100 uniform [0, 1) values,
# generated in column order so the random stream is consumed deterministically.
feature_names = ('Feature1', 'Feature2', 'Feature3')
data = {name: np.random.rand(100) for name in feature_names}

# The response is a further independent uniform draw, scaled to [0, 100).
data['Target'] = np.random.rand(100) * 100  # Random target values

dataframe = pd.DataFrame(data)
dataframe

Unnamed: 0,Feature1,Feature2,Feature3,Target
0,0.548814,0.677817,0.311796,90.655550
1,0.715189,0.270008,0.696343,77.404733
2,0.602763,0.735194,0.377752,33.314515
3,0.544883,0.962189,0.179604,8.110139
4,0.423655,0.248753,0.024679,40.724117
...,...,...,...,...
95,0.183191,0.490459,0.224317,95.898272
96,0.586513,0.227415,0.097844,35.536885
97,0.020108,0.254356,0.862192,35.670689
98,0.828940,0.058029,0.972919,1.632850


# *Splitting The Data*

In [5]:
# Separate the predictor columns from the response before splitting.
features = dataframe.drop(columns='Target')
target = dataframe['Target']

# Hold out 30% of the rows for testing; the fixed seed makes the shuffle repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    shuffle=True,
    random_state=44,
)
x_train

Unnamed: 0,Feature1,Feature2,Feature3
83,0.692472,0.237893,0.013237
4,0.423655,0.248753,0.024679
73,0.604846,0.018522,0.961570
85,0.265389,0.613966,0.148141
0,0.548814,0.677817,0.311796
...,...,...,...
3,0.544883,0.962189,0.179604
59,0.244426,0.069167,0.292148
45,0.670638,0.703889,0.396060
35,0.617635,0.590873,0.424685


# *Choosing the Best K*

In [6]:
# `cross_val_score` is imported with the other dependencies in the notebook's
# import cell, so this cell contains only the search itself.

# Estimator reused across the search; only n_neighbors is changed per candidate.
# NOTE(review): no `weights` argument is passed here, whereas the model fitted
# later in this notebook uses weights='distance' -- consider tuning k with the
# same weighting as the final model.
knn = KNeighborsRegressor()

# Candidate neighbourhood sizes: 1 through 20 inclusive.
k_values = range(1, 21)

# Score every candidate with 5-fold cross-validation on the training split only,
# so the test split is never used for model selection.
# The scorer returns *negated* MSE, so the sign is flipped back to a plain MSE.
cv_scores = []
for k in k_values:
    knn.set_params(n_neighbors=k)
    scores = cross_val_score(knn, x_train, y_train, cv=5, scoring='neg_mean_squared_error')
    cv_scores.append(-np.mean(scores))

# The best k is the one with the lowest mean cross-validated MSE.
optimal_k = k_values[np.argmin(cv_scores)]
print("Optimal value of k:", optimal_k)

Optimal value of k: 9


# *Applying Model*

In [7]:
# Fit the final regressor with the k selected by the cross-validation search
# (`optimal_k`) rather than a hard-coded copy of it, so this cell cannot drift
# out of sync if the data, seed or search range changes.
# weights='distance' weights neighbours by the inverse of their distance.
# NOTE(review): the k search above did not pass `weights`, so k was tuned under
# a different weighting than the one used here -- confirm this is intended.
model = KNeighborsRegressor(n_neighbors=optimal_k, weights='distance')
model.fit(x_train, y_train)

In [8]:
# Score of the fitted model on the held-out test split.
model.score(X=x_test, y=y_test)

-0.6138867804679282

In [9]:
# Score of the fitted model on the data it was trained on, for comparison with the test score.
model.score(X=x_train, y=y_train)

1.0

In [10]:
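# The model overfits badly: the training score is 1.0 while the test score is negative.
# This is expected here because the data points are synthetic (generated with GPT's help):
# 'Target' is random noise drawn independently of the features, so there is no real
# relationship for the model to learn and it can only memorise the training set.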

In [12]:
# Mean squared error of the predictions on the held-out test split.
test_predictions = model.predict(x_test)
mean_squared_error(y_true=y_test, y_pred=test_predictions)

999.2591000776209

In [13]:
# Mean squared error of the predictions on the training split itself.
train_predictions = model.predict(x_train)
mean_squared_error(y_true=y_train, y_pred=train_predictions)

0.0