In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

nflx_data = pd.read_csv("Download Data - STOCK_US_XNAS_NFLX.csv")

# Shift the 'Close' price down to compare with the next day's price
nflx_data['Next_Close'] = nflx_data['Close'].shift(-1)
# Drop the last row as it will have NaN for 'Next_Close' after shifting
nflx_data.dropna(inplace=True)

nflx_data['Volume'] = nflx_data['Volume'].str.replace(',', '').astype(float)

# Feature Selection
X = nflx_data[['Open', 'High', 'Low', 'Volume']]
y = nflx_data['Next_Close']  # Target variable is the next day's closing price

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Data Preprocessing: Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Training: KNN Regressor
knn_regressor = KNeighborsRegressor()
knn_regressor.fit(X_train_scaled, y_train)

# Predicting on the test set
y_pred_knn = knn_regressor.predict(X_test_scaled)

# Model Evaluation
mse_knn = mean_squared_error(y_test, y_pred_knn)
r2_knn = r2_score(y_test, y_pred_knn)

mse_knn, r2_knn

(7.588636800000098, 0.9320755365492807)