<a href="https://colab.research.google.com/github/hsyen78444/StockPricePredictionUsingKNN/blob/main/MLProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [2]:
# Colab-only setup: mount Google Drive so files under /content/gdrive can be read.
# NOTE(review): `io` and `requests` are not referenced in the cells shown here —
# confirm whether they are still needed.
import io
import requests
from google.colab import drive
# force_remount=True is passed on every run of this cell (presumably so a stale
# mount is replaced — confirm against the Colab docs).
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [3]:
# Load the dataset (oil_and_gas.csv) from the mounted Drive into a DataFrame.
# NOTE: despite its name, DATA_DIR holds the path of the CSV file itself,
# not of a directory.
DATA_DIR = "/content/gdrive/MyDrive/MLdata/oil_and_gas.csv"
data = pd.read_csv(DATA_DIR, header=0)  # row 0 of the file supplies the column names


In [4]:
# Function to preprocess data
def preprocess_data(data, n_lags=5, test_size=0.2, random_state=42,
                    include_lagged=False, shuffle=True):
    """Build a standardized train/test split for predicting ``Close``.

    The input frame is never modified: all work happens on a copy, so calling
    this function repeatedly on the same frame yields the same split.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``Open``, ``High``, ``Low``, ``Volume`` and
        ``Close``.
    n_lags : int, default 5
        Number of ``Lagged_<i>`` columns (``Close`` shifted by ``i`` rows) to
        create. Rows left incomplete by the shift are dropped, together with
        any other row that contains a missing value.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int or None, default 42
        Forwarded to ``train_test_split``.
    include_lagged : bool, default False
        When True, the ``Lagged_<i>`` columns are appended to the feature set.
        The default keeps the four-column feature set
        (``Open``, ``High``, ``Low``, ``Volume``).
    shuffle : bool, default True
        Forwarded to ``train_test_split``. Pass False to keep the original row
        order, i.e. train on the leading rows and test on the trailing rows.

    Returns
    -------
    X_train, X_test, y_train, y_test
        Standardized feature matrices and the matching ``Close`` targets.

    Raises
    ------
    KeyError
        If any required column is missing from ``data``.
    """
    base_features = ['Open', 'High', 'Low', 'Volume']
    missing = [col for col in base_features + ['Close'] if col not in data.columns]
    if missing:
        raise KeyError(f"data is missing required column(s): {missing}")

    # Work on a copy: adding columns / dropping rows on the caller's frame made
    # the function non-idempotent (each re-run dropped another n_lags rows).
    frame = data.copy()

    # Create lagged features
    lagged_cols = [f'Lagged_{i}' for i in range(1, n_lags + 1)]
    for i, col in enumerate(lagged_cols, start=1):
        frame[col] = frame['Close'].shift(i)
    frame = frame.dropna()

    # Split data into features and target variable
    feature_cols = base_features + lagged_cols if include_lagged else base_features
    X = frame[feature_cols]
    y = frame['Close']

    # Split data into train and test sets.
    # NOTE(review): with shuffle=True rows are assigned to train/test at random;
    # if the rows are time-ordered, consider shuffle=False for a chronological
    # hold-out.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, shuffle=shuffle
    )

    # Standardize features; the scaler is fitted on the training rows only and
    # then applied unchanged to the test rows.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

In [5]:
# Function to train and evaluate KNN model
def train_evaluate_knn(X_train, X_test, y_train, y_test, k):
    """Fit a k-nearest-neighbours regressor and print train/test metrics.

    The model is fitted on ``(X_train, y_train)`` with ``n_neighbors=k`` and
    then used to predict both splits. For each split the MAE, MSE, RMSE
    (square root of the MSE) and R-squared are printed. Nothing is returned.
    """
    model = KNeighborsRegressor(n_neighbors=k)
    model.fit(X_train, y_train)

    def _score(y_true, y_hat):
        # Returns (MAE, MSE, RMSE, R-squared) for one split.
        mse = mean_squared_error(y_true, y_hat)
        return (
            mean_absolute_error(y_true, y_hat),
            mse,
            np.sqrt(mse),
            r2_score(y_true, y_hat),
        )

    train_mae, train_mse, train_rmse, train_r2 = _score(y_train, model.predict(X_train))
    test_mae, test_mse, test_rmse, test_r2 = _score(y_test, model.predict(X_test))

    # Report metric by metric, train before test.
    report = (
        f'Train MAE: {train_mae}',
        f'Test MAE: {test_mae}',
        f'Train MSE: {train_mse}',
        f'Test MSE: {test_mse}',
        f'Train RMSE: {train_rmse}',
        f'Test RMSE: {test_rmse}',
        f'Train R-squared: {train_r2}',
        f'Test R-squared: {test_r2}',
    )
    for line in report:
        print(line)

In [6]:
# Build the standardized train/test split from the loaded frame
X_train, X_test, y_train, y_test = preprocess_data(data)

# Fit the KNN regressor and print train/test metrics (adjust k as needed)
train_evaluate_knn(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    k=5,
)

Train MAE: 0.42777513331523215
Test MAE: 0.5294326281494353
Train MSE: 1.0213139727649851
Test MSE: 1.5001466792298002
Train RMSE: 1.010600797924178
Test RMSE: 1.2248047514725766
Train R-squared: 0.9992002374497595
Test R-squared: 0.9988768594370163
