### XGBOOST

In [2]:
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler

# Load stock price data from CSV file
data = pd.read_csv('google.csv')

# Use 'Close' price as the feature and shift it to create the target variable
data['Next_Day_Close'] = data['close'].shift(-1)

# Drop rows with missing values (last row)
data.dropna(inplace=True)

# Define features and target variable
features = ['close']
target = 'Next_Day_Close'

X = data[features]
y = data[target]

# Scale the features using MinMaxScaler
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the scaled data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Create and fit the XGBoost model
model = xgb.XGBRegressor()
model.fit(X_train, y_train)

# Predict stock prices
y_pred = model.predict(X_test)

# Calculate RMSE, MAE, and R-squared
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

print('Root Mean Squared Error (RMSE):', rmse)
print('Mean Absolute Error (MAE):', mae)
print('R-squared:', r_squared)



Root Mean Squared Error (RMSE): 30.242968512094954
Mean Absolute Error (MAE): 17.631582118443077
R-squared: 0.993534171983536


In [6]:
# # Visualize the results with adjusted x-axis ticks
# dates = pd.to_datetime(data['date'])
# plt.figure(figsize=(12, 6))
# plt.plot(dates[:len(y_train)], y_train.values, label='Train', color = 'green')
# plt.plot(dates[len(y_train):], y_test.values, label='Actual', color = 'blue')
# plt.plot(dates[len(y_train):len(y_train)+len(y_pred)], y_pred, label='Predicted', color = 'red')
# plt.xlabel('Date')
# plt.ylabel('Stock Price')
# plt.title('Stock Prices Prediction Plot - XGBOOST')
# plt.legend()

# # Adjust x-axis ticks to display only years
# plt.gca().xaxis.set_major_locator(plt.MaxNLocator(integer=True))
# plt.xticks(rotation=45)

# plt.show()


### OPTIMIZED MODEL

In [4]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

# Load stock price data (replace 'your_stock_data.csv' with your actual data file)
data = pd.read_csv('google.csv')

# Assuming your data has columns like 'Date', 'Close', etc.
# You might need to preprocess the data to create relevant features.

# Use 'Close' price as the feature and shift it to create the target variable
data['Next_Day_Close'] = data['close'].shift(-1)

# Drop rows with missing values (last row)
data.dropna(inplace=True)

# Define features and target variable
features = ['close']
target = 'Next_Day_Close'

X = data[features]
y = data[target]

# Scale the features using MinMaxScaler
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the scaled data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5]
}

model = xgb.XGBRegressor(random_state=42)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

# Predict stock prices
y_pred = best_model.predict(X_test)

# Calculate RMSE, MAE, and R-squared
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

print('Best Parameters:', grid_search.best_params_)
print('Root Mean Squared Error (RMSE):', rmse)
print('Mean Absolute Error (MAE):', mae)
print('R-squared:', r_squared)


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100}
Root Mean Squared Error (RMSE): 27.33535220365094
Mean Absolute Error (MAE): 15.980250902932783
R-squared: 0.9947176805468972


In [7]:
# # Visualize the results with adjusted x-axis ticks
# dates = pd.to_datetime(data['date'])
# plt.figure(figsize=(12, 6))
# plt.plot(dates[:len(y_train)], y_train.values, label='Train', color = 'green')
# plt.plot(dates[len(y_train):], y_test.values, label='Actual', color = 'blue')
# plt.plot(dates[len(y_train):len(y_train)+len(y_pred)], y_pred, label='Predicted', color = 'red')
# plt.xlabel('Date')
# plt.ylabel('Stock Price')
# plt.title('Stock Prices Prediction Plot - Tuned XGBOOST')
# plt.legend()

# # Adjust x-axis ticks to display only years
# plt.gca().xaxis.set_major_locator(plt.MaxNLocator(integer=True))
# plt.xticks(rotation=45)

# plt.show()
