#### GXBoost Model

In [3]:
# Import Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit, RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
import yfinance as yf

In [4]:
dat = yf.Ticker("NVDA")
df = yf.download(["NVDA"], period='12mo', interval='1d') # Por hora o por minuto, o por dia 
df[('Prev Close', 'NVDA')] = df[('Close', 'NVDA')].shift(1)  # Shifted by 1 to use previous day's close to predict today's
df.dropna(inplace=True)  # Remove rows with NaN values
df.tail()

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume,Prev Close
Ticker,NVDA,NVDA,NVDA,NVDA,NVDA,NVDA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2025-04-02,110.419998,111.980003,106.790001,107.290001,220601200,110.150002
2025-04-03,101.800003,105.629997,101.599998,103.510002,338769400,110.419998
2025-04-04,94.309998,100.129997,92.110001,98.910004,532273800,101.800003
2025-04-07,97.639999,101.75,86.620003,87.459999,611041300,94.309998
2025-04-08,96.300003,105.849998,94.459999,103.805,468737984,97.639999


In [6]:
import pyodbc 


try:
    with pyodbc.connect(r'DRIVER={ODBC Driver 17 for SQL Server};SERVER=localhost;DATABASE=stock_market;Trusted_Connection=yes;')  as conn: # Si se pierde la conexion, cierra la sesion
        cursor = conn.cursor()
        # Method 2: Using itertuples() - faster than iterrows()
        for row in df.itertuples():
            print(f"Index: {row.Index}")
            print(row)
            
            data = {
                'ticker': 'NVDA',
                'date': row.Index,
                'close': row[1],
                'high': row[2],
                'low': row[3],
                'open': row[4],
                'volume': row[5]
            }
            print(data)
            cursor.execute("""
                INSERT INTO StockPrices (Ticker, "Date", "Close", "High", "Low", "Open", Volume)
                SELECT ?, ?, ?, ?, ?, ?, ?
                WHERE NOT EXISTS (
                    SELECT 1 FROM StockPrices 
                    WHERE Ticker = ? AND Date = ?
                )
            """, 
            data['ticker'], data['date'], data['close'], data['high'],
            data['low'], data['open'], data['volume'],
            data['ticker'], data['date'])

            cursor.execute("""
            UPDATE StockPrices
            SET     
                "Close" = COALESCE("Close", ?),
                "High" = COALESCE("High", ?),
                "Low" = COALESCE("Low", ?),
                "Open" = COALESCE("Open", ?),
                "Volume" = COALESCE("Volume", ?)
            WHERE Ticker = ? AND "Date" = ?
            """, data['close'], data['high'],
            data['low'], data['open'], data['volume'], data['ticker'], data["date"])
        
            if cursor.rowcount > 0:
                print("New record inserted successfully")
            else:
                print("Record already exists - no insertion made")
            
except pyodbc.Error as e:
    print(f"Database error: {e}")

Index: 2024-04-10 00:00:00
Pandas(Index=Timestamp('2024-04-10 00:00:00'), _1=87.01040649414062, _2=87.3712879556285, _3=83.68149867040677, _4=83.89843024698811, _5=431929000, _6=85.3259506225586)
{'ticker': 'NVDA', 'date': Timestamp('2024-04-10 00:00:00'), 'close': 87.01040649414062, 'high': 87.3712879556285, 'low': 83.68149867040677, 'open': 83.89843024698811, 'volume': 431929000}
New record inserted successfully
Index: 2024-04-11 00:00:00
Pandas(Index=Timestamp('2024-04-11 00:00:00'), _1=90.58623504638672, _2=90.70919574606373, _3=86.89745219051976, _4=87.39128560717941, _5=431637000, _6=87.01040649414062)
{'ticker': 'NVDA', 'date': Timestamp('2024-04-11 00:00:00'), 'close': 90.58623504638672, 'high': 90.70919574606373, 'low': 86.89745219051976, 'open': 87.39128560717941, 'volume': 431637000}
New record inserted successfully
Index: 2024-04-12 00:00:00
Pandas(Index=Timestamp('2024-04-12 00:00:00'), _1=88.15702819824219, _2=90.1453808587556, _3=87.50124546153059, _4=89.66953167082109, 

In [15]:
import yfinance as yf
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Fetch and prepare data
df = yf.download("NVDA", period='12mo', interval='1d')  # Single ticker = no multi-index columns
df['Prev Close'] = df['Close'].shift(1)  # Simplified column name
df['Daily Return'] = df['Close'].pct_change()
df.dropna(inplace=True)

# Define features (X) and target (y)
X = df[['Prev Close', 'Daily Return']].values  # Use cleaned column names
y = df['Close'].values

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train XGBoost
xgb_model = XGBRegressor(n_estimators=1000, max_depth=6, learning_rate=0.01)
xgb_model.fit(X_train, y_train)

# Evaluate
y_pred = xgb_model.predict(X_test)
print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}")

[*********************100%***********************]  1 of 1 completed


MAE: 1.10


In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Fetch and prepare data
dat = yf.Ticker("NVDA")
df = yf.download(["NVDA"], period='12mo', interval='1d')
df[('Prev Close', 'NVDA')] = df[('Close', 'NVDA')].shift(1)  # Feature: Previous day's close
df.dropna(inplace=True)

# Feature Engineering
df['Daily Return'] = df[('Close', 'NVDA')].pct_change()  # Additional feature
df.dropna(inplace=True)

# Define features (X) and target (y)
X = df[['Prev Close', 'Daily Return']].values  # Using previous close and daily return
y = df[('Close', 'NVDA')].values  # Target: Current day's close

# Split data into train/test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=42)

# Initialize and train XGBoost model
xgb_model = XGBRegressor(
    n_estimators=1000,  # Number of boosting rounds
    max_depth=6,        # Depth of each tree
    learning_rate=0.01, # Shrinkage to prevent overfitting
    objective='reg:squarederror',
    early_stopping_rounds=50,
    random_state=42
)

xgb_model.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    verbose=10  # Print progress every 10 iterations
)

# Predict and evaluate
#y_pred = xgb_model.predict(X_test)
#print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}")
#print(f"MSE: {mean_squared_error(y_test, y_pred):.2f}")

# Feature Importance
print("Feature Importance:")
for feature, importance in zip([('Prev Close', 'NVDA'), 'Daily Return'], xgb_model.feature_importances_):
    print(f"{feature}: {importance:.2f}")

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed

[0]	validation_0-rmse:11.80051
[10]	validation_0-rmse:10.70887
[20]	validation_0-rmse:9.76390
[30]	validation_0-rmse:8.95630
[40]	validation_0-rmse:8.25740
[50]	validation_0-rmse:7.59144
[60]	validation_0-rmse:6.98660





[70]	validation_0-rmse:6.43527
[80]	validation_0-rmse:5.93388
[90]	validation_0-rmse:5.49201
[100]	validation_0-rmse:5.11688
[110]	validation_0-rmse:4.75284
[120]	validation_0-rmse:4.43843
[130]	validation_0-rmse:4.16772
[140]	validation_0-rmse:3.92263
[150]	validation_0-rmse:3.70603
[160]	validation_0-rmse:3.51363
[170]	validation_0-rmse:3.34300
[180]	validation_0-rmse:3.19886
[190]	validation_0-rmse:3.07322
[200]	validation_0-rmse:2.95873
[210]	validation_0-rmse:2.84663
[220]	validation_0-rmse:2.73565
[230]	validation_0-rmse:2.64638
[240]	validation_0-rmse:2.56419
[250]	validation_0-rmse:2.49123
[260]	validation_0-rmse:2.42632
[270]	validation_0-rmse:2.36882
[280]	validation_0-rmse:2.32104
[290]	validation_0-rmse:2.27789
[300]	validation_0-rmse:2.23798
[310]	validation_0-rmse:2.20151
[320]	validation_0-rmse:2.17032
[330]	validation_0-rmse:2.14093
[340]	validation_0-rmse:2.11449
[350]	validation_0-rmse:2.09061
[360]	validation_0-rmse:2.06628
[370]	validation_0-rmse:2.04517
[380]	valid