### Stock Market Predictions NVIDIA 

In [32]:
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd
from sklearn import metrics 
dat = yf.Ticker("NVDA")
df = yf.download(["NVDA"], period='120mo', interval='1d') # Por hora o por minuto, o por dia 
df[('Prev Close', 'NVDA')] = df[('Close', 'NVDA')].shift(1)  # Shifted by 1 to use previous day's close to predict today's
df.dropna(inplace=True)  # Remove rows with NaN values
df.tail()

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume,Prev Close
Ticker,NVDA,NVDA,NVDA,NVDA,NVDA,NVDA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2025-04-08,96.300003,105.849998,94.459999,103.809998,476243400,97.639999
2025-04-09,114.330002,115.099998,97.529999,98.889999,612918300,96.300003
2025-04-10,107.57,110.860001,99.150002,109.370003,437812400,114.330002
2025-04-11,110.93,111.550003,107.480003,108.5,312853800,107.57
2025-04-14,110.709999,114.290001,109.07,114.139999,259339786,110.93


In [33]:
import pyodbc 


try:
    with pyodbc.connect(r'DRIVER={ODBC Driver 17 for SQL Server};SERVER=localhost;DATABASE=stock_market;Trusted_Connection=yes;')  as conn: # Si se pierde la conexion, cierra la sesion
        cursor = conn.cursor()
        # Method 2: Using itertuples() - faster than iterrows()
        for row in df.itertuples():
            print(f"Index: {row.Index}")
            print(row)
            
            data = {
                'ticker': 'NVDA',
                'date': row.Index,
                'close': row[1],
                'high': row[2],
                'low': row[3],
                'open': row[4],
                'volume': row[5]
            }
            print(data)
            cursor.execute("""
                INSERT INTO StockPrices (Ticker, "Date", "Close", "High", "Low", "Open", Volume)
                SELECT ?, ?, ?, ?, ?, ?, ?
                WHERE NOT EXISTS (
                    SELECT 1 FROM StockPrices 
                    WHERE Ticker = ? AND Date = ?
                )
            """, 
            data['ticker'], data['date'], data['close'], data['high'],
            data['low'], data['open'], data['volume'],
            data['ticker'], data['date'])

            cursor.execute("""
            UPDATE StockPrices
            SET     
                "Close" = COALESCE("Close", ?),
                "High" = COALESCE("High", ?),
                "Low" = COALESCE("Low", ?),
                "Open" = COALESCE("Open", ?),
                "Volume" = COALESCE("Volume", ?)
            WHERE Ticker = ? AND "Date" = ?
            """, data['close'], data['high'],
            data['low'], data['open'], data['volume'], data['ticker'], data["date"])
        
            if cursor.rowcount > 0:
                print("New record inserted successfully")
            else:
                print("Record already exists - no insertion made")
            
except pyodbc.Error as e:
    print(f"Database error: {e}")

Index: 2015-04-16 00:00:00
Pandas(Index=Timestamp('2015-04-16 00:00:00'), _1=0.5418150424957275, _2=0.5466333129120852, _3=0.5379604146749787, _4=0.5420559387850512, _5=155284000, _6=0.5451875329017639)
{'ticker': 'NVDA', 'date': Timestamp('2015-04-16 00:00:00'), 'close': 0.5418150424957275, 'high': 0.5466333129120852, 'low': 0.5379604146749787, 'open': 0.5420559387850512, 'volume': 155284000}
New record inserted successfully
Index: 2015-04-17 00:00:00
Pandas(Index=Timestamp('2015-04-17 00:00:00'), _1=0.5348286628723145, _2=0.5401287845910699, _3=0.5302512876351314, _4=0.5304921839828796, _5=293636000, _6=0.5418150424957275)
{'ticker': 'NVDA', 'date': Timestamp('2015-04-17 00:00:00'), 'close': 0.5348286628723145, 'high': 0.5401287845910699, 'low': 0.5302512876351314, 'open': 0.5304921839828796, 'volume': 293636000}
New record inserted successfully
Index: 2015-04-20 00:00:00
Pandas(Index=Timestamp('2015-04-20 00:00:00'), _1=0.5321784019470215, _2=0.5406103448765697, _3=0.531214702093670

##### XGBoost Model

In [34]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Fetch historical data
def get_stock_data():
    ticker = "NVDA"
    df = yf.download(ticker, period='120mo', interval='1d')
    df = df[['Close']].copy()
    df.columns = ['close']
    
    # Create features
    df['prev_close'] = df['close'].shift(1)
    df['sma_5'] = df['close'].rolling(window=5).mean()
    df['sma_10'] = df['close'].rolling(window=10).mean()
    df['daily_return'] = df['close'].pct_change()
    df.dropna(inplace=True)
    return df

In [35]:
# Prepare data for modeling
def prepare_data(df):
    # Create target (next day's closing price)
    df['target'] = df['close'].shift(-1)
    df.dropna(inplace=True)
    
    X = df.drop(['target'], axis=1)
    y = df['target']
    

In [36]:
# Prepare data for modeling
def prepare_data(df):
    # Create target (next day's closing price)
    df['target'] = df['close'].shift(-1)
    df.dropna(inplace=True)
    
    X = df.drop(['target'], axis=1)
    y = df['target']
    
    # Scale features
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, shuffle=False)
    
    return X_train, X_test, y_train, y_test, scaler, X, y

In [37]:
# Train models and evaluate
def train_and_evaluate(X_train, X_test, y_train, y_test):
    models = {
        
        'XGBoost': XGBRegressor(n_estimators=100, random_state=42)
    }
    
    results = {}
    
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        r2 = r2_score(y_test, y_pred)
        results[name] = {
            'model': model,
            'r2_score': r2,
            'predictions': y_pred
        }
        print(f"{name} R-squared: {r2:.4f}")
    
    return results

# Make prediction for tomorrow
def predict_tomorrow(models, df, scaler):
    # Prepare most recent data point
    last_data = df.iloc[-1:].drop(['target'], axis=1)
    last_data_scaled = scaler.transform(last_data)
    
    predictions = {}
    tomorrow_date = (datetime.now() + timedelta(days=1)).strftime('%Y-%m-%d')
    
    for name, result in models.items():
        pred_price = result['model'].predict(last_data_scaled)[0]
        predictions[name] = {
            'date': tomorrow_date,
            'predicted_price': round(pred_price, 2),
            'model': name,
            'r2_score': result['r2_score']
        }
    
    return pd.DataFrame.from_dict(predictions, orient='index')

# Main execution
if __name__ == "__main__":
    # Get and prepare data
    df = get_stock_data()
    X_train, X_test, y_train, y_test, scaler, X, y = prepare_data(df)
    
    # Train and evaluate models
    model_results = train_and_evaluate(X_train, X_test, y_train, y_test)
    
    # Make tomorrow's prediction
    tomorrow_pred = predict_tomorrow(model_results, df, scaler)
    print("\nTomorrow's Predictions:")
    print(tomorrow_pred[['date', 'predicted_price', 'r2_score']])
    
    # Save predictions to database (using your existing code)
    try:
        with pyodbc.connect(r'DRIVER={ODBC Driver 17 for SQL Server};SERVER=localhost;DATABASE=stock_market;Trusted_Connection=yes;') as conn:
            cursor = conn.cursor()
            
            for index, row in tomorrow_pred.iterrows():
                cursor.execute("""
                    INSERT INTO StockPredictions (Ticker, Date, Model, PredictedPrice, R2Score)
                    VALUES (?, ?, ?, ?, ?)
                """, 'NVDA', row['date'], row['model'], row['predicted_price'], row['r2_score'])
                
            conn.commit()
            print("Predictions saved to database successfully")
            
    except pyodbc.Error as e:
        print(f"Database error: {e}")

[*********************100%***********************]  1 of 1 completed

XGBoost R-squared: -1.9783

Tomorrow's Predictions:
               date  predicted_price  r2_score
XGBoost  2025-04-15        32.200001 -1.978267
Predictions saved to database successfully





In [38]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Create target - tomorrow's close
df['Target'] = df[('Close', 'NVDA')].shift(-1)
df.dropna(inplace=True)  # Remove the last row with NaN target

# Step 2: Feature engineering - basic features
df['Return'] = df[('Close', 'NVDA')].pct_change()
df['Volatility'] = df['Return'].rolling(window=5).std()
df['Volume_Change'] = df[('Volume', 'NVDA')].pct_change()
df['Price_Range'] = df[('High', 'NVDA')] - df[('Low', 'NVDA')]

df.dropna(inplace=True)

# Features and target
features = ['Return', 'Volatility', 'Volume_Change', 'Price_Range']
X = df[features]
y = df['Target']

# Step 3: Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

# Step 4: Train XGBoost
model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1)
model.fit(X_train, y_train)

# Step 5: Evaluate model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Step 6: Predict tomorrow’s close
latest = df[features].iloc[-1:]
predicted_close = model.predict(latest)
print(f"Predicted close for tomorrow: ${predicted_close[0]:.2f}")


KeyError: ('Close', 'NVDA')