In [3]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
#from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import xgboost as xgb
import requests

# Step 1: Download NVIDIA stock data
ticker = 'NVDA'
df = yf.download(ticker, period='6mo', interval='1d')
df['Prev Close'] = df['Close'].shift(1)
df['Return'] = df['Close'].pct_change()
df['Volatility'] = df['Return'].rolling(window=3).std()
df['Price_Range'] = df['High'] - df['Low']
df.dropna(inplace=True)

# Step 2: Get news headlines (using NewsAPI or similar)
def get_news_sentiment(keyword='NVIDIA', days=5):
    #analyzer = SentimentIntensityAnalyzer()
    base_url = "https://newsapi.org/v2/everything"
    api_key = 'YOUR_NEWS_API_KEY'  # ← get from https://newsapi.org/

    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)
    
    params = {
        'q': keyword,
        'from': start_date.strftime('%Y-%m-%d'),
        'to': end_date.strftime('%Y-%m-%d'),
        'sortBy': 'relevancy',
        'apiKey': api_key,
        'language': 'en',
        'pageSize': 50
    }

    response = requests.get(base_url, params=params)
    headlines = [article['title'] for article in response.json().get('articles', [])]

    sentiment_scores = []
    for headline in headlines:
        #score = analyzer.polarity_scores(headline)['compound']
        sentiment_scores.append(score)

    avg_sentiment = np.mean(sentiment_scores) if sentiment_scores else 0
    return avg_sentiment

# Step 3: Create sentiment feature
sentiment_values = []
dates = df.index

print("Gathering sentiment...")
for date in dates:
    sentiment = get_news_sentiment(days=1)
    sentiment_values.append(sentiment)

df['Sentiment'] = sentiment_values

# Step 4: Target = Tomorrow’s Close
df['Target'] = df['Close'].shift(-1)
df.dropna(inplace=True)

# Step 5: Build model
features = ['Prev Close', 'Return', 'Volatility', 'Price_Range', 'Sentiment']
X = df[features]
y = df['Target']

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100)
model.fit(X_train, y_train)

# Step 6: Evaluate
preds = model.predict(X_test)
mse = mean_squared_error(y_test, preds)
print(f"Mean Squared Error: {mse:.2f}")

# Step 7: Predict tomorrow
latest_data = df[features].iloc[-1:]
predicted = model.predict(latest_data)
print(f"Predicted closing price for tomorrow: ${predicted[0]:.2f}")


[*********************100%***********************]  1 of 1 completed


Gathering sentiment...
Mean Squared Error: 110.44
Predicted closing price for tomorrow: $123.02
