In [103]:
import yfinance as yf
from newsapi import NewsApiClient
from textblob import TextBlob
import pandas as pd
import requests
import numpy as np
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler

class MarketSentimentAnalyzer:
    def __init__(self, news_api_key):
        self.newsapi = NewsApiClient(api_key=news_api_key)
        
    def get_sentiment_scores(self, symbol, end_date, lookback_days=30):
        """
        Get sentiment scores for the last 30 days
        
        Args:
            symbol (str): Stock symbol
            end_date (datetime): End date
            lookback_days (int): Number of days to look back (default: 30)
            
        Returns:
            pd.DataFrame: Daily sentiment scores
        """
        # Calculate start date as 30 days before end date
        start_date = end_date - timedelta(days=lookback_days)
        
        # Convert dates to string format
        start_str = start_date.strftime('%Y-%m-%d')
        end_str = end_date.strftime('%Y-%m-%d')
        
        # Get news articles
        articles = self.newsapi.get_everything(
            q=symbol,
            from_param=start_str,
            to=end_str,
            language='en',
            sort_by='publishedAt'
        )
        
        # Process daily sentiment
        daily_sentiment = {}
        for article in articles['articles']:
            date = article['publishedAt'][:10]
            text = article['title'] + ' ' + article['description']
            blob = TextBlob(text)
            
            if date not in daily_sentiment:
                daily_sentiment[date] = []
            daily_sentiment[date].append(blob.sentiment.polarity)
        
        # Average daily sentiments
        sentiment_df = pd.DataFrame([
            {'Date': date, 'Sentiment': np.mean(scores)}
            for date, scores in daily_sentiment.items()
        ])
        sentiment_df['Date'] = pd.to_datetime(sentiment_df['Date'])
        sentiment_df.set_index('Date', inplace=True)
        
        return sentiment_df

In [104]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader

class AdvancedStockDataset(Dataset):
    def __init__(self, X_market, X_sentiment, y_price, y_direction, y_volatility):
        self.X_market = torch.FloatTensor(X_market)
        self.X_sentiment = torch.FloatTensor(X_sentiment)
        self.y_price = torch.FloatTensor(y_price)
        self.y_direction = torch.LongTensor(y_direction)
        self.y_volatility = torch.FloatTensor(y_volatility)
        
    def __len__(self):
        return len(self.X_market)
    
    def __getitem__(self, idx):
        return (self.X_market[idx], self.X_sentiment[idx], 
                self.y_price[idx], self.y_direction[idx], self.y_volatility[idx])

class AdvancedLSTMPredictor(nn.Module):
    def __init__(self, market_dim, hidden_dim, num_layers, dropout):
        super().__init__()
        
        # Market data LSTM
        self.market_lstm = nn.LSTM(market_dim, hidden_dim, num_layers,
                                batch_first=True, dropout=dropout)
        
        # Output heads
        self.price_head = nn.Linear(hidden_dim, 1)
        self.direction_head = nn.Linear(hidden_dim, 2)
        self.volatility_head = nn.Linear(hidden_dim, 1)
        
    def forward(self, x_market):
        # Process market data
        market_out, _ = self.market_lstm(x_market)
        features = market_out[:, -1, :]  # Get the last hidden state
        
        # Generate predictions directly from market features
        price_pred = self.price_head(features)
        direction_pred = self.direction_head(features)
        volatility_pred = self.volatility_head(features)
        
        return price_pred, direction_pred, volatility_pred

import torch
from sklearn.preprocessing import MinMaxScaler

def prepare_market_data(df_market):
    """
    Prepare market data for training using all available features and all timeframes
    
    Args:
        df_market (pd.DataFrame): Market data with technical indicators
        
    Returns:
        tuple: (X_data, y_price, y_direction, y_volatility, scaler)
    """
    # Get all available features
    features = df_market.columns.tolist()
    
    # Handle any NaN values
    df_market = df_market.fillna(0)
    
    # Scale all features
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df_market)
    
    # Convert to tensor format
    X_data = torch.FloatTensor(scaled_data[:-1])  # All data except last row
    
    # Price target (next day's close price)
    close_idx = features.index('Close')
    y_price = torch.FloatTensor(scaled_data[1:, close_idx])  # Shifted by 1 day
    
    # Direction target (1 if price went up, 0 if down)
    price_directions = (scaled_data[1:, close_idx] > scaled_data[:-1, close_idx]).astype(int)
    y_direction = torch.LongTensor(price_directions)
    
    # Volatility target (using rolling standard deviation of returns)
    returns = np.diff(scaled_data[:, close_idx]) / scaled_data[:-1, close_idx]
    volatility = np.std(returns) * np.sqrt(252)  # Annualized volatility
    y_volatility = torch.FloatTensor([volatility] * len(y_price))
    
    return X_data, y_price, y_direction, y_volatility, scaler


class CustomLoss:
    def __init__(self, price_weight=1.0, direction_weight=0.3, volatility_weight=0.2):
        self.price_criterion = nn.MSELoss()
        self.direction_criterion = nn.CrossEntropyLoss()
        self.volatility_criterion = nn.MSELoss()
        
        self.price_weight = price_weight
        self.direction_weight = direction_weight
        self.volatility_weight = volatility_weight
    
    def __call__(self, predictions, targets):
        price_pred, direction_pred, volatility_pred = predictions
        price_true, direction_true, volatility_true = targets
        
        price_loss = self.price_criterion(price_pred, price_true.unsqueeze(1))
        direction_loss = self.direction_criterion(direction_pred, direction_true)
        volatility_loss = self.volatility_criterion(volatility_pred, volatility_true.unsqueeze(1))
        
        return (self.price_weight * price_loss +
                self.direction_weight * direction_loss +
                self.volatility_weight * volatility_loss)


In [105]:
import yfinance as yf
import pandas as pd
import numpy as np
from ta.trend import MACD
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands

def get_stock_data(ticker, period='2y'):
    """
    Fetch historical stock data using yfinance
    
    Args:
        ticker (str): Stock ticker symbol
        period (str): Time period to fetch (default: '2y')
        
    Returns:
        pd.DataFrame: Historical OHLCV data
    """
    stock = yf.Ticker(ticker)
    df = stock.history(period=period)
    return df

def create_features(df):
    """
    Create technical indicators as features
    
    Args:
        df (pd.DataFrame): OHLCV DataFrame
        
    Returns:
        pd.DataFrame: DataFrame with additional technical indicators
    """
    # Moving averages
    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    df['SMA_50'] = df['Close'].rolling(window=50).mean()
    
    # RSI
    rsi = RSIIndicator(df['Close'])
    df['RSI'] = rsi.rsi()
    
    # MACD
    macd = MACD(df['Close'])
    df['MACD'] = macd.macd()
    df['MACD_signal'] = macd.macd_signal()
    
    # Bollinger Bands
    bb = BollingerBands(df['Close'])
    df['BB_upper'] = bb.bollinger_hband()
    df['BB_lower'] = bb.bollinger_lband()
    
    # Percentage changes
    df['Returns'] = df['Close'].pct_change()
    df['Volatility'] = df['Returns'].rolling(window=20).std()
    
    # Remove NaN values
    df.dropna(inplace=True)
    
    return df

In [106]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Initialize sentiment analyzer
sentiment_analyzer = MarketSentimentAnalyzer(news_api_key='6dc6bc012b824525afa5969ae097f460')

# Get market data
df_market = get_stock_data('AAPL', period='5y')
df_market = create_features(df_market)

Using device: cuda


In [107]:
print(df_market.tail())
print(df_market.columns)
print(len(df_market))

                                 Open        High         Low       Close  \
Date                                                                        
2025-02-24 00:00:00-05:00  244.929993  248.860001  244.419998  247.100006   
2025-02-25 00:00:00-05:00  248.000000  250.000000  244.910004  247.039993   
2025-02-26 00:00:00-05:00  244.330002  244.979996  239.130005  240.360001   
2025-02-27 00:00:00-05:00  239.410004  242.460007  237.059998  237.300003   
2025-02-28 00:00:00-05:00  236.949997  242.089996  230.199997  241.839996   

                             Volume  Dividends  Stock Splits      SMA_20  \
Date                                                                       
2025-02-24 00:00:00-05:00  51326400        0.0           0.0  237.186266   
2025-02-25 00:00:00-05:00  48013300        0.0           0.0  238.057889   
2025-02-26 00:00:00-05:00  44433600        0.0           0.0  238.175973   
2025-02-27 00:00:00-05:00  41153600        0.0           0.0  238.086117   
2025

In [108]:
# Get sentiment data
end_date = df_market.index[-1]
df_sentiment = sentiment_analyzer.get_sentiment_scores(
    'AAPL',
    end_date,
    lookback_days=20
)

In [109]:
print(df_sentiment.head())

            Sentiment
Date                 
2025-02-28   0.192020
2025-02-27   0.110859
2025-02-26   0.141383
2025-02-25   0.142170
2025-02-24   0.099883


In [110]:
# Prepare the datasets
X, y_price, y_direction, y_volatility, scaler = prepare_market_data(df_market)

# Split into train/test sets
train_size = int(len(X) * 0.8)

X_train = X[:train_size]
y_price_train = y_price[:train_size]
y_direction_train = y_direction[:train_size]
y_volatility_train = y_volatility[:train_size]

X_test = X[train_size:]
y_price_test = y_price[train_size:]
y_direction_test = y_direction[train_size:]
y_volatility_test = y_volatility[train_size:]

  returns = np.diff(scaled_data[:, close_idx]) / scaled_data[:-1, close_idx]
  x = asanyarray(arr - arrmean)


In [111]:
print(X_test.shape, y_price_test.shape, y_direction_test.shape, y_volatility_test.shape)
print(X_train.shape, y_price_train.shape, y_direction_train.shape, y_volatility_train.shape)
print(X_train[0], y_price_train[0], y_direction_train[0], y_volatility_train[0])

torch.Size([242, 16]) torch.Size([242]) torch.Size([242]) torch.Size([242])
torch.Size([965, 16]) torch.Size([965]) torch.Size([965]) torch.Size([965])
tensor([0.0102, 0.0120, 0.0093, 0.0097, 0.3495, 0.0000, 0.0000, 0.0000, 0.0000,
        0.7970, 0.5885, 0.5397, 0.0000, 0.0011, 0.5185, 0.4971]) tensor(0.0050) tensor(0) tensor(nan)


In [116]:
# Split data
# Create DataLoader for training
from torch.utils.data import TensorDataset, DataLoader

# Ensure X_train and X_test have the correct shape [batch_size, sequence_length, feature_dim]
X_train = X_train.unsqueeze(1) if len(X_train.shape) == 2 else X_train
X_test = X_test.unsqueeze(1) if len(X_test.shape) == 2 else X_test

train_dataset = TensorDataset(X_train, y_price_train, y_direction_train, y_volatility_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(X_test, y_price_test, y_direction_test, y_volatility_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Initialize model
market_dim = X_train.shape[2] if len(X_train.shape) > 2 else 1
model = AdvancedLSTMPredictor(
    market_dim=market_dim,
    hidden_dim=50,
    num_layers=2,
    dropout=0.2
).to(device)

# Training parameters
criterion = CustomLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
num_epochs = 50

# Training loop
model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for batch_market, batch_y_price, batch_y_dir, batch_y_vol in train_loader:
        # Move data to device
        batch_market = batch_market.to(device)
        batch_y_price = batch_y_price.to(device)
        batch_y_dir = batch_y_dir.to(device)
        batch_y_vol = batch_y_vol.to(device)
        
        optimizer.zero_grad()
        
        # Forward pass
        price_pred, direction_pred, volatility_pred = model(batch_market)
        
        # Calculate loss
        loss = criterion(
            (price_pred, direction_pred, volatility_pred),
            (batch_y_price, batch_y_dir, batch_y_vol)
        )
        
        # Backward pass
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}')

Epoch [10/50], Loss: nan
Epoch [20/50], Loss: nan
Epoch [30/50], Loss: nan
Epoch [40/50], Loss: nan
Epoch [50/50], Loss: nan
