In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import requests
from sklearn.preprocessing import StandardScaler
from datetime import datetime, timedelta

In [10]:
# Polygon API Key (Replace with your own)
POLYGON_API_KEY = 'uwQtl3txGt5BLbecq7ZbIu0ZbuitCGjc'

In [11]:
# Function to fetch stock data from Polygon
def fetch_stock_data(symbol, start_date, end_date):
    url = f"https://api.polygon.io/v2/aggs/ticker/{symbol}/range/1/day/{start_date}/{end_date}?apiKey={POLYGON_API_KEY}"
    response = requests.get(url)
    data = response.json()

    if 'results' not in data:
        return None

    df = pd.DataFrame(data['results'])
    df['date'] = pd.to_datetime(df['t'], unit='ms').dt.strftime('%m-%d-%Y')
    df.set_index('date', inplace=True)
    df.rename(columns={'o': 'open', 'h': 'high', 'l': 'low',
              'c': 'close', 'v': 'volume'}, inplace=True)
    return df[['open', 'high', 'low', 'close', 'volume']]

# Function to fetch fundamental data from Polygon


def fetch_fundamental_data(symbol):
    url = f"https://api.polygon.io/v2/reference/financials/{symbol}?apiKey={POLYGON_API_KEY}"
    response = requests.get(url)
    data = response.json()

    if 'results' not in data:
        return None

    metrics = data['results'][0]
    ev_ebitda = metrics['enterpriseValue'] / \
        metrics['earningsBeforeInterestTaxesDepreciationAmortization'] if metrics[
            'earningsBeforeInterestTaxesDepreciationAmortization'] else np.nan
    pe_ratio = metrics['priceToEarningsRatio']

    return {'EV/EBITDA': ev_ebitda, 'P/E': pe_ratio}

# Function to compute technical indicators


def compute_technical_indicators(df):
    df['RSI'] = compute_rsi(df['close'])
    df['Stoch'] = compute_stochastic(df['close'], df['low'], df['high'])
    df['SMA_50'] = df['close'].rolling(window=50).mean()
    df['SMA_200'] = df['close'].rolling(window=200).mean()
    df.dropna(inplace=True)
    return df

# RSI Calculation


def compute_rsi(series, period=14):
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

# Stochastic Oscillator Calculation


def compute_stochastic(close, low, high, period=14):
    lowest_low = low.rolling(window=period).min()
    highest_high = high.rolling(window=period).max()
    return ((close - lowest_low) / (highest_high - lowest_low)) * 100

# Fetch and process data


def prepare_data(symbol, start_date, end_date):
    df = fetch_stock_data(symbol, start_date, end_date)
    fundamentals = fetch_fundamental_data(symbol)

    if df is not None and fundamentals is not None:
        df = compute_technical_indicators(df)
        df['EV/EBITDA'] = fundamentals['EV/EBITDA']
        df['P/E'] = fundamentals['P/E']
        df.to_csv(f'{symbol}.csv',index=True)


        scaler = StandardScaler()
        features = scaler.fit_transform(df.drop(columns=['close']))
        labels = (df['close'].shift(-1) > df['close']).astype(int).dropna()
        # Align features and labels
        features, labels = features[:-1], labels.values
        return torch.tensor(features, dtype=torch.float32), torch.tensor(labels, dtype=torch.float32)
    return None, None

# Transformer Model for Binomial Classification


class StockTransformer(nn.Module):
    def __init__(self, input_dim, embed_dim, num_heads, num_layers, ff_dim, dropout=0.1):
        super(StockTransformer, self).__init__()
        self.embedding = nn.Linear(input_dim, embed_dim)
        encoder_layers = nn.TransformerEncoderLayer(
            d_model=embed_dim, nhead=num_heads, dim_feedforward=ff_dim, dropout=dropout)
        self.transformer = nn.TransformerEncoder(
            encoder_layers, num_layers=num_layers)
        self.fc = nn.Linear(embed_dim, 1)  # Logistic regression layer
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer(x)
        x = x.mean(dim=1)  # Global average pooling
        x = self.fc(x)
        return self.sigmoid(x)

# Training setup


def train_model(model, train_data, train_labels, epochs=50, lr=0.001):
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(train_data).squeeze()
        loss = criterion(outputs, train_labels)
        loss.backward()
        optimizer.step()

        if (epoch+1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")


# Example Usage
start_date = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d')
end_date = datetime.now().strftime('%Y-%m-%d')

symbol = "TSLA"
features, labels = prepare_data(symbol, start_date, end_date)

if features is not None and labels is not None:
    features = features.unsqueeze(1)  # Add sequence length dimension
    model = StockTransformer(
        input_dim=features.shape[2], embed_dim=64, num_heads=4, num_layers=2, ff_dim=128)
    train_model(model, features, labels, epochs=50)
else:
    print("Failed to fetch data.")



ValueError: Using a target size (torch.Size([51])) that is different to the input size (torch.Size([50])) is deprecated. Please ensure they have the same size.