In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.preprocessing import RobustScaler
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

class OptionsDataset(Dataset):
    """Dataset for options with data augmentation"""
    def __init__(self, features, targets, option_types, augment=False):
        self.features = torch.from_numpy(features.astype(np.float32))
        self.targets = torch.from_numpy(targets.astype(np.float32))
        self.option_types = torch.from_numpy(option_types.astype(np.float32))
        self.augment = augment
        
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        features = self.features[idx]
        targets = self.targets[idx]
        option_type = self.option_types[idx]
        
        if self.augment and torch.rand(1) > 0.5:
            noise = torch.normal(0, 0.002, features.shape)
            features = features + noise
            
        return features, targets, option_type

class TransformerDeltaPredictor(nn.Module):
    """Transformer-based delta predictor for options"""
    def __init__(self, input_features=8, d_model=128, nhead=8, num_layers=4, dropout=0.2):
        super(TransformerDeltaPredictor, self).__init__()
        
        self.input_features = input_features
        self.d_model = d_model
        
        # Input projection
        self.input_projection = nn.Linear(input_features, d_model)
        
        # Positional encoding
        self.pos_encoding = nn.Parameter(torch.randn(1, d_model))
        
        # Option type embedding
        self.option_type_embedding = nn.Embedding(2, 32)
        
        # Transformer encoder layers
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model + 32,  # Add space for option type embedding
            nhead=nhead,
            dim_feedforward=(d_model + 32) * 4,
            dropout=dropout,
            activation='gelu',
            batch_first=True,
            norm_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        # Multi-head attention for feature importance
        self.feature_attention = nn.MultiheadAttention(
            d_model + 32, nhead, dropout=dropout, batch_first=True
        )
        
        # Delta prediction head
        self.delta_predictor = nn.Sequential(
            nn.Linear(d_model + 32, (d_model + 32) // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear((d_model + 32) // 2, 64),
            nn.GELU(),
            nn.Dropout(dropout // 2),
            nn.Linear(64, 1),
            nn.Sigmoid()  # Output 0 to 1 for absolute delta
        )
        
        # Delta change predictor (uncertainty/volatility)
        self.delta_change_predictor = nn.Sequential(
            nn.Linear(d_model + 32, 64),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(64, 32),
            nn.GELU(),
            nn.Linear(32, 1),
            nn.Softplus()  # Ensure positive output for change magnitude
        )
        
        # Temperature parameter for calibration
        self.temperature = nn.Parameter(torch.ones(1))
        
        self.apply(self._init_weights)
    
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, 0, 0.1)
    
    def forward(self, x, option_types):
        batch_size = x.size(0)
        
        # Project input features
        x_proj = self.input_projection(x)  # (batch, d_model)
        
        # Add positional encoding
        x_proj = x_proj + self.pos_encoding  # (batch, d_model)
        
        # Option type embedding
        type_emb = self.option_type_embedding(option_types.long())  # (batch, 32)
        
        # Combine features with option type
        combined = torch.cat([x_proj, type_emb], dim=1)  # (batch, d_model + 32)
        
        # Add sequence dimension for transformer
        combined = combined.unsqueeze(1)  # (batch, 1, d_model + 32)
        
        # Transformer processing
        transformer_out = self.transformer(combined)  # (batch, 1, d_model + 32)
        
        # Feature attention
        attended_out, attention_weights = self.feature_attention(
            transformer_out, transformer_out, transformer_out
        )
        
        # Residual connection
        final_features = (transformer_out + attended_out).squeeze(1)  # (batch, d_model + 32)
        
        # Predict absolute delta
        abs_delta = self.delta_predictor(final_features)
        
        # Predict delta change potential
        delta_change = self.delta_change_predictor(final_features)
        
        # Apply correct sign based on option type
        call_mask = (option_types == 1).float().unsqueeze(1)
        put_mask = (option_types == 0).float().unsqueeze(1)
        
        # Calls: positive delta, Puts: negative delta
        signed_delta = call_mask * abs_delta + put_mask * (-abs_delta)
        
        # Temperature scaling
        signed_delta = signed_delta / self.temperature
        
        return signed_delta, delta_change, attention_weights

class TransformerOptionsPredictor:
    def __init__(self):
        self.scaler = RobustScaler()
        self.model = None
        self.model_trained = False
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.current_stock_price = 230.0  # Set to 230 as requested
        self.training_history = {'loss': [], 'val_loss': []}
        
        print(f"Device: {self.device}")
        print(f"Stock price set to: ${self.current_stock_price}")
    
    def generate_options_data_230(self):
        """Generate realistic options data centered around $230 strike"""
        print(f"Generating options data around ${self.current_stock_price} strike prices")
        
        options_list = []
        
        # Strike prices around 230
        strikes = np.arange(210, 251, 2.5)  # 210 to 250 in $2.50 increments
        
        # Expiration periods as requested
        expiration_configs = [
            {'days': 7, 'label': '1 week'},
            {'days': 30, 'label': '1 month'}, 
            {'days': 45, 'label': '45 days'}
        ]
        
        for strike in strikes:
            for exp_config in expiration_configs:
                days = exp_config['days']
                moneyness = strike / self.current_stock_price
                time_to_expiry = days / 365.0
                
                # Enhanced Black-Scholes-like delta calculation
                # Using more realistic parameters
                risk_free_rate = 0.05
                volatility = 0.25 + np.random.uniform(-0.05, 0.10)  # 20-35% IV range
                
                # Calculate d1 and d2 for Black-Scholes
                d1 = (np.log(self.current_stock_price / strike) + 
                     (risk_free_rate + 0.5 * volatility**2) * time_to_expiry) / (volatility * np.sqrt(time_to_expiry))
                
                # Call delta approximation
                from scipy.stats import norm
                call_delta = norm.cdf(d1)
                
                # Add some randomness for realism
                call_delta += np.random.normal(0, 0.02)
                call_delta = np.clip(call_delta, 0.01, 0.99)
                
                # Put delta from put-call parity
                put_delta = call_delta - 1
                
                # Calculate potential delta change based on gamma and time decay
                d2 = d1 - volatility * np.sqrt(time_to_expiry)
                gamma = norm.pdf(d1) / (self.current_stock_price * volatility * np.sqrt(time_to_expiry))
                
                # Delta change potential (simplified model)
                # Factors: gamma effect from price moves, time decay, volatility changes
                price_change_potential = self.current_stock_price * 0.1  # 10% move potential
                gamma_effect = gamma * price_change_potential
                
                volatility_effect = abs(d1) * 0.05  # Volatility change effect
                time_decay_effect = 0.1 / np.sqrt(time_to_expiry)  # Time decay impact
                
                delta_change_potential = gamma_effect + volatility_effect + time_decay_effect
                delta_change_potential = np.clip(delta_change_potential, 0.01, 0.5)
                
                # Generate market data
                volume_call = max(10, int(np.random.exponential(200)))
                volume_put = max(10, int(np.random.exponential(150)))
                
                expiration_date = (datetime.now() + timedelta(days=days)).strftime('%Y-%m-%d')
                
                # Add call option
                intrinsic_call = max(0, self.current_stock_price - strike)
                time_value = volatility * np.sqrt(time_to_expiry) * self.current_stock_price * 0.4
                theo_price_call = intrinsic_call + time_value
                
                options_list.append({
                    'symbol': 'STOCK',
                    'type': 'call',
                    'strike': strike,
                    'expiration': expiration_date,
                    'expiration_label': exp_config['label'],
                    'bid': max(0.05, theo_price_call - 0.15),
                    'ask': theo_price_call + 0.15,
                    'last': theo_price_call,
                    'volume': volume_call,
                    'open_interest': np.random.randint(100, 8000),
                    'implied_volatility': volatility,
                    'delta': call_delta,
                    'days_to_expiry': days,
                    'delta_change_potential': delta_change_potential
                })
                
                # Add put option
                intrinsic_put = max(0, strike - self.current_stock_price)
                theo_price_put = intrinsic_put + time_value
                
                options_list.append({
                    'symbol': 'STOCK',
                    'type': 'put',
                    'strike': strike,
                    'expiration': expiration_date,
                    'expiration_label': exp_config['label'],
                    'bid': max(0.05, theo_price_put - 0.15),
                    'ask': theo_price_put + 0.15,
                    'last': theo_price_put,
                    'volume': volume_put,
                    'open_interest': np.random.randint(100, 8000),
                    'implied_volatility': volatility,
                    'delta': put_delta,
                    'days_to_expiry': days,
                    'delta_change_potential': delta_change_potential
                })
        
        return pd.DataFrame(options_list)
    
    def prepare_training_data(self, options_df):
        """Prepare training data for transformer model"""
        print("Preparing training data for transformer...")
        
        # Enhanced feature engineering
        options_df['moneyness'] = options_df['strike'] / self.current_stock_price
        options_df['log_moneyness'] = np.log(options_df['moneyness'])
        options_df['sqrt_time'] = np.sqrt(options_df['days_to_expiry'] / 365)
        options_df['log_time'] = np.log(options_df['days_to_expiry'] / 365 + 1e-6)
        options_df['log_volume'] = np.log1p(options_df['volume'])
        options_df['log_open_interest'] = np.log1p(options_df['open_interest'])
        options_df['moneyness_time'] = options_df['moneyness'] * options_df['sqrt_time']
        options_df['iv_time'] = options_df['implied_volatility'] * options_df['sqrt_time']
        
        # Additional transformer-friendly features
        options_df['moneyness_squared'] = options_df['moneyness'] ** 2
        options_df['time_to_expiry_norm'] = options_df['days_to_expiry'] / 365
        
        feature_columns = [
            'log_moneyness', 'sqrt_time', 'log_time', 'implied_volatility',
            'log_volume', 'log_open_interest', 'moneyness_time', 'iv_time'
        ]
        
        # Clean data
        clean_df = options_df.dropna(subset=feature_columns + ['delta', 'delta_change_potential'])
        
        features = clean_df[feature_columns].values
        targets = clean_df['delta'].values.reshape(-1, 1)
        delta_changes = clean_df['delta_change_potential'].values.reshape(-1, 1)
        option_types = (clean_df['type'] == 'call').astype(int).values
        
        self.options_df = clean_df
        
        return features, targets, delta_changes, option_types
    
    def train_transformer_model(self, features, targets, delta_changes, option_types, epochs=120):
        """Train the transformer model"""
        print("Training Transformer Delta Predictor...")
        
        # Scale features
        features_scaled = self.scaler.fit_transform(features)
        
        # Split data
        indices = torch.randperm(len(features_scaled))
        split_idx = int(0.8 * len(features_scaled))
        
        train_indices = indices[:split_idx]
        val_indices = indices[split_idx:]
        
        # Create datasets
        train_dataset = OptionsDataset(
            features_scaled[train_indices], targets[train_indices], 
            option_types[train_indices], augment=True
        )
        val_dataset = OptionsDataset(
            features_scaled[val_indices], targets[val_indices], 
            option_types[val_indices], augment=False
        )
        
        # DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
        
        # Initialize transformer model
        self.model = TransformerDeltaPredictor(
            input_features=features.shape[1],
            d_model=128,
            nhead=8,
            num_layers=4,
            dropout=0.1
        ).to(self.device)
        
        # Multi-objective loss function
        delta_criterion = nn.MSELoss()
        change_criterion = nn.MSELoss()
        
        # Optimizer with transformer-friendly settings
        optimizer = optim.AdamW(
            self.model.parameters(), 
            lr=0.001, 
            weight_decay=1e-4,
            betas=(0.9, 0.999),
            eps=1e-8
        )
        
        # Cosine annealing scheduler
        scheduler = CosineAnnealingLR(optimizer, T_max=epochs, eta_min=1e-6)
        
        # Training loop
        best_val_loss = float('inf')
        patience_counter = 0
        patience = 20
        
        for epoch in range(epochs):
            # Training phase
            self.model.train()
            train_loss = 0.0
            
            for batch_features, batch_targets, batch_types in train_loader:
                batch_features = batch_features.to(self.device)
                batch_targets = batch_targets.to(self.device)
                batch_types = batch_types.to(self.device)
                
                optimizer.zero_grad()
                
                # Forward pass
                delta_pred, change_pred, attention_weights = self.model(batch_features, batch_types)
                
                # Multi-objective loss
                delta_loss = delta_criterion(delta_pred, batch_targets)
                
                # Add attention regularization
                attention_reg = torch.mean(torch.sum(attention_weights ** 2, dim=-1))
                
                total_loss = delta_loss + 0.01 * attention_reg
                
                total_loss.backward()
                
                # Gradient clipping for transformer stability
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
                
                optimizer.step()
                train_loss += total_loss.item()
            
            # Validation phase
            self.model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for batch_features, batch_targets, batch_types in val_loader:
                    batch_features = batch_features.to(self.device)
                    batch_targets = batch_targets.to(self.device)
                    batch_types = batch_types.to(self.device)
                    
                    delta_pred, change_pred, _ = self.model(batch_features, batch_types)
                    loss = delta_criterion(delta_pred, batch_targets)
                    val_loss += loss.item()
            
            train_loss /= len(train_loader)
            val_loss /= len(val_loader)
            
            self.training_history['loss'].append(train_loss)
            self.training_history['val_loss'].append(val_loss)
            
            scheduler.step()
            
            if epoch % 20 == 0:
                lr = scheduler.get_last_lr()[0]
                print(f"Epoch {epoch}/{epochs} - Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}, LR: {lr:.6f}")
            
            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                torch.save(self.model.state_dict(), 'best_transformer_delta_model.pth')
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print(f"Early stopping at epoch {epoch}")
                    break
        
        # Load best model
        self.model.load_state_dict(torch.load('best_transformer_delta_model.pth'))
        self.model_trained = True
        print(f"Training completed. Best validation loss: {best_val_loss:.6f}")
    
    def predict_with_transformer(self, features, option_types):
        """Make predictions using trained transformer"""
        if not self.model_trained:
            raise ValueError("Model not trained")
        
        features_scaled = self.scaler.transform(features)
        features_tensor = torch.from_numpy(features_scaled.astype(np.float32)).to(self.device)
        option_types_tensor = torch.from_numpy(option_types.astype(np.float32)).to(self.device)
        
        self.model.eval()
        with torch.no_grad():
            delta_pred, change_pred, attention_weights = self.model(features_tensor, option_types_tensor)
        
        return (delta_pred.cpu().numpy(), 
                change_pred.cpu().numpy(), 
                attention_weights.cpu().numpy())
    
    def generate_csv_output(self):
        """Generate CSV with strike prices, deltas, and potential changes"""
        if not hasattr(self, 'options_df'):
            raise ValueError("No options data available. Run training first.")
        
        df = self.options_df.copy()
        
        # Prepare features for prediction
        feature_columns = [
            'log_moneyness', 'sqrt_time', 'log_time', 'implied_volatility',
            'log_volume', 'log_open_interest', 'moneyness_time', 'iv_time'
        ]
        
        features = df[feature_columns].values
        option_types = (df['type'] == 'call').astype(int).values
        
        # Make predictions
        delta_predictions, change_predictions, attention = self.predict_with_transformer(features, option_types)
        
        # Create output dataframe
        output_df = pd.DataFrame({
            'Strike_Price': df['strike'],
            'Option_Type': df['type'].str.upper(),
            'Expiration_Date': df['expiration'],
            'Expiration_Label': df['expiration_label'],
            'Days_to_Expiry': df['days_to_expiry'],
            'Predicted_Delta': delta_predictions.flatten(),
            'Actual_Delta': df['delta'],
            'Delta_Change_Potential': change_predictions.flatten(),
            'Implied_Volatility': df['implied_volatility'],
            'Volume': df['volume'],
            'Open_Interest': df['open_interest'],
            'Moneyness': df['moneyness']
        })
        
        # Round numerical columns
        numerical_cols = ['Predicted_Delta', 'Actual_Delta', 'Delta_Change_Potential', 
                         'Implied_Volatility', 'Moneyness']
        for col in numerical_cols:
            output_df[col] = output_df[col].round(4)
        
        # Sort by strike price and option type
        output_df = output_df.sort_values(['Strike_Price', 'Option_Type', 'Days_to_Expiry'])
        
        # Save to CSV
        csv_filename = f'options_delta_predictions_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
        output_df.to_csv(csv_filename, index=False)
        
        print(f"\nCSV saved as: {csv_filename}")
        print(f"Total options analyzed: {len(output_df)}")
        print(f"Strike price range: ${output_df['Strike_Price'].min()} - ${output_df['Strike_Price'].max()}")
        
        # Display sample of the data
        print("\nSample of generated data:")
        print(output_df.head(20).to_string(index=False))
        
        return output_df, csv_filename
    
    def run_complete_analysis(self):
        """Run complete transformer-based analysis"""
        print("Starting Transformer-Based Options Delta Analysis")
        print(f"Target stock price: ${self.current_stock_price}")
        print("Expiration periods: 1 week, 1 month, 45 days")
        print("Strike range: $210-$250")
        
        # Generate options data
        options_df = self.generate_options_data_230()
        print(f"Generated {len(options_df)} option contracts")
        
        # Prepare training data
        features, targets, delta_changes, option_types = self.prepare_training_data(options_df)
        print(f"Prepared training data: {len(features)} samples")
        
        # Train transformer model
        self.train_transformer_model(features, targets, delta_changes, option_types)
        
        # Generate CSV output
        output_df, csv_filename = self.generate_csv_output()
        
        # Calculate performance metrics
        delta_predictions, _, _ = self.predict_with_transformer(features, option_types)
        mse = np.mean((delta_predictions - targets) ** 2)
        mae = np.mean(np.abs(delta_predictions - targets))
        
        print(f"\nTransformer Model Performance:")
        print(f"MSE: {mse:.6f}")
        print(f"MAE: {mae:.6f}")
        
        # Summary by expiration
        print("\nSummary by Expiration Period:")
        summary = output_df.groupby(['Expiration_Label', 'Option_Type']).agg({
            'Predicted_Delta': ['mean', 'std', 'min', 'max'],
            'Delta_Change_Potential': ['mean', 'std'],
            'Strike_Price': 'count'
        }).round(4)
        print(summary)
        
        return output_df, csv_filename

# Initialize scipy for Black-Scholes calculations
try:
    from scipy.stats import norm
except ImportError:
    print("Installing scipy...")
    import subprocess
    subprocess.check_call(['pip', 'install', 'scipy'])
    from scipy.stats import norm

# Example usage
if __name__ == "__main__":
    predictor = TransformerOptionsPredictor()
    
    # Run complete analysis
    output_df, csv_filename = predictor.run_complete_analysis()
    
    print(f"\n{'='*60}")
    print("ANALYSIS COMPLETE")
    print(f"{'='*60}")
    print(f"✅ CSV file generated: {csv_filename}")
    print(f"✅ Model saved: best_transformer_delta_model.pth")
    print("✅ Contains strike prices around $230")
    print("✅ Includes 1 week, 1 month, and 45-day expirations") 
    print("✅ Shows predicted deltas and potential delta changes")
    print("\nCheck the CSV file for detailed results!")

Device: cpu
Stock price set to: $230.0
Starting Transformer-Based Options Delta Analysis
Target stock price: $230.0
Expiration periods: 1 week, 1 month, 45 days
Strike range: $210-$250
Generating options data around $230.0 strike prices
Generated 102 option contracts
Preparing training data for transformer...
Prepared training data: 102 samples
Training Transformer Delta Predictor...
Epoch 0/120 - Train Loss: 0.238256, Val Loss: 0.294218, LR: 0.001000
Epoch 20/120 - Train Loss: 0.139066, Val Loss: 0.072687, LR: 0.000926
Early stopping at epoch 31
Training completed. Best validation loss: 0.072173

CSV saved as: options_delta_predictions_20250828_153245.csv
Total options analyzed: 102
Strike price range: $210.0 - $250.0

Sample of generated data:
 Strike_Price Option_Type Expiration_Date Expiration_Label  Days_to_Expiry  Predicted_Delta  Actual_Delta  Delta_Change_Potential  Implied_Volatility  Volume  Open_Interest  Moneyness
        210.0        CALL      2025-09-04           1 week  