# 🚀 IPO Farming Backtest System - Enhanced Edition

**Comprehensive analysis of ALL IPOs since 2015 with advanced features**

## ✨ New Features
- **Full historical analysis** - All IPOs since Jan 1, 2015 (including delisted)
- **Checkpoint/Resume** - Handles IBKR rate limits gracefully
- **Organized outputs** - Each backtest in timestamped folder with metadata
- **Automatic abstracts** - High-level summaries of findings
- **Master tracking** - Compare results across multiple backtests

## 📊 What This Does
1. Loads all IPOs since 2015 from the Jay Ritter IPO database (`IPO-age.xlsx`)
2. Fetches intraday data (IBKR or simulated)
3. Tests 91+ trading windows
4. Generates comprehensive reports with metadata
5. Tracks results across multiple runs

## 1️⃣ Setup & Installation

In [None]:
# Install and import required packages
import subprocess
import sys
import importlib
import json
import time
from pathlib import Path

# Maps the importable module name to the pip distribution name; the two differ
# for some packages (e.g. module ``ib_insync`` is distributed as ``ib-insync``).
packages = {
    'pandas': 'pandas',
    'numpy': 'numpy',
    'matplotlib': 'matplotlib',
    'seaborn': 'seaborn',
    'openpyxl': 'openpyxl',
    'pytz': 'pytz',
    'tqdm': 'tqdm',
    'ib_insync': 'ib-insync',
    'yfinance': 'yfinance'
}

# Try to import each package and pip-install the ones that are missing.
# A failed install raises CalledProcessError and stops the cell on purpose.
for import_name, install_name in packages.items():
    try:
        importlib.import_module(import_name)
        print(f"✓ {import_name} already installed")
    except ImportError:
        print(f"Installing {install_name}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", install_name])
        # The import system caches directory listings; without this call the
        # running interpreter may not see a package installed a moment ago and
        # the import statements that follow this loop would still fail.
        importlib.invalidate_caches()
        print(f"✓ {install_name} installed")

# Now import everything
import os
import random
import warnings
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.gridspec import GridSpec
import pytz
from tqdm import tqdm
import yfinance as yf

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas deprecation/FutureWarnings raised by later cells.
warnings.filterwarnings('ignore')
# Show all DataFrame columns and render floats with two decimals when displayed.
pd.options.display.max_columns = None
pd.options.display.float_format = '{:.2f}'.format

print("\n✅ Environment ready!")

## 2️⃣ Configuration & Test Setup

In [None]:
# Configuration
# Single settings dictionary for the run. It is written to config.json below and
# read by later cells (IPO loading, the data collector, the master log).
CONFIG = {
    # Data settings
    'DATA_MODE': 'HYBRID',  # 'SIMULATION', 'IBKR', or 'HYBRID' (IBKR with fallback)
    'START_DATE': '2015-01-01',  # Analyze IPOs since this date
    'END_DATE': datetime.now().strftime('%Y-%m-%d'),
    'INCLUDE_DELISTED': True,  # Include delisted companies
    
    # Processing settings
    'MAX_TICKERS': None,  # None for all, or specify number
    'BATCH_SIZE': 10,  # Process in batches to manage memory
    # NOTE(review): no cell visible in this notebook reads CHECKPOINT_ENABLED — confirm it is used.
    'CHECKPOINT_ENABLED': True,  # Save progress for resume capability
    
    # IBKR Settings
    'IBKR': {
        'HOST': '127.0.0.1',
        'PORT': 7497,  # 7497 for Paper, 7496 for Live
        # Drawn once when this cell runs; the value is stored in config.json via default=str/int encoding.
        'CLIENT_ID': random.randint(1, 999),
        'REQUEST_DELAY': 5.0,  # Base delay between requests
        'PACING_DELAY': 15.0,  # Additional delay after errors
        'MAX_RETRIES': 3,
        'BACKOFF_FACTOR': 2.0  # Exponential backoff multiplier
    },
    
    # Output settings
    'OUTPUT_BASE': 'outputs',
    'TEST_PREFIX': 'backtest',
    # NOTE(review): GENERATE_ABSTRACT and GENERATE_VISUALIZATIONS are not read by any visible cell.
    'GENERATE_ABSTRACT': True,
    'GENERATE_VISUALIZATIONS': True,
    'TRACK_MASTER_LOG': True
}

# Create test directory with timestamp
# The timestamp has one-second resolution and doubles as the test ID in later cells.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
test_dir = Path(CONFIG['OUTPUT_BASE']) / f"{CONFIG['TEST_PREFIX']}_{timestamp}"
test_dir.mkdir(parents=True, exist_ok=True)

# Save configuration
# default=str stringifies any value the json encoder cannot serialise natively.
with open(test_dir / 'config.json', 'w') as f:
    json.dump(CONFIG, f, indent=2, default=str)

print(f"📊 Test Configuration:")
print(f"   • Date Range: {CONFIG['START_DATE']} to {CONFIG['END_DATE']}")
print(f"   • Data Mode: {CONFIG['DATA_MODE']}")
print(f"   • Include Delisted: {CONFIG['INCLUDE_DELISTED']}")
print(f"   • Test Directory: {test_dir}")
print(f"\n✅ Configuration saved to {test_dir}/config.json")

## 3️⃣ Load IPO Universe (2015-Present)

In [None]:
def load_ipo_universe_full():
    """Load the IPO universe for the configured date range.

    Reads ``IPO-age.xlsx`` (sheet ``1975-2024``), keeps rows whose offer date
    lies between ``CONFIG['START_DATE']`` and ``CONFIG['END_DATE']``, normalises
    ticker symbols, and labels each ticker ``Active`` or ``Delisted`` based on a
    yfinance lookup. Missing offer prices are replaced by the median offer price.

    Returns:
        DataFrame with columns ``Ticker``, ``Company``, ``IPO_Date``,
        ``IPO_Price`` and ``Status``. If anything goes wrong while loading the
        spreadsheet (missing file, unexpected columns, ...) a small hard-coded
        sample universe is returned instead, after printing the error.
    """

    try:
        # Load from Excel; the offer date is kept as text so that it can be
        # parsed with an explicit YYYYMMDD format below.
        df = pd.read_excel('IPO-age.xlsx', sheet_name='1975-2024', dtype={'offer date': str})

        # Unparseable dates become NaT and are dropped by the range filter.
        df['IPO_Date'] = pd.to_datetime(df['offer date'], format='%Y%m%d', errors='coerce')

        in_range = (df['IPO_Date'] >= CONFIG['START_DATE']) & (df['IPO_Date'] <= CONFIG['END_DATE'])
        # .copy() so the column assignments below modify an independent frame
        # rather than a slice of the full spreadsheet.
        df = df[in_range].dropna(subset=['Ticker']).copy()
        df['Ticker'] = df['Ticker'].astype(str).str.strip().str.upper()

        df = df.rename(columns={
            'IPO name': 'Company',
            'Offer Price': 'IPO_Price'
        })

        # Add status (check if currently trading)
        print("\n🔍 Checking ticker status (active/delisted)...")

        # Look every distinct symbol up once, in groups of 50 with a short
        # pause between groups to stay clear of rate limits.
        symbols = df['Ticker'].unique().tolist()
        status_by_ticker = {}
        for start in tqdm(range(0, len(symbols), 50), desc="Checking status"):
            for symbol in symbols[start:start + 50]:
                try:
                    info = yf.Ticker(symbol).info
                    is_active = bool(info) and 'symbol' in info
                except Exception:
                    # Any lookup failure is treated as "not trading"; using
                    # Exception (not a bare except) keeps Ctrl-C working.
                    is_active = False
                status_by_ticker[symbol] = 'Active' if is_active else 'Delisted'
            time.sleep(1)  # Small delay between batches

        df['Status'] = df['Ticker'].map(status_by_ticker).fillna('Unknown')

        universe = df[['Ticker', 'Company', 'IPO_Date', 'IPO_Price', 'Status']].copy()

        # Handle missing IPO prices. Assign the result back: an in-place fillna
        # on a column selection is not guaranteed to update the frame.
        universe['IPO_Price'] = pd.to_numeric(universe['IPO_Price'], errors='coerce')
        universe['IPO_Price'] = universe['IPO_Price'].fillna(universe['IPO_Price'].median())

        # Honour the INCLUDE_DELISTED switch (defaults to keeping everything).
        if not CONFIG.get('INCLUDE_DELISTED', True):
            universe = universe[universe['Status'] != 'Delisted']

        # Apply max ticker limit if specified
        if CONFIG['MAX_TICKERS']:
            universe = universe.head(CONFIG['MAX_TICKERS'])

        return universe

    except Exception as e:
        # Deliberate best-effort fallback so the rest of the notebook can run.
        print(f"❌ Error loading IPO data: {e}")
        print("Using sample data instead...")

        sample_data = [
            ('UBER', 'Uber', '2019-05-10', 45.00, 'Active'),
            ('LYFT', 'Lyft', '2019-03-29', 72.00, 'Active'),
            ('ABNB', 'Airbnb', '2020-12-10', 68.00, 'Active'),
            ('DASH', 'DoorDash', '2020-12-09', 102.00, 'Active'),
            ('SNOW', 'Snowflake', '2020-09-16', 120.00, 'Active'),
            ('COIN', 'Coinbase', '2021-04-14', 250.00, 'Active'),
            ('HOOD', 'Robinhood', '2021-07-29', 38.00, 'Active'),
            ('RIVN', 'Rivian', '2021-11-10', 78.00, 'Active'),
        ]

        universe = pd.DataFrame(sample_data,
                                columns=['Ticker', 'Company', 'IPO_Date', 'IPO_Price', 'Status'])
        universe['IPO_Date'] = pd.to_datetime(universe['IPO_Date'])
        return universe

# Build the IPO universe and store it alongside the other artefacts of this run
print("📚 Loading IPO universe...")
ipo_universe = load_ipo_universe_full()
ipo_universe.to_csv(test_dir / 'ipo_universe.csv', index=False)

# Headline numbers: total size, one count per status label, and the date span
status_column = ipo_universe['Status']
ipo_dates = ipo_universe['IPO_Date']
print("\n📊 IPO Universe Summary:")
print(f"   • Total IPOs: {len(ipo_universe)}")
for status_label in ('Active', 'Delisted', 'Unknown'):
    print(f"   • {status_label}: {(status_column == status_label).sum()}")
print(f"   • Date Range: {ipo_dates.min().date()} to {ipo_dates.max().date()}")

# Per-year breakdown (the Year column is added after the CSV has been written)
print("\n📊 By Year:")
ipo_universe['Year'] = ipo_dates.dt.year
year_counts = ipo_universe.groupby('Year').size()
for year, count in year_counts.items():
    print(f"   {year}: {count} IPOs")

print(f"\n💾 Universe saved to {test_dir}/ipo_universe.csv")
ipo_universe.head(10)

## 4️⃣ Data Collection with Checkpoint System

In [None]:
class IPODataCollector:
    """Collect IPO-day intraday bars per ticker with caching and checkpointing.

    Each ticker's bars are stored as ``<test_dir>/ticker_data/<ticker>.csv`` and
    progress is tracked in ``<test_dir>/checkpoint.json`` so that an interrupted
    collection can be resumed. Depending on ``config['DATA_MODE']`` the bars come
    from IBKR (``'IBKR'``), from a simulator (``'SIMULATION'``), or from IBKR with
    a simulated fallback (``'HYBRID'``).
    """

    def __init__(self, test_dir, config):
        """Prepare the output directories and load any existing checkpoint.

        Args:
            test_dir: Directory of the current backtest run (str or Path).
            config: Settings dict; ``DATA_MODE`` and ``BATCH_SIZE`` are read, and
                the nested ``IBKR`` dict is read whenever IBKR is used.
        """
        self.test_dir = Path(test_dir)
        self.config = config
        self.checkpoint_file = self.test_dir / 'checkpoint.json'
        self.data_dir = self.test_dir / 'ticker_data'
        self.data_dir.mkdir(parents=True, exist_ok=True)

        # Load checkpoint if exists
        self.checkpoint = self.load_checkpoint()

    def load_checkpoint(self):
        """Return the checkpoint stored on disk, or a fresh one if none exists."""
        if self.checkpoint_file.exists():
            with open(self.checkpoint_file, 'r') as f:
                return json.load(f)
        return {
            'processed': [],
            'failed': [],
            'last_processed': None,
            'total_requests': 0,
            'start_time': datetime.now().isoformat()
        }

    def save_checkpoint(self):
        """Write the current checkpoint to ``checkpoint.json``."""
        with open(self.checkpoint_file, 'w') as f:
            json.dump(self.checkpoint, f, indent=2)

    @staticmethod
    def _stable_seed(ticker):
        """Return a 32-bit seed for *ticker* that is identical in every process.

        The built-in ``hash()`` of a string is salted per interpreter start, so
        it cannot be used to make simulated data reproducible across runs.
        """
        import zlib  # local import: zlib is not among the notebook's top-level imports
        return zlib.crc32(str(ticker).encode('utf-8')) & 0xFFFFFFFF

    @staticmethod
    def _read_ticker_file(ticker_file):
        """Load one cached ticker CSV and restore its ``datetime`` column."""
        df = pd.read_csv(ticker_file)
        df['datetime'] = pd.to_datetime(df['datetime'])
        return df

    def generate_ipo_day_data(self, ticker, ipo_date, ipo_price):
        """Simulate one-minute OHLCV bars for the IPO day, 09:30 through 16:00.

        The series is a random walk whose drift and volatility vary by hour of
        day, starting from ``ipo_price`` times a random opening "pop" and
        floored at half the IPO price. Output depends only on the arguments:
        the generator is seeded from the ticker symbol and is local, so the
        global NumPy random state is left untouched.

        Returns:
            DataFrame with columns ``datetime``, ``open``, ``high``, ``low``,
            ``close`` and ``volume``.
        """
        rng = np.random.RandomState(self._stable_seed(ticker))

        # IPO day patterns
        pop = rng.uniform(0.9, 1.5)  # Opening pop
        volatility = rng.uniform(0.003, 0.008)
        trend = rng.uniform(-0.0002, 0.0005)

        # Generate time series
        start_time = pd.Timestamp(ipo_date).replace(hour=9, minute=30)
        end_time = pd.Timestamp(ipo_date).replace(hour=16, minute=0)
        times = pd.date_range(start_time, end_time, freq='1min')

        # Generate prices
        open_price = ipo_price * pop
        prices = [open_price]
        price_floor = ipo_price * 0.5

        for i in range(1, len(times)):
            hour = times[i].hour

            # Intraday patterns: (volatility multiplier, trend multiplier)
            if hour < 10:
                vol_mult = 1.5
                trend_mult = -0.5
            elif hour < 12:
                vol_mult = 1.2
                trend_mult = 0.5
            elif hour < 14:
                vol_mult = 0.8
                trend_mult = 1.0
            else:
                vol_mult = 1.1
                trend_mult = 1.5

            change = rng.normal(trend * trend_mult, volatility * vol_mult)
            new_price = prices[-1] * (1 + change)
            prices.append(max(new_price, price_floor))

        price_array = np.array(prices)
        n_bars = len(prices)
        df = pd.DataFrame({
            'datetime': times,
            'open': prices,
            'high': price_array * (1 + np.abs(rng.normal(0, 0.002, n_bars))),
            'low': price_array * (1 - np.abs(rng.normal(0, 0.002, n_bars))),
            'close': prices,
            'volume': rng.gamma(2, 100000, n_bars).astype(int)
        })

        # Ensure OHLC consistency
        df['high'] = df[['open', 'high', 'close']].max(axis=1)
        df['low'] = df[['open', 'low', 'close']].min(axis=1)

        return df

    def fetch_ibkr_data(self, ticker, ipo_date):
        """Request one day of one-minute TRADES bars ending on the IPO date.

        Returns:
            DataFrame of bars with a ``datetime`` column, or ``None`` when the
            contract cannot be qualified, no bars are returned, or any error
            occurs (the error is printed, truncated to 50 characters).
        """
        ib = None
        try:
            from ib_insync import IB, Stock, util

            ib = IB()
            ib.connect(
                self.config['IBKR']['HOST'],
                self.config['IBKR']['PORT'],
                clientId=self.config['IBKR']['CLIENT_ID'] + random.randint(0, 100)
            )

            contract = Stock(ticker, 'SMART', 'USD')
            qualified = ib.qualifyContracts(contract)
            if not qualified:
                return None

            eastern = pytz.timezone('America/New_York')
            end_dt = eastern.localize(ipo_date.replace(hour=23, minute=59))
            end_str = end_dt.strftime("%Y%m%d %H:%M:%S") + " US/Eastern"

            bars = ib.reqHistoricalData(
                qualified[0],
                endDateTime=end_str,
                durationStr='1 D',
                barSizeSetting='1 min',
                whatToShow='TRADES',
                useRTH=False,
                formatDate=1
            )
            if not bars:
                return None

            df = util.df(bars)
            df['datetime'] = pd.to_datetime(df['date'])
            return df.drop('date', axis=1)

        except Exception as e:
            print(f"   IBKR error for {ticker}: {str(e)[:50]}")
            return None
        finally:
            # Always release the connection, including when a request raised
            # after connect(); otherwise every failure leaks a client session.
            if ib is not None and ib.isConnected():
                ib.disconnect()

    def collect_data(self, universe):
        """Collect bars for every ticker in *universe*, resuming where possible.

        Tickers already listed in the checkpoint are reloaded from their cached
        CSV instead of being fetched again, so the returned dict also covers
        work done before an interruption.

        Args:
            universe: DataFrame with ``Ticker``, ``IPO_Date`` and ``IPO_Price``.

        Returns:
            dict mapping ticker symbol to its bar DataFrame.
        """
        mode = self.config['DATA_MODE']
        use_ibkr = mode in ['IBKR', 'HYBRID']

        print(f"\n📡 Starting data collection...")
        print(f"   Mode: {mode}")
        print(f"   Already processed: {len(self.checkpoint['processed'])}")

        # Filter out already processed
        done_mask = universe['Ticker'].isin(self.checkpoint['processed'])
        remaining = universe[~done_mask]
        print(f"   Remaining: {len(remaining)}")

        collected_data = {}

        # Resume support: bring back the data of tickers finished earlier.
        for ticker in universe.loc[done_mask, 'Ticker']:
            ticker_file = self.data_dir / f"{ticker}.csv"
            if ticker_file.exists():
                collected_data[ticker] = self._read_ticker_file(ticker_file)

        # Process in batches
        batch_size = self.config['BATCH_SIZE']
        total_batches = (len(remaining) - 1) // batch_size + 1
        handled = 0  # tickers attempted in this call, successful or not

        for batch_idx in range(0, len(remaining), batch_size):
            batch = remaining.iloc[batch_idx:batch_idx + batch_size]
            print(f"\n📦 Processing batch {batch_idx//batch_size + 1}/{total_batches}")
            batch_hit_ibkr = False

            for _, row in tqdm(batch.iterrows(), total=len(batch), desc="Collecting"):
                ticker = row['Ticker']

                # Check if already saved
                ticker_file = self.data_dir / f"{ticker}.csv"
                if ticker_file.exists():
                    collected_data[ticker] = self._read_ticker_file(ticker_file)
                    continue

                # Collect data based on mode
                df = None

                if use_ibkr:
                    ibkr_cfg = self.config['IBKR']
                    batch_hit_ibkr = True

                    # Try IBKR first
                    for attempt in range(ibkr_cfg['MAX_RETRIES']):
                        df = self.fetch_ibkr_data(ticker, row['IPO_Date'])
                        if df is not None:
                            break

                        # Exponential backoff
                        delay = ibkr_cfg['REQUEST_DELAY'] * (ibkr_cfg['BACKOFF_FACTOR'] ** attempt)
                        time.sleep(delay)

                    if df is not None:
                        # Success with IBKR
                        time.sleep(ibkr_cfg['REQUEST_DELAY'])
                    elif mode == 'HYBRID':
                        # Fallback to simulation
                        df = self.generate_ipo_day_data(
                            ticker, row['IPO_Date'], row['IPO_Price']
                        )
                else:
                    # Pure simulation mode
                    df = self.generate_ipo_day_data(
                        ticker, row['IPO_Date'], row['IPO_Price']
                    )

                if df is not None:
                    # Save data
                    df.to_csv(ticker_file, index=False)
                    collected_data[ticker] = df
                    self.checkpoint['processed'].append(ticker)
                elif ticker not in self.checkpoint['failed']:
                    # Record each failing ticker once, even across re-runs.
                    self.checkpoint['failed'].append(ticker)

                # Update checkpoint
                self.checkpoint['last_processed'] = ticker
                self.checkpoint['total_requests'] += 1

                # Save checkpoint every 10 tickers attempted. Counting attempts
                # (not successes) gives a steady cadence even during a run of
                # failures.
                handled += 1
                if handled % 10 == 0:
                    self.save_checkpoint()

            # Pacing delay between batches: only needed when this batch really
            # talked to IBKR. Simulated or cached batches need no throttling.
            if batch_hit_ibkr and batch_idx + batch_size < len(remaining):
                pacing = self.config['IBKR']['PACING_DELAY']
                print(f"   Waiting {pacing}s before next batch...")
                time.sleep(pacing)

        # Final checkpoint save
        self.save_checkpoint()

        print(f"\n✅ Data collection complete!")
        print(f"   Successful: {len(collected_data)}")
        print(f"   Failed: {len(self.checkpoint['failed'])}")

        return collected_data

# Initialize collector and run
# The collector works inside this run's output directory; collect_data returns
# the per-ticker DataFrames that the window analysis cell consumes.
collector = IPODataCollector(test_dir, CONFIG)
ipo_data = collector.collect_data(ipo_universe)

print(f"\n📊 Collected data for {len(ipo_data)} tickers")
print(f"💾 Data saved to {test_dir}/ticker_data/")

## 5️⃣ Window Analysis Engine

In [None]:
def analyze_trading_windows_enhanced(data_dict, metadata):
    """Evaluate every buy/sell pair of half-hour marks between 09:30 and 16:00.

    For each of the 91 windows (14 time marks, buy strictly before sell) the
    return of every ticker is computed from the ``close`` of the first bar
    stamped exactly at the buy time and at the sell time. Windows with fewer
    than five usable tickers are skipped.

    Args:
        data_dict: Mapping of ticker -> DataFrame with ``datetime`` (datetime
            dtype) and ``close`` columns.
        metadata: Dict that is updated in place with ``windows_analyzed``,
            ``best_window``, ``best_return``, ``best_sharpe`` and
            ``highest_win_rate`` (the last four are ``None`` when no window
            qualified).

    Returns:
        Tuple ``(results_df, window_details, metadata)`` where ``results_df``
        holds one row per window sorted by ``avg_return`` descending (empty but
        with all columns when nothing qualified) and ``window_details`` maps
        ``"HH:MM-HH:MM"`` to a ``{ticker: return}`` dict.
    """

    print(f"\n🔍 Analyzing {len(data_dict)} tickers across trading windows...")

    # Half-hour marks 09:30 ... 16:00. The hour range must run through 16,
    # otherwise the closing mark is never generated.
    times = []
    for hour in range(9, 17):
        for minute in (0, 30):
            if (hour, minute) in ((9, 0), (16, 30)):
                continue
            times.append(f"{hour:02d}:{minute:02d}")

    time_of = {label: datetime.strptime(label, '%H:%M').time() for label in times}
    wanted = set(time_of.values())

    # Scan each ticker once and remember the first close seen at every mark,
    # instead of rescanning the whole frame for each of the 91 windows.
    closes_by_ticker = {}
    for ticker, df in data_dict.items():
        first_close = {}
        for bar_time, close in zip(df['datetime'].dt.time, df['close']):
            if bar_time in wanted and bar_time not in first_close:
                first_close[bar_time] = close
        closes_by_ticker[ticker] = first_close

    columns = [
        'window', 'buy_time', 'sell_time', 'duration_hrs', 'n_tickers',
        'avg_return', 'median_return', 'std_return', 'win_rate', 'sharpe',
        'max_return', 'min_return', 'skew', 'kurtosis', 'return_per_hour',
        'best_ticker', 'worst_ticker',
    ]
    results = []
    window_details = {}  # Store per-ticker results

    total_windows = len(times) * (len(times) - 1) // 2

    with tqdm(total=total_windows, desc="Analyzing windows") as pbar:
        for i, buy_time_str in enumerate(times[:-1]):
            buy_time = time_of[buy_time_str]
            for sell_time_str in times[i+1:]:
                sell_time = time_of[sell_time_str]
                window_key = f"{buy_time_str}-{sell_time_str}"

                ticker_returns = {}
                for ticker, first_close in closes_by_ticker.items():
                    if buy_time not in first_close or sell_time not in first_close:
                        continue
                    buy_price = first_close[buy_time]
                    sell_price = first_close[sell_time]
                    if buy_price > 0:
                        ticker_returns[ticker] = (sell_price - buy_price) / buy_price

                returns = list(ticker_returns.values())

                if len(returns) >= 5:  # Minimum sample size
                    duration = (datetime.strptime(sell_time_str, '%H:%M') -
                                datetime.strptime(buy_time_str, '%H:%M')).seconds / 3600

                    mean_ret = np.mean(returns)
                    std_ret = np.std(returns)
                    avg_return = mean_ret * 100
                    # NOTE(review): sqrt(252) is applied to a cross-section of
                    # per-ticker returns, not a daily series — confirm intent.
                    sharpe = mean_ret / std_ret * np.sqrt(252) if std_ret > 0 else 0

                    returns_series = pd.Series(returns)

                    results.append({
                        'window': window_key,
                        'buy_time': buy_time_str,
                        'sell_time': sell_time_str,
                        'duration_hrs': duration,
                        'n_tickers': len(returns),
                        'avg_return': avg_return,
                        'median_return': np.median(returns) * 100,
                        'std_return': std_ret * 100,
                        'win_rate': sum(1 for r in returns if r > 0) / len(returns) * 100,
                        'sharpe': sharpe,
                        'max_return': max(returns) * 100,
                        'min_return': min(returns) * 100,
                        'skew': returns_series.skew(),
                        'kurtosis': returns_series.kurtosis(),
                        'return_per_hour': avg_return / duration if duration > 0 else 0,
                        'best_ticker': max(ticker_returns.items(), key=lambda x: x[1])[0],
                        'worst_ticker': min(ticker_returns.items(), key=lambda x: x[1])[0]
                    })

                    # Store detailed results
                    window_details[window_key] = ticker_returns

                pbar.update(1)

    # Passing the column list keeps the frame well-formed when no window
    # qualified; sorting an empty, column-less frame would raise KeyError.
    results_df = pd.DataFrame(results, columns=columns)
    has_results = not results_df.empty
    if has_results:
        results_df = results_df.sort_values('avg_return', ascending=False).reset_index(drop=True)

    # Add metadata (plain Python numbers so the dict stays JSON-serialisable)
    metadata['windows_analyzed'] = len(results_df)
    metadata['best_window'] = results_df.iloc[0]['window'] if has_results else None
    metadata['best_return'] = float(results_df.iloc[0]['avg_return']) if has_results else None
    metadata['best_sharpe'] = float(results_df['sharpe'].max()) if has_results else None
    metadata['highest_win_rate'] = float(results_df['win_rate'].max()) if has_results else None

    return results_df, window_details, metadata

# Run enhanced analysis
# Seed the metadata with run-level facts; the analysis function adds the
# window statistics to the same dict and hands it back.
metadata = {
    'test_timestamp': timestamp,
    'tickers_analyzed': len(ipo_data),
    'date_range': f"{CONFIG['START_DATE']} to {CONFIG['END_DATE']}",
    'data_mode': CONFIG['DATA_MODE']
}

results_df, window_details, metadata = analyze_trading_windows_enhanced(ipo_data, metadata)

# Save results
results_df.to_csv(test_dir / 'window_analysis_results.csv', index=False)
with open(test_dir / 'metadata.json', 'w') as f:
    json.dump(metadata, f, indent=2)

# best_return is None when no window met the minimum sample size; formatting
# None with :.2f would raise TypeError, so render a placeholder instead.
best_return_text = (
    f"{metadata['best_return']:.2f}%" if metadata['best_return'] is not None else "n/a"
)

print(f"\n✅ Analysis complete!")
print(f"   Windows analyzed: {len(results_df)}")
print(f"   Best window: {metadata['best_window']}")
print(f"   Best return: {best_return_text}")
print(f"\n💾 Results saved to {test_dir}")

# Display top results
print("\n🏆 TOP 10 WINDOWS:")
results_df.head(10)[['window', 'avg_return', 'win_rate', 'sharpe', 'n_tickers']]

## 6️⃣ Generate Abstract & Summary

In [None]:
def generate_abstract(results_df, metadata, test_dir):
    """Generate high-level abstract of findings"""
    
    # Classify results
    profitable_windows = len(results_df[results_df['avg_return'] > 0])
    high_win_rate = len(results_df[results_df['win_rate'] > 60])
    
    # Determine overall assessment
    if profitable_windows > len(results_df) * 0.7:
        assessment = "STRONG POSITIVE"
        emoji = "🟢"
    elif profitable_windows > len(results_df) * 0.4:
        assessment = "MIXED"
        emoji = "🟡"
    else:
        assessment = "WEAK"
        emoji = "🔴"
    
    # Key insights
    top3 = results_df.head(3)
    morning_windows = results_df[results_df['buy_time'].apply(lambda x: int(x.split(':')[0]) < 12)]
    afternoon_windows = results_df[results_df['buy_time'].apply(lambda x: int(x.split(':')[0]) >= 12)]
    
    abstract = f"""# IPO Farming Backtest Abstract

## Test Information
- **Test ID**: {metadata['test_timestamp']}
- **Date Range**: {metadata['date_range']}
- **Tickers Analyzed**: {metadata['tickers_analyzed']}
- **Windows Tested**: {metadata['windows_analyzed']}
- **Data Source**: {metadata['data_mode']}

## Overall Assessment: {emoji} {assessment}

## Key Findings

### Performance Summary
- **Profitable Windows**: {profitable_windows}/{len(results_df)} ({profitable_windows/len(results_df)*100:.1f}%)
- **High Win Rate (>60%)**: {high_win_rate} windows
- **Best Return**: {results_df.iloc[0]['avg_return']:.2f}% ({results_df.iloc[0]['window']})
- **Best Sharpe**: {results_df['sharpe'].max():.2f}
- **Highest Win Rate**: {results_df['win_rate'].max():.1f}%

### Top 3 Strategies
1. **{top3.iloc[0]['window']}**: {top3.iloc[0]['avg_return']:.2f}% return, {top3.iloc[0]['win_rate']:.1f}% win rate
2. **{top3.iloc[1]['window']}**: {top3.iloc[1]['avg_return']:.2f}% return, {top3.iloc[1]['win_rate']:.1f}% win rate
3. **{top3.iloc[2]['window']}**: {top3.iloc[2]['avg_return']:.2f}% return, {top3.iloc[2]['win_rate']:.1f}% win rate

### Time-Based Insights
- **Morning Entry (before 12:00)**: Avg return = {morning_windows['avg_return'].mean():.2f}%
- **Afternoon Entry (after 12:00)**: Avg return = {afternoon_windows['avg_return'].mean():.2f}%
- **Optimal Entry Hour**: {results_df.groupby(results_df['buy_time'].apply(lambda x: x.split(':')[0]))['avg_return'].mean().idxmax()}:00-:30
- **Optimal Exit Hour**: {results_df.groupby(results_df['sell_time'].apply(lambda x: x.split(':')[0]))['avg_return'].mean().idxmax()}:00-:30

### Risk Analysis
- **Average Volatility**: {results_df['std_return'].mean():.2f}%
- **Worst Drawdown**: {results_df['min_return'].min():.2f}%
- **Average Skew**: {results_df['skew'].mean():.2f}
- **Risk-Adjusted Best**: {results_df.nlargest(1, 'sharpe').iloc[0]['window']} (Sharpe: {results_df['sharpe'].max():.2f})

## Interesting Observations
"""
    
    # Add interesting observations
    observations = []
    
    # Check for patterns
    if morning_windows['avg_return'].mean() > afternoon_windows['avg_return'].mean() * 1.5:
        observations.append("- Morning entries significantly outperform afternoon entries")
    
    short_holds = results_df[results_df['duration_hrs'] <= 2]
    long_holds = results_df[results_df['duration_hrs'] > 4]
    if short_holds['avg_return'].mean() > long_holds['avg_return'].mean():
        observations.append("- Shorter holding periods show better returns than longer holds")
    
    if results_df.iloc[0]['win_rate'] > 70:
        observations.append(f"- Top strategy shows exceptional consistency with {results_df.iloc[0]['win_rate']:.1f}% win rate")
    
    if len(observations) > 0:
        abstract += "\n".join(observations)
    else:
        abstract += "- No exceptional patterns detected in this test"
    
    abstract += f"""

## Recommendations

Based on this analysis:
1. **Entry Time**: Consider entering positions around {results_df.iloc[0]['buy_time']}
2. **Exit Time**: Target exits around {results_df.iloc[0]['sell_time']}
3. **Hold Duration**: Optimal holding period appears to be {results_df.iloc[0]['duration_hrs']:.1f} hours
4. **Risk Management**: Use stop losses due to {abs(results_df['min_return'].min()):.1f}% worst-case drawdown
5. **Position Sizing**: Consider win rate of {results_df.iloc[0]['win_rate']:.1f}% when sizing positions

## Test Metadata
- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- **Test Directory**: {test_dir.name}
- **Status**: Complete
"""
    
    # Save abstract
    with open(test_dir / 'abstract.md', 'w') as f:
        f.write(abstract)
    
    return abstract

# Generate and display abstract
# generate_abstract writes abstract.md into the run directory and returns the
# same text, which is echoed here.
abstract = generate_abstract(results_df, metadata, test_dir)
print(abstract)
print(f"\n💾 Abstract saved to {test_dir}/abstract.md")

## 7️⃣ Master Results Tracking

In [None]:
def update_master_log(test_dir, metadata, results_df):
    """Append this run's headline numbers to the shared master results log.

    The log lives at ``<CONFIG['OUTPUT_BASE']>/master_results_log.csv`` and is
    created on first use. After saving, the five most recent entries are
    printed for comparison.

    Args:
        test_dir: Path of the current run directory (only its name is logged).
        metadata: Run metadata; the four ``best_*`` / ``highest_win_rate``
            keys are optional, the others are required.
        results_df: Window analysis results with an ``avg_return`` column.

    Returns:
        The full master log as a DataFrame, including the new row.
    """
    log_path = Path(CONFIG['OUTPUT_BASE']) / 'master_results_log.csv'

    # Assemble the new row; insertion order defines the CSV column order.
    entry = {
        'test_id': metadata['test_timestamp'],
        'test_dir': test_dir.name,
        'date': datetime.now().strftime('%Y-%m-%d'),
        'time': datetime.now().strftime('%H:%M:%S'),
    }
    for required_key in ('tickers_analyzed', 'date_range', 'data_mode', 'windows_analyzed'):
        entry[required_key] = metadata[required_key]
    for optional_key in ('best_window', 'best_return', 'best_sharpe', 'highest_win_rate'):
        entry[optional_key] = metadata.get(optional_key)
    entry['profitable_windows'] = int((results_df['avg_return'] > 0).sum())
    entry['avg_return_all'] = results_df['avg_return'].mean()
    entry['status'] = 'Complete'

    # Extend the existing log when there is one, otherwise start a new log.
    new_row = pd.DataFrame([entry])
    if log_path.exists():
        master_log = pd.concat([pd.read_csv(log_path), new_row], ignore_index=True)
    else:
        master_log = new_row

    master_log.to_csv(log_path, index=False)

    print("\n📋 Master Log Updated")
    print(f"   Total tests: {len(master_log)}")
    print(f"   Location: {log_path}")

    # Show the latest runs side by side
    comparison_columns = ['test_id', 'tickers_analyzed', 'best_window', 'best_return', 'best_sharpe']
    print("\n📊 Comparison with Previous Tests:")
    print(master_log[comparison_columns].tail(5))

    return master_log

# Update master log
# Skipped entirely when TRACK_MASTER_LOG is falsy; master_log is then left undefined.
if CONFIG['TRACK_MASTER_LOG']:
    master_log = update_master_log(test_dir, metadata, results_df)

## 8️⃣ Final Summary & Next Steps

In [None]:
# Closing banner: recap of the run, the files it produced, and suggested follow-ups.
print("="*80)
print(" "*25 + "✅ ANALYSIS COMPLETE!")
print("="*80)

# best_return may be None (no window qualified); formatting None with :.2f
# would raise TypeError, so fall back to a placeholder.
expected_return_text = (
    f"{metadata['best_return']:.2f}%" if metadata['best_return'] is not None else "n/a"
)

print(f"\n📊 TEST SUMMARY:")
print(f"   • Test ID: {metadata['test_timestamp']}")
print(f"   • IPOs Analyzed: {metadata['tickers_analyzed']}")
print(f"   • Windows Tested: {metadata['windows_analyzed']}")
print(f"   • Best Strategy: {metadata['best_window']}")
print(f"   • Expected Return: {expected_return_text}")

print(f"\n📁 OUTPUT LOCATION:")
print(f"   {test_dir}")

print(f"\n📄 FILES GENERATED:")
files = [
    'config.json',
    'ipo_universe.csv', 
    'window_analysis_results.csv',
    'metadata.json',
    'abstract.md',
    'checkpoint.json'
]

# Only files that actually exist in the run directory are listed, with size in KB.
for file in files:
    file_path = test_dir / file
    if file_path.exists():
        size = file_path.stat().st_size / 1024
        print(f"   ✓ {file:30s} ({size:6.1f} KB)")

print(f"\n🚀 NEXT STEPS:")
print("   1. Review abstract.md for high-level findings")
print("   2. Analyze window_analysis_results.csv for detailed metrics")
print("   3. Compare with previous tests in master_results_log.csv")
print("   4. Run visualization notebook for charts and graphs")
print("   5. Test top strategies with paper trading")

print(f"\n💡 TO RUN ANOTHER TEST:")
print("   1. Modify CONFIG settings in cell 2")
print("   2. Re-run all cells")
print("   3. Results will be saved in new timestamped directory")

print("\n" + "="*80)
print("Thank you for using IPO Farming Backtest System!")
print("="*80)