# Trade ID Hash Testing

Testing hash generation for the `trade_id` column, which is derived from the following fields:
- scrape_date
- strategy_type
- tab_name
- ticker
- trigger_price
- strike_price

In [1]:
import hashlib
import pandas as pd
import sys
import os
from datetime import datetime

# Add database directory to path
sys.path.append(os.path.join(os.getcwd(), 'database'))

from database_config import get_db_connection

In [2]:
def _is_missing(value):
    """Return True when ``value`` is None or a pandas missing marker (NaN, NaT, pd.NA)."""
    if value is None:
        return True
    try:
        return bool(pd.isna(value))
    except (TypeError, ValueError):
        # pd.isna returns an array for list-like input, whose truth value is
        # ambiguous; such values are not scalar missing markers, so they are
        # hashed through their string form instead.
        return False


def generate_trade_id(scrape_date, strategy_type, tab_name, ticker, trigger_price, strike_price):
    """
    Generate a deterministic trade_id hash from the specified columns.

    Each value is converted with ``str()``; missing values (None, NaN, NaT,
    pd.NA) become the empty string, so a missing value and an explicit ''
    intentionally produce the same id. Backslashes and the '|' delimiter are
    escaped inside each component so that a delimiter appearing in the data
    cannot shift field boundaries and make two different rows hash alike.
    Inputs that contain neither character and no NaN-like value hash to the
    plain '|'-joined string.

    Args:
        scrape_date: Date when data was scraped
        strategy_type: Type of strategy (Bear Call, Bull Put, etc.)
        tab_name: Risk level and expiry category
        ticker: Stock ticker symbol
        trigger_price: Price that triggers the strategy
        strike_price: Strike prices for the option spread

    Returns:
        str: SHA-256 hash as a 64-character hexadecimal string
    """
    fields = (scrape_date, strategy_type, tab_name, ticker, trigger_price, strike_price)

    components = []
    for value in fields:
        text = '' if _is_missing(value) else str(value)
        # Escape the escape character first, then the delimiter, so the
        # joined string can be split back into exactly six fields.
        components.append(text.replace('\\', '\\\\').replace('|', '\\|'))

    # Join components with a delimiter to keep field boundaries unambiguous
    combined_string = '|'.join(components)

    # Generate SHA-256 hash
    return hashlib.sha256(combined_string.encode('utf-8')).hexdigest()

In [3]:
# Hand-written sample rows; the third row repeats the first on purpose so the
# printed trade IDs can be compared for determinism.
_SAMPLE_FIELDS = (
    'scrape_date', 'strategy_type', 'tab_name', 'ticker', 'trigger_price', 'strike_price'
)
_SAMPLE_ROWS = [
    ('2025-01-15 10:30:00', 'Bear Call Spread', 'High Risk - Near Exp', 'AAPL', '150.00', '155/160'),
    ('2025-01-15 10:30:00', 'Bull Put Spread', 'Medium Risk - Standard', 'TSLA', '200.00', '195/190'),
    # Same as first - should generate same hash
    ('2025-01-15 10:30:00', 'Bear Call Spread', 'High Risk - Near Exp', 'AAPL', '150.00', '155/160'),
]
sample_data = [dict(zip(_SAMPLE_FIELDS, values)) for values in _SAMPLE_ROWS]

# Hash every sample; the dict keys match generate_trade_id's parameter names,
# so each sample can be passed as keyword arguments.
for sample_number, sample in enumerate(sample_data, start=1):
    trade_id = generate_trade_id(**sample)
    print(f"Sample {sample_number}:")
    print(f"  Input: {sample}")
    print(f"  Trade ID: {trade_id}")
    print(f"  Trade ID (short): {trade_id[:16]}...")
    print()

Sample 1:
  Input: {'scrape_date': '2025-01-15 10:30:00', 'strategy_type': 'Bear Call Spread', 'tab_name': 'High Risk - Near Exp', 'ticker': 'AAPL', 'trigger_price': '150.00', 'strike_price': '155/160'}
  Trade ID: ccbf42cc128092ad3eacb74bcdb5c670226e1c0a3f993ac9917ea82d76b72291
  Trade ID (short): ccbf42cc128092ad...

Sample 2:
  Input: {'scrape_date': '2025-01-15 10:30:00', 'strategy_type': 'Bull Put Spread', 'tab_name': 'Medium Risk - Standard', 'ticker': 'TSLA', 'trigger_price': '200.00', 'strike_price': '195/190'}
  Trade ID: 05a52f305bd6c210123de253ad20198dfa700c75ee231abfbafd17812115a2f6
  Trade ID (short): 05a52f305bd6c210...

Sample 3:
  Input: {'scrape_date': '2025-01-15 10:30:00', 'strategy_type': 'Bear Call Spread', 'tab_name': 'High Risk - Near Exp', 'ticker': 'AAPL', 'trigger_price': '150.00', 'strike_price': '155/160'}
  Trade ID: ccbf42cc128092ad3eacb74bcdb5c670226e1c0a3f993ac9917ea82d76b72291
  Trade ID (short): ccbf42cc128092ad...



In [4]:
# Smoke-test the hash against stored rows when the database is reachable.
# Any failure is reported and swallowed so the rest of the notebook still runs.
try:
    db_conn = get_db_connection()

    if not db_conn.test_connection():
        print("Could not connect to database")
    else:
        print("Database connection successful!")

        # Fetch a handful of existing rows, restricted to the hashed columns
        query = """
        SELECT scrape_date, strategy_type, tab_name, ticker, trigger_price, strike_price
        FROM option_strategies 
        LIMIT 5
        """

        df = db_conn.execute_query_df(query)
        print(f"\nFound {len(df)} sample records from database:")
        print(df)

        if not df.empty:
            print("\nGenerating trade IDs for existing data:")
            # Column order mirrors generate_trade_id's positional parameters
            hash_columns = (
                'scrape_date', 'strategy_type', 'tab_name',
                'ticker', 'trigger_price', 'strike_price',
            )
            for idx, row in df.iterrows():
                trade_id = generate_trade_id(*[row[column] for column in hash_columns])
                short_id = trade_id[:16]
                print(f"Row {idx+1}: {short_id}... ({row['ticker']} - {row['strategy_type']})")

except Exception as e:
    print(f"Error testing with database: {e}")

Database connection successful!

Found 5 sample records from database:
                  scrape_date strategy_type  \
0  2025-04-11T15:49:43.519952     Bear Call   
1  2025-04-11T15:49:47.010204     Bear Call   
2  2025-04-11T15:49:50.463801     Bear Call   
3  2025-04-11T15:49:53.953933     Bear Call   
4  2025-04-11T15:50:02.382333      Bull Put   

                                        tab_name ticker trigger_price  \
0     Mild Risk 95-97% accuracy > shorter expiry      V        334.75   
1  Minimal Risk 97-99% accuracy > shorter expiry   None          None   
2      Mild Risk 95-97% accuracy > longer expiry   None          None   
3   Minimal Risk 97-99% accuracy > longer expiry   None          None   
4     Mild Risk 95-97% accuracy > shorter expiry   None          None   

             strike_price  
0  sell 350.0 - buy 360.0  
1                    None  
2                    None  
3                    None  
4                    None  

Generating trade IDs for existing data

  return pd.read_sql_query(query, conn)


In [5]:
# Test uniqueness with edge cases
print("Testing edge cases and uniqueness:")

# Each case is a name plus the six positional arguments of generate_trade_id.
# The first two cases are expected to share a trade_id, because
# generate_trade_id turns None into '' before hashing.
test_cases = [
    {
        'name': 'With None values',
        'data': (None, 'Bear Call', 'High Risk', 'AAPL', '150.00', None)
    },
    {
        'name': 'Empty strings',
        'data': ('', 'Bear Call', 'High Risk', 'AAPL', '150.00', '')
    },
    {
        # NOTE(review): despite its name this case does not reorder any values;
        # it acts as the baseline for the 'Slightly different strike' case.
        'name': 'Different order same values',
        'data': ('2025-01-15', 'Bear Call', 'High Risk', 'AAPL', '150.00', '155/160')
    },
    {
        'name': 'Slightly different strike',
        'data': ('2025-01-15', 'Bear Call', 'High Risk', 'AAPL', '150.00', '155/161')
    }
]

hashes = {}
for test_case in test_cases:
    trade_id = generate_trade_id(*test_case['data'])
    hashes[test_case['name']] = trade_id
    print(f"{test_case['name']}: {trade_id[:16]}...")

# Check for collisions
hash_values = list(hashes.values())
unique_hashes = set(hash_values)

# Group case names by hash so a collision can be reported by name instead of
# only as a count.
names_by_hash = {}
for case_name, case_hash in hashes.items():
    names_by_hash.setdefault(case_hash, []).append(case_name)
colliding_groups = [names for names in names_by_hash.values() if len(names) > 1]

print(f"\nGenerated {len(hash_values)} hashes, {len(unique_hashes)} unique")
if colliding_groups:
    print("WARNING: Hash collision detected!")
    for names in colliding_groups:
        print(f"  Same trade_id: {', '.join(names)}")
else:
    print("✓ No hash collisions detected")

Testing edge cases and uniqueness:
With None values: 8037063bfb8b7a0c...
Empty strings: 8037063bfb8b7a0c...
Different order same values: 41bcc7472cf23ad8...
Slightly different strike: a15b8e582a34e9a9...

Generated 4 hashes, 3 unique


In [None]:
# Function to add trade_id column to existing data
def add_trade_id_to_dataframe(df):
    """
    Attach a trade_id column to a DataFrame with option strategies data.

    The column is written onto ``df`` itself (the input is modified in place)
    and the same object is returned. Each row's id is produced by
    generate_trade_id from that row's scrape_date, strategy_type, tab_name,
    ticker, trigger_price and strike_price values.
    """
    id_fields = (
        'scrape_date', 'strategy_type', 'tab_name',
        'ticker', 'trigger_price', 'strike_price',
    )

    def _trade_id_for(row):
        # Field order must match generate_trade_id's positional parameters
        return generate_trade_id(*[row[field] for field in id_fields])

    trade_ids = df.apply(_trade_id_for, axis=1)
    df['trade_id'] = trade_ids
    return df

# Exercise the DataFrame helper on a copy, since it writes the trade_id column
# onto the frame it receives; test_df itself keeps its original columns.
test_df = pd.DataFrame(sample_data)
test_df_with_ids = add_trade_id_to_dataframe(test_df.copy())

preview_columns = ['ticker', 'strategy_type', 'trade_id']
print("Sample DataFrame with trade_ids:")
print(test_df_with_ids[preview_columns].head())