In [5]:
"""
Diagnostic Script for LPPLS Bubble Detection
Run this to check your data and identify potential issues
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Input CSV passed to diagnose_data() by the call at the bottom of this
# script; diagnose_data() requires the columns 'Dates', 'Gold' and 'Platinum'.
csv_path = r"C:\Users\B420615\OneDrive - Standard Bank\GoldPlatData.csv"

def _daily_returns(prices):
    """Return day-over-day fractional changes of *prices*, NaN where undefined."""
    # fill_method=None: never forward-fill gaps. The implicit padding is
    # deprecated in pandas and would invent 0% returns across missing rows.
    changes = prices.pct_change(fill_method=None)
    # A zero price makes the next return +/-inf, which would turn every
    # summary statistic into inf/NaN; treat those as missing instead.
    return changes.replace([np.inf, -np.inf], np.nan)


def diagnose_data(csv_path):
    """
    Diagnose potential issues with input data before an LPPLS analysis.

    Loads the CSV at ``csv_path`` and prints a numbered report covering:
    file loading, required columns, date parsing (and ordering), missing
    values, price ranges, data sufficiency for rolling windows, return
    volatility, and a list of recommendations.

    Args:
        csv_path: Path of the CSV file to inspect. The file must contain the
            columns 'Dates', 'Gold' and 'Platinum'.

    Returns:
        None. The function returns early, after printing the reason, when the
        file cannot be loaded, a required column is missing, or the dates
        cannot be parsed.

    Side effects:
        Prints the report to stdout and tries to write the figure
        'data_diagnostic.png' to the current working directory. A plotting
        failure is reported but does not abort the diagnostic.
    """
    price_cols = ['Gold', 'Platinum']
    required_cols = ['Dates'] + price_cols

    window_size = 252        # Default window
    step_size = 21           # Rows between the starts of consecutive windows
    min_windows = 10         # Minimum windows for meaningful analysis
    trading_days = 252       # Annualization factor for daily returns
    high_vol_pct = 50        # Annualized volatility (%) regarded as "high"

    print("="*70)
    print("LPPLS BUBBLE DETECTION - DATA DIAGNOSTIC")
    print("="*70)

    # Load data
    print("\n1. Loading Data...")
    try:
        data = pd.read_csv(csv_path)
        print("   ✓ File loaded successfully")
        print(f"   Rows: {len(data)}")
        print(f"   Columns: {list(data.columns)}")
    except Exception as e:
        # Deliberately broad: any load failure is itself a diagnostic result.
        print(f"   ✗ Error loading file: {e}")
        return

    # Check columns
    print("\n2. Checking Column Names...")
    if set(required_cols).issubset(set(data.columns)):
        print(f"   ✓ All required columns present: {required_cols}")
    else:
        print("   ✗ Missing columns!")
        print(f"   Required: {required_cols}")
        print(f"   Found: {list(data.columns)}")
        return

    # Parse dates
    print("\n3. Parsing Dates...")
    try:
        data['Dates'] = pd.to_datetime(data['Dates'])
        print("   ✓ Dates parsed successfully")
        print(f"   Range: {data['Dates'].min()} to {data['Dates'].max()}")
        print(f"   Total days: {(data['Dates'].max() - data['Dates'].min()).days}")
    except Exception as e:
        print(f"   ✗ Error parsing dates: {e}")
        return

    # Returns below are row-over-row, so rows must be in chronological order.
    if not data['Dates'].is_monotonic_increasing:
        print("   ⚠️  Warning: Dates are not in chronological order - sorting before analysis")
        data = data.sort_values('Dates', kind='mergesort').reset_index(drop=True)

    # Check for missing values
    print("\n4. Checking for Missing Values...")
    missing = data[required_cols].isnull().sum()
    if missing.sum() == 0:
        print("   ✓ No missing values")
    else:
        print("   ✗ Missing values found:")
        for col, count in missing.items():
            if count > 0:
                print(f"      {col}: {count} missing")

    # Check for zeros or negatives
    print("\n5. Checking Price Values...")
    for asset in price_cols:
        # Coerce to numbers first: a stray text cell would otherwise make the
        # column non-numeric and crash the formatted statistics below.
        numeric = pd.to_numeric(data[asset], errors='coerce').astype('float64')
        non_numeric = int(numeric.isna().sum() - data[asset].isna().sum())
        data[asset] = numeric

        min_val = numeric.min()
        max_val = numeric.max()
        zeros = (numeric == 0).sum()
        negatives = (numeric < 0).sum()

        print(f"\n   {asset}:")
        print(f"      Min: ${min_val:.2f}")
        print(f"      Max: ${max_val:.2f}")
        print(f"      Mean: ${numeric.mean():.2f}")
        print(f"      Std: ${numeric.std():.2f}")

        if non_numeric > 0:
            print(f"      ⚠️  Warning: {non_numeric} non-numeric values (treated as missing)")
        if zeros > 0:
            print(f"      ⚠️  Warning: {zeros} zero values")
        if negatives > 0:
            print(f"      ⚠️  Warning: {negatives} negative values")

    # Check data sufficiency
    print("\n6. Checking Data Sufficiency for LPPLS...")
    total_days = len(data)
    # Clamp at zero: with fewer rows than one window the floor division would
    # otherwise report a negative number of windows.
    possible_windows = max(0, (total_days - window_size) // step_size + 1)

    print(f"   Total observations: {total_days}")
    print(f"   Default window size: {window_size} days (~1 year)")
    print(f"   Possible windows: {possible_windows}")

    if total_days < window_size:
        print("   ✗ ERROR: Not enough data!")
        print(f"      Need at least {window_size} observations")
        print(f"      You have: {total_days}")
        print("      Solution: Get more historical data")
    elif possible_windows < min_windows:
        print("   ⚠️  Warning: Limited windows for analysis")
        print(f"      Recommended: At least {min_windows} windows")
        print(f"      You have: {possible_windows}")
        print("      Solution: Reduce window_size or get more data")
    else:
        print("   ✓ Sufficient data for analysis")

    # Check for extreme volatility
    print("\n7. Checking Price Volatility...")
    returns_pct = {}   # asset -> daily returns in percent, aligned with rows
    annual_vol = {}    # asset -> annualized volatility in percent
    for asset in price_cols:
        returns = _daily_returns(data[asset])
        returns_pct[asset] = returns * 100

        finite_returns = returns.dropna()
        volatility = finite_returns.std() * np.sqrt(trading_days) * 100  # Annualized
        annual_vol[asset] = volatility

        max_return = finite_returns.max() * 100
        min_return = finite_returns.min() * 100

        print(f"\n   {asset}:")
        print(f"      Annualized volatility: {volatility:.2f}%")
        print(f"      Max daily return: {max_return:.2f}%")
        print(f"      Min daily return: {min_return:.2f}%")

        if volatility > high_vol_pct:
            print("      ⚠️  High volatility - LPPLS may struggle")
        if abs(max_return) > 20 or abs(min_return) > 20:
            print("      ⚠️  Extreme daily moves detected")

    # Visual inspection
    print("\n8. Generating Visual Inspection Plot...")
    fig = None
    try:
        fig, axes = plt.subplots(2, 1, figsize=(12, 8))

        # Price plot
        ax1 = axes[0]
        ax1.plot(data['Dates'], data['Gold'], label='Gold', alpha=0.7)
        ax1.plot(data['Dates'], data['Platinum'], label='Platinum', alpha=0.7)
        ax1.set_ylabel('Price (USD)')
        ax1.set_title('Price History')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Returns plot
        ax2 = axes[1]
        ax2.plot(data['Dates'], returns_pct['Gold'], label='Gold Returns', alpha=0.5)
        ax2.plot(data['Dates'], returns_pct['Platinum'], label='Platinum Returns', alpha=0.5)
        ax2.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
        ax2.set_ylabel('Daily Return (%)')
        ax2.set_xlabel('Date')
        ax2.set_title('Daily Returns')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        fig.tight_layout()
        fig.savefig('data_diagnostic.png', dpi=150, bbox_inches='tight')
        print("   ✓ Plot saved as 'data_diagnostic.png'")
    except Exception as e:
        # Plotting is best-effort; the textual report is still useful.
        print(f"   ✗ Error creating plot: {e}")
    finally:
        # Release the figure even when plotting failed part-way through.
        if fig is not None:
            plt.close(fig)

    # Recommendations
    print("\n" + "="*70)
    print("RECOMMENDATIONS")
    print("="*70)

    recommendations = []

    if total_days < 500:
        recommendations.append("• Get more historical data (aim for 1000+ days)")

    if possible_windows < 20:
        recommendations.append("• Reduce window_size to 126 days (6 months)")
        recommendations.append("• Or reduce step_size to 10 days")

    # Use the annualized return volatility from step 7 for every asset. The
    # spread of price levels (std/mean) mostly measures long-run trend and
    # would flag calm but strongly trending series as "volatile".
    if any(vol > high_vol_pct for vol in annual_vol.values()):
        recommendations.append("• High volatility detected - expect lower success rate")

    if not recommendations:
        print("\n✓ Your data looks good for LPPLS analysis!")
        print("\nNext steps:")
        print("  1. Run: python run_analysis.py")
        print("  2. If you get 0 successful fits, try:")
        print("     - Reducing window_size to 200 or 150")
        print("     - Increasing step_size to 30 or 50")
    else:
        print("\nSuggested improvements:")
        for rec in recommendations:
            print(rec)

    print("\n" + "="*70)
    print("DIAGNOSTIC COMPLETE")
    print("="*70)


# NOTE(review): the command-line entry point below is disabled, so the
# diagnostic always runs against the module-level csv_path whenever this code
# is executed. Presumably it was switched off for notebook use - confirm
# before re-enabling.
#
# if __name__ == "__main__":
#     import sys
#
#     if len(sys.argv) > 1:
#         csv_path = sys.argv[1]
#     else:
#         csv_path = r"C:\Users\B420615\OneDrive - Standard Bank\GoldPlatData.csv"

diagnose_data(csv_path)

LPPLS BUBBLE DETECTION - DATA DIAGNOSTIC

1. Loading Data...
   ✓ File loaded successfully
   Rows: 9470
   Columns: ['Dates', 'Gold', 'Platinum']

2. Checking Column Names...
   ✓ All required columns present: ['Dates', 'Gold', 'Platinum']

3. Parsing Dates...
   ✓ Dates parsed successfully
   Range: 1990-01-02 00:00:00 to 2025-10-20 00:00:00
   Total days: 13075

4. Checking for Missing Values...
   ✓ No missing values

5. Checking Price Values...

   Gold:
      Min: $252.55
      Max: $4326.58
      Mean: $969.52
      Std: $696.89

   Platinum:
      Min: $333.94
      Max: $2250.50
      Mean: $888.25
      Std: $429.01

6. Checking Data Sufficiency for LPPLS...
   Total observations: 9470
   Default window size: 252 days (~1 year)
   Possible windows: 439
   ✓ Sufficient data for analysis

7. Checking Price Volatility...

   Gold:
      Annualized volatility: 15.42%
      Max daily return: 10.79%
      Min daily return: -9.07%

   Platinum:
      Annualized volatility: 21.65%
  