# File Verification Notebook

**Purpose**: Verify which files you uploaded to Google Drive and check that their structure is what the evaluation notebook expects.

**Expected**: 3 parquet files with 50 feature columns each, and 3 model files (.pt)

## Step 1: Mount Google Drive

In [None]:
# Colab-only module that exposes Google Drive as a local filesystem.
from google.colab import drive

# Mount Drive at the location the later cells read from
# (they look under /content/drive/MyDrive/crpbot).
mount_point = '/content/drive'
drive.mount(mount_point)

## Step 2: List Files in Your Drive Folder

In [None]:
import os

drive_path = '/content/drive/MyDrive/crpbot'

print(f"üìÇ Checking: {drive_path}\n")
print(f"Exists: {os.path.exists(drive_path)}\n")

if os.path.exists(drive_path):
    print("Contents:")
    !ls -lh /content/drive/MyDrive/crpbot/
    print("\n" + "="*60)
    
    # Check models folder
    if os.path.exists(f"{drive_path}/models"):
        print("\nüì¶ Models folder:")
        !ls -lh /content/drive/MyDrive/crpbot/models/
    
    # Check features folder
    if os.path.exists(f"{drive_path}/features"):
        print("\nüì¶ Features folder:")
        !ls -lh /content/drive/MyDrive/crpbot/features/
    
    # Check root folder for any parquet/pt files
    print("\nüì¶ Files in root:")
    !ls -lh /content/drive/MyDrive/crpbot/*.parquet 2>/dev/null || echo "No .parquet files in root"
    !ls -lh /content/drive/MyDrive/crpbot/*.pt 2>/dev/null || echo "No .pt files in root"
else:
    print("‚ùå ERROR: Folder not found!")
    print("Expected: /My Drive/crpbot/")

## Step 3: Verify Parquet File Structure

In [None]:
!pip install -q pandas pyarrow

In [None]:
import glob
import os

import pandas as pd

# Number of feature columns every uploaded parquet file is expected to have.
EXPECTED_FEATURE_COUNT = 50

# Columns that are not counted as features (OHLCV, timestamp, etc.).
NON_FEATURE_COLUMNS = frozenset([
    'timestamp', 'open', 'high', 'low', 'close', 'volume',
    'session', 'volatility_regime',
])


def select_feature_columns(columns, exclude=NON_FEATURE_COLUMNS):
    """Return the names in ``columns`` that are not in ``exclude``.

    Args:
        columns: Iterable of column names (e.g. ``df.columns``).
        exclude: Container of names to leave out of the feature count.

    Returns:
        A list of the remaining names, in their original order.
    """
    return [name for name in columns if name not in exclude]


# Search for parquet files in multiple locations
search_paths = [
    '/content/drive/MyDrive/crpbot/features/*.parquet',
    '/content/drive/MyDrive/crpbot/*.parquet'
]

all_files = []
for pattern in search_paths:
    all_files.extend(glob.glob(pattern))

print(f"\n🔍 Found {len(all_files)} parquet file(s)\n")
print("="*80)

if all_files:
    for file_path in sorted(all_files):
        try:
            # NOTE(review): this loads the whole file just to inspect its
            # shape; reading only the parquet metadata would be lighter.
            df = pd.read_parquet(file_path)

            feature_cols = select_feature_columns(df.columns)
            feature_count = len(feature_cols)

            file_name = os.path.basename(file_path)
            print(f"\n📄 {file_name}")
            print(f"   Rows: {len(df):,}")
            print(f"   Total columns: {len(df.columns)}")
            print(f"   Feature columns: {feature_count}")
            print(f"   First 10 features: {feature_cols[:10]}")

            # Check if it's the right file
            if feature_count == EXPECTED_FEATURE_COUNT:
                print(f"   ✅ CORRECT: Has {EXPECTED_FEATURE_COUNT} features")
            else:
                print(f"   ❌ WRONG: Expected {EXPECTED_FEATURE_COUNT} features, got {feature_count}")
                # Only claim features are missing when there really are too
                # few; a file with extra columns is wrong for another reason.
                if feature_count < EXPECTED_FEATURE_COUNT:
                    print(f"   Missing features: Need to upload {EXPECTED_FEATURE_COUNT}-feature version")
                else:
                    print(f"   Unexpected extra columns: Need to upload {EXPECTED_FEATURE_COUNT}-feature version")

            print("   " + "-"*70)

        except Exception as e:
            # Best effort: report the unreadable file and keep checking the rest.
            print(f"\n❌ Error reading {file_path}: {e}")
else:
    print("\n❌ No parquet files found!")
    print("\nYou need to upload the feature files from your server:")
    print("  - features_BTC-USD_1m_2025-11-13_50feat.parquet (228 MB)")
    print("  - features_ETH-USD_1m_2025-11-13_50feat.parquet (218 MB)")
    print("  - features_SOL-USD_1m_2025-11-13_50feat.parquet (198 MB)")

print("\n" + "="*80)

## Step 4: Verify Model Files

In [None]:
import glob
import os

# Hash embedded in the file names of the 50-feature model checkpoints.
EXPECTED_MODEL_HASH = '7b5f0829'


def is_expected_model(file_name, expected_hash=EXPECTED_MODEL_HASH):
    """Return True when ``file_name`` contains ``expected_hash``.

    Args:
        file_name: Base name of a model file (no directory part).
        expected_hash: Substring that identifies the expected model build.

    Returns:
        bool: whether the hash appears anywhere in the file name.
    """
    return expected_hash in file_name


# Search for .pt files
search_paths = [
    '/content/drive/MyDrive/crpbot/models/*.pt',
    '/content/drive/MyDrive/crpbot/*.pt'
]

all_models = []
for pattern in search_paths:
    all_models.extend(glob.glob(pattern))

print(f"\n🔍 Found {len(all_models)} model file(s)\n")
print("="*80)

if all_models:
    for model_path in sorted(all_models):
        file_name = os.path.basename(model_path)
        file_size_mb = os.path.getsize(model_path) / (1024 * 1024)
        print(f"\n📄 {file_name}")
        print(f"   Size: {file_size_mb:.2f} MB")

        # Check if it's the expected file
        if is_expected_model(file_name):
            print(f"   ✅ CORRECT: 50-feature model ({EXPECTED_MODEL_HASH})")
        else:
            print("   ⚠️  Verify this is the correct model")
else:
    print("\n❌ No model files found!")
    print("\nYou need to upload the model files from your server:")
    print("  - lstm_BTC_USD_1m_7b5f0829.pt (3.9 MB)")
    print("  - lstm_ETH_USD_1m_7b5f0829.pt (3.9 MB)")
    print("  - lstm_SOL_USD_1m_7b5f0829.pt (3.9 MB)")

print("\n" + "="*80)

## Summary

**What you should see**:
- ✅ 3 parquet files with **50 features** each
- ✅ 3 model files (.pt) with hash **7b5f0829**

**If you see 36 features or different file names**:
1. You uploaded the WRONG files
2. Delete them from Google Drive
3. Upload the correct 50-feature files from `/root/crpbot/data/features/`
4. Upload the correct model files from `/root/crpbot/models/new/`

**Next step**: Once you confirm you have the right files, run `colab_crpbot_fixed.ipynb` for evaluation.