# Check Data Size
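This notebook calculates the total on-disk size of `.parquet` files in `data/train_tracking` and `data/train_annotation` to estimate memory requirements for preloading. Note that Parquet files are typically compressed and encoded, so the in-memory footprint after loading is usually larger than the on-disk size reported here; treat the result as a lower bound.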

In [1]:
import os
from pathlib import Path

def get_dir_size(path):
    """Return the combined size and number of `.parquet` files under `path`.

    The directory is searched recursively. Only regular files are counted, so
    a directory whose name ends in `.parquet` (for example a partitioned
    dataset folder) is not mistaken for a data file; any `.parquet` files
    inside it are still found by the recursive search.

    Args:
        path: Directory to scan (string or path-like).

    Returns:
        A `(total_size, file_count)` tuple, with `total_size` in bytes.
        If `path` does not exist, a warning is printed and `(0, 0)` is
        returned.
    """
    root = Path(path)
    if not root.exists():
        print(f"Warning: {path} does not exist.")
        return 0, 0

    total_size = 0
    file_count = 0
    for p in root.rglob('*.parquet'):
        # rglob matches on name only, so it can yield directories (and
        # dangling symlinks); skip anything that is not a regular file.
        if not p.is_file():
            continue
        total_size += p.stat().st_size
        file_count += 1
    return total_size, file_count

# Locate the data directory relative to the current working directory.
# The notebook is normally run from notebooks/ (data lives in ../data), but it
# may also be run from the repository root (data lives in data/).
print(f"Current Working Directory: {os.getcwd()}")

# Candidates are tried in order; the first one that is an existing directory wins.
data_root = next(
    (candidate for candidate in (Path('data'), Path('../data')) if candidate.is_dir()),
    None,
)
if data_root is None:
    # Fail loudly instead of falling back to a machine-specific absolute path:
    # a wrong root would only produce "does not exist" warnings and a
    # misleading 0.00 GB total.
    raise FileNotFoundError(
        "Could not find a 'data' directory in the current working directory "
        f"or its parent (cwd: {os.getcwd()}). Run this notebook from the "
        "repository root or from notebooks/."
    )

tracking_dir = data_root / 'train_tracking'
annotation_dir = data_root / 'train_annotation'

# Sizes are reported in GiB (1024**3 bytes), labelled "GB" in the output.
bytes_per_gb = 1024**3

print(f"Scanning {tracking_dir}...")
track_size, track_count = get_dir_size(tracking_dir)
print(f"Tracking Data: {track_count} files, {track_size / bytes_per_gb:.2f} GB")

print(f"Scanning {annotation_dir}...")
anno_size, anno_count = get_dir_size(annotation_dir)
print(f"Annotation Data: {anno_count} files, {anno_size / bytes_per_gb:.2f} GB")

total_gb = (track_size + anno_size) / bytes_per_gb
print(f"\nTotal Size: {total_gb:.2f} GB")

Current Working Directory: c:\Users\Windows11\Downloads\mice_social_action_new\notebooks
Scanning ..\data\train_tracking...
Tracking Data: 8789 files, 2.63 GB
Scanning ..\data\train_annotation...
Annotation Data: 847 files, 0.00 GB

Total Size: 2.63 GB
