# HistoSweep: Full sweep of H&E images to identify good quality super-pixels for downstream ST analysis

##  *** Please enter input parameters ***

In [None]:
# ===== USER-DEFINED INPUT PARAMETERS =====

# Path prefix of the H&E image folder. It is concatenated directly with file and
# folder names later in the notebook (e.g. HE_prefix + 'he'), so keep the trailing slash.
HE_prefix = 'HE/demo/'

# Name of the output folder; it is created at f"{HE_prefix}{output_directory}".
output_directory = "HistoSweep_Output" # Folder for HistoSweep output/results

# Whether to rescale the image (rescale.py is run when True).
need_scaling_flag = False  # True if image resolution ≠ 0.5µm (or desired size) per pixel

# Whether to preprocess the image (preprocess.py is run when True).
need_preprocessing_flag = False  # True if image dimensions are not divisible by patch_size

# Pixel size (in microns) of the raw H&E image; forwarded to rescale.py as --pixelSizeRaw.
pixel_size_raw = 0.5  # Typically provided by the scanner/metadata (e.g., 0.25 µm/pixel for 40x)

# Controls the amount of density filtering (e.g. artifacts); consider lowering for VERY large images.
density_thresh = 100 # Typically 100 works well; increase if artifacts (e.g. fiducial markers) are not being removed effectively

# Whether to clean the background (i.e. remove isolated debris and small specks outside the tissue).
clean_background_flag = True # Set to False to preserve fibrous regions that are otherwise incorrectly filtered out

# Size parameter used when removing isolated debris and small specks outside the tissue.
min_size = 10 # Decrease (e.g. 5) to retain fibrous areas such as adipose tissue; increase (e.g. 50) to remove more or larger debris


# ===== ADDITIONAL PARAMETERS (typically do not need to change) =====

# Side length, in pixels, of one square patch (superpixel) used throughout processing.
patch_size = 16  # 16x16 pixels → typically 8µm if pixel_size = 0.5

# Target pixel size (in microns); forwarded to rescale.py as --pixelSize.
pixel_size = 0.5  # Final desired resolution; keep as 0.5 µm for standardization



Please store your raw histology image as 'he-raw.jpg', the scaled image as 'he-scaled.jpg', and the final preprocessed image as 'he.jpg'. If you use the scaling and preprocessing steps provided below, these files are created automatically.

## Load in packages and basic functions

In [None]:
%load_ext autoreload
%autoreload 2

import os
from utils import load_image, get_image_filename
from saveParameters import saveParams
from computeMetrics import compute_metrics
from densityFiltering import compute_low_density_mask
from textureAnalysis import run_texture_analysis
from ratioFiltering import run_ratio_filtering
from generateMask import generate_final_mask
from additionalPlots import generate_additionalPlots

## Scale and preprocess H&E image 
Preprocess the image: <br>
(1) Scale so that each pixel is size 0.5 µm (he-scaled.jpg)<br>
(2) Pad the scaled image so its height and width are divisible by patch_size (he.jpg)<br>

In [None]:
# Optionally rescale and preprocess the image by running the helper scripts.
# IPython substitutes each {name} with the value of that notebook variable
# before the script is launched.

# Step 1: rescale from pixel_size_raw to pixel_size (skipped unless the flag is set).
if need_scaling_flag:
    %run rescale.py --image --pixelSizeRaw {pixel_size_raw} --pixelSize {pixel_size} --prefix {HE_prefix}

# Step 2: preprocess for the chosen patch_size (skipped unless the flag is set).
if need_preprocessing_flag: 
    %run preprocess.py --image --patchSize {patch_size} --prefix {HE_prefix}

In [None]:
# Load the final image whose path stem is HE_prefix + 'he'.
# NOTE(review): get_image_filename presumably resolves the file extension — helper not shown here.
image = load_image(get_image_filename(HE_prefix+'he'))
# Print the array shape as a quick sanity check.
print(image.shape)

## Patchify image into super-pixels and compute metrics

In [None]:
# Create the output folder. exist_ok=True replaces the separate existence
# check, which avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(f"{HE_prefix}{output_directory}", exist_ok=True)

In [None]:
# Hand every run parameter to saveParams so the configuration of this run is recorded.
# NOTE(review): presumably written under the output directory — helper not shown here.
saveParams(HE_prefix, output_directory, need_scaling_flag, need_preprocessing_flag, pixel_size_raw,density_thresh,clean_background_flag,min_size,patch_size,pixel_size)


In [None]:
# Compute the per-superpixel metrics for the loaded image. compute_metrics returns
# six values; z_v_image_, he_std_image_ and ratio_norm_ feed the filtering steps below.
# NOTE(review): the meaning of each returned value is inferred from its name only — confirm in computeMetrics.
he_std_norm_image_, he_std_image_, z_v_norm_image_, z_v_image_, ratio_norm_, ratio_norm_image_ = compute_metrics(image, patch_size=patch_size)


## Define threshold criteria:

### (1) Low density superpixels 

In [None]:
# Identify low-density superpixels from the computed metrics, using the
# user-supplied density_thresh.
mask1_lowdensity = compute_low_density_mask(z_v_image_, he_std_image_, ratio_norm_, density_thresh=density_thresh)


In [None]:
# Report the sum of the low-density mask (the number of flagged superpixels, assuming a boolean/0-1 mask).
print('Total selected for density filtering: ', mask1_lowdensity.sum())


In [None]:
# Run texture analysis with the low-density mask as input; the returned mask is
# used as the updated low-density mask in the steps below.
# glcm_levels=64 is passed through to run_texture_analysis.
mask1_lowdensity_update = run_texture_analysis(prefix=HE_prefix, image=image, tissue_mask=mask1_lowdensity, output_dir=output_directory, patch_size=patch_size, glcm_levels=64)


### (2) Low ratio superpixels

In [None]:
# Identify low-ratio superpixels from ratio_norm_ and the updated low-density mask.
# The second return value is kept as otsu_thresh
# (NOTE(review): presumably an Otsu threshold — confirm in ratioFiltering).
mask2_lowratio, otsu_thresh = run_ratio_filtering(ratio_norm_, mask1_lowdensity_update)

In [None]:
# Display the shape of the low-ratio mask as a sanity check.
mask2_lowratio.shape

## Generate final selection of superpixels

In [None]:
# Combine the two filtering masks into the final superpixel selection.
# clean_background_flag and min_size (set in the parameter cell) are forwarded
# as clean_background and minSize.
generate_final_mask(prefix=HE_prefix, he=image, 
                    mask1_updated = mask1_lowdensity_update, mask2 = mask2_lowratio, output_dir = output_directory,
                    clean_background = clean_background_flag, 
                    super_pixel_size=patch_size, minSize = min_size)


## Generate additional plots
These plots are optional and are intended to provide further insight into the filtering process. Generating them takes some additional time. This step is not required for the core HistoSweep method.

In [None]:
import tracemalloc

# Scratch experiment: watch tracemalloc's counters while a large Python list
# is allocated and then released.
tracemalloc.start()

a = [0] * 10**7  # 10 million references to the same int; ~80 MB assuming 8-byte pointers
current, peak = tracemalloc.get_traced_memory()
print(f"Current: {current}, Peak: {peak}")

del a  # drop the only reference so the list is freed
# Read the counters again after the list has been released.
current, peak = tracemalloc.get_traced_memory()
print(f"Current after delete: {current}, Peak: {peak}")

tracemalloc.stop()

In [None]:
import numpy as np
import tracemalloc

# Scratch experiment: report tracemalloc's current/peak counters (in MiB)
# after one large NumPy array has been created.
tracemalloc.start()
try:
    a = np.zeros((10000, 10000))  # 10,000 x 10,000 float64 values ≈ 800 MB

    current, peak = tracemalloc.get_traced_memory()
    print(f"tracemalloc current: {current / 1024**2:.2f} MiB, peak: {peak / 1024**2:.2f} MiB")
finally:
    # Always switch tracing off again; otherwise it stays enabled for every
    # later cell and skews their measurements.
    tracemalloc.stop()

In [1]:
import numpy as np
import tracemalloc

# Scratch experiment: report tracemalloc's current/peak counters (in MiB)
# after one large NumPy array has been created.
tracemalloc.start()
try:
    a = np.zeros((10000, 10000))  # ~800 MB, float64
    current, peak = tracemalloc.get_traced_memory()
    print(f"tracemalloc current: {current / 1024**2:.2f} MiB, peak: {peak / 1024**2:.2f} MiB")
finally:
    # Always switch tracing off again; otherwise it stays enabled for every
    # later cell and skews their measurements.
    tracemalloc.stop()

tracemalloc current: 762.94 MiB, peak: 762.95 MiB


In [1]:
import numpy as np
import tracemalloc
import os
import psutil

# Scratch experiment: compare tracemalloc's counters with the process RSS
# reported by psutil after one large NumPy allocation.
tracemalloc.start()
try:
    # Handle on the current process, used to read its resident set size.
    process = psutil.Process(os.getpid())

    # Allocate ~800 MB numpy array
    a = np.zeros((10000, 10000), dtype=np.float64)  # 10,000 × 10,000 × 8 bytes ≈ 800 MB

    current, peak = tracemalloc.get_traced_memory()
    rss = process.memory_info().rss  # in bytes

    # NOTE(review): in the recorded run RSS stayed at ~65 MiB while tracemalloc
    # reported ~763 MiB — presumably because the zero-filled pages were never
    # touched and so were not yet resident. Confirm before drawing conclusions.
    print(f"tracemalloc current: {current / 1024 ** 2:.2f} MiB, peak: {peak / 1024 ** 2:.2f} MiB")
    print(f"psutil process RSS: {rss / 1024 ** 2:.2f} MiB")
finally:
    # Always switch tracing off again so it does not leak into later cells.
    tracemalloc.stop()

tracemalloc current: 762.95 MiB, peak: 762.96 MiB
psutil process RSS: 64.70 MiB


In [1]:
import tracemalloc
import psutil
import os
import numpy as np
import gc

def _measure_allocation(process, allocate):
    """Run ``allocate()`` between paired tracemalloc snapshots and RSS readings.

    Args:
        process: ``psutil.Process`` whose RSS is sampled.
        allocate: zero-argument callable that performs the allocation and
            returns the allocated object (kept alive until the readings are taken).

    Returns:
        ``(traced_mb, rss_mb, obj)``: the net growth seen by tracemalloc, the RSS
        growth seen by psutil (both in MiB), and the object ``allocate`` returned.
    """
    gc.collect()
    tracemalloc.start()
    try:
        rss_before = process.memory_info().rss / 1024 / 1024
        snapshot_before = tracemalloc.take_snapshot()

        obj = allocate()

        snapshot_after = tracemalloc.take_snapshot()
        rss_after = process.memory_info().rss / 1024 / 1024

        # size_diff is the per-line change between the snapshots; ``size`` would
        # be the total held in the later snapshot, including anything that was
        # already traced before the measurement started.
        stats = snapshot_after.compare_to(snapshot_before, 'lineno')
        traced_mb = sum(stat.size_diff for stat in stats) / 1024 / 1024
    finally:
        # Never leave tracing enabled, even if the allocation raised.
        tracemalloc.stop()
    return traced_mb, rss_after - rss_before, obj


def compare_memory_tracking():
    """Compare what tracemalloc and psutil report for three kinds of allocation.

    Measures a pure-Python list, a NumPy array and a ctypes buffer, printing the
    tracemalloc growth, the RSS growth and their absolute difference for each,
    followed by a fixed summary text. Returns nothing.
    """
    process = psutil.Process(os.getpid())

    print("=" * 70)
    print("tracemalloc vs psutil: C扩展内存分配对比")
    print("=" * 70)

    # Test 1: pure-Python list of one million floats.
    print("\n1. 纯Python列表分配:")
    print("-" * 40)

    tracemalloc_size, psutil_size, python_list = _measure_allocation(
        process, lambda: [i * 1.5 for i in range(1000000)]
    )

    print(f"tracemalloc测量: {tracemalloc_size:.2f} MB")
    print(f"psutil测量:      {psutil_size:.2f} MB")
    print(f"差异:           {abs(tracemalloc_size - psutil_size):.2f} MB")

    del python_list
    gc.collect()

    # Test 2: NumPy array (2000 x 2000 float64, ~30.5 MiB of array data).
    print("\n2. NumPy数组分配 (C扩展):")
    print("-" * 40)

    tracemalloc_size, psutil_size, numpy_array = _measure_allocation(
        process, lambda: np.random.random((2000, 2000)).astype(np.float64)
    )

    print(f"tracemalloc测量: {tracemalloc_size:.2f} MB")
    print(f"psutil测量:      {psutil_size:.2f} MB")
    print(f"差异:           {abs(tracemalloc_size - psutil_size):.2f} MB")
    print(f"NumPy数组实际大小: {numpy_array.nbytes / 1024 / 1024:.2f} MB")

    del numpy_array
    gc.collect()

    # Test 3: a 100 MB buffer allocated through ctypes.
    print("\n3. 直接C内存分配 (ctypes):")
    print("-" * 40)

    import ctypes

    size_in_bytes = 100 * 1024 * 1024  # 100MB

    def _allocate_ctypes_buffer():
        buffer = (ctypes.c_char * size_in_bytes)()
        # Write one byte every 4 KB so the memory is actually touched.
        for i in range(0, size_in_bytes, 4096):
            buffer[i] = 1
        return buffer

    tracemalloc_size, psutil_size, buffer = _measure_allocation(
        process, _allocate_ctypes_buffer
    )

    print(f"tracemalloc测量: {tracemalloc_size:.2f} MB")
    print(f"psutil测量:      {psutil_size:.2f} MB")
    print(f"实际分配:       {size_in_bytes / 1024 / 1024:.2f} MB")
    print(f"差异:           {abs(tracemalloc_size - psutil_size):.2f} MB")

    del buffer
    gc.collect()

    # NOTE(review): the summary below is hard-coded and does not depend on the
    # numbers measured above. The recorded run contradicts parts of it:
    # tracemalloc reported the full size of both the NumPy array and the ctypes
    # buffer, while the RSS deltas were erratic. Revisit the wording before
    # relying on these conclusions.
    print("\n" + "=" * 70)
    print("结论分析:")
    print("=" * 70)
    print("""
1. 纯Python对象:
   - tracemalloc 和 psutil 结果接近
   - tracemalloc 能准确追踪 Python 解释器的内存分配

2. NumPy数组:
   - tracemalloc 只能看到很少的内存分配 (主要是Python包装对象)
   - psutil 能看到完整的内存使用 (包括C层分配的数组数据)
   - 差异主要来自NumPy在C层直接分配的数组存储空间

3. 直接C内存分配:
   - tracemalloc 几乎测量不到内存分配
   - psutil 能准确反映系统级别的内存使用
   - 这种情况下两者差异最大

总结:
- tracemalloc: 只追踪Python解释器管理的内存
- psutil: 追踪整个进程的物理内存使用
- 当使用C扩展(NumPy, OpenCV, 数据库驱动等)时，两者差异明显
    """)

def demonstrate_real_world_example():
    """Measure a small image-processing workload with tracemalloc and psutil.

    Creates five random 1080x1920 RGB uint8 images, derives a float32 copy of
    each (normalise, brighten, clip), and prints the total array size next to
    the tracemalloc growth and the RSS growth. Any exception is reported with a
    printed message instead of being raised (best-effort demo). Returns nothing.
    """
    print("\n" + "=" * 70)
    print("真实场景示例: 图像处理中的内存分配")
    print("=" * 70)

    try:
        gc.collect()

        process = psutil.Process(os.getpid())

        tracemalloc.start()
        try:
            memory_before = process.memory_info().rss / 1024 / 1024
            snapshot_before = tracemalloc.take_snapshot()

            # Five simulated 1920x1080 RGB frames. randint's upper bound is
            # exclusive, so 256 is needed to cover the full uint8 range.
            images = [
                np.random.randint(0, 256, (1080, 1920, 3), dtype=np.uint8)
                for _ in range(5)
            ]

            # Simulated processing: normalise, brighten, clip.
            processed_images = []
            for img in images:
                processed = img.astype(np.float32) / 255.0
                processed = processed * 1.2
                processed = np.clip(processed, 0, 1)
                processed_images.append(processed)

            snapshot_after = tracemalloc.take_snapshot()
            memory_after = process.memory_info().rss / 1024 / 1024

            # size_diff is the change between snapshots; ``size`` would also
            # count memory that was already traced before this demo began.
            top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')
            tracemalloc_size = sum(stat.size_diff for stat in top_stats) / 1024 / 1024
        finally:
            # Stop tracing even when the workload fails, so the swallowed
            # exception below cannot leave tracemalloc enabled.
            tracemalloc.stop()

        psutil_size = memory_after - memory_before

        # Bytes actually held by the source and processed arrays.
        total_image_size = sum(img.nbytes for img in images + processed_images) / 1024 / 1024

        print(f"图像数据实际大小: {total_image_size:.2f} MB")
        print(f"tracemalloc测量:  {tracemalloc_size:.2f} MB")
        print(f"psutil测量:       {psutil_size:.2f} MB")
        print(f"测量差异:         {abs(tracemalloc_size - psutil_size):.2f} MB")

        # NOTE(review): these fixed remarks do not depend on the measurement.
        # In the recorded run tracemalloc matched the array size (148.33 MB)
        # while the RSS delta was 0.29 MB, which contradicts them.
        print("\n这个例子显示了在图像处理等场景中:")
        print("- NumPy数组的实际数据存储在C层")
        print("- tracemalloc主要测量到Python对象的开销")
        print("- psutil能反映真实的内存使用情况")

        del images, processed_images
        gc.collect()

    except Exception as e:
        print(f"示例执行出错: {e}")

# Run both demonstrations when this cell is executed as the main module.
if __name__ == "__main__":
    compare_memory_tracking()
    demonstrate_real_world_example()

tracemalloc vs psutil: C扩展内存分配对比

1. 纯Python列表分配:
----------------------------------------
tracemalloc测量: 30.95 MB
psutil测量:      304.23 MB
差异:           273.28 MB

2. NumPy数组分配 (C扩展):
----------------------------------------
tracemalloc测量: 30.52 MB
psutil测量:      -82.53 MB
差异:           113.05 MB
NumPy数组实际大小: 30.52 MB

3. 直接C内存分配 (ctypes):
----------------------------------------
tracemalloc测量: 100.00 MB
psutil测量:      100.00 MB
实际分配:       100.00 MB
差异:           0.00 MB

结论分析:

1. 纯Python对象:
   - tracemalloc 和 psutil 结果接近
   - tracemalloc 能准确追踪 Python 解释器的内存分配

2. NumPy数组:
   - tracemalloc 只能看到很少的内存分配 (主要是Python包装对象)
   - psutil 能看到完整的内存使用 (包括C层分配的数组数据)
   - 差异主要来自NumPy在C层直接分配的数组存储空间

3. 直接C内存分配:
   - tracemalloc 几乎测量不到内存分配
   - psutil 能准确反映系统级别的内存使用
   - 这种情况下两者差异最大

总结:
- tracemalloc: 只追踪Python解释器管理的内存
- psutil: 追踪整个进程的物理内存使用
- 当使用C扩展(NumPy, OpenCV, 数据库驱动等)时，两者差异明显
    

真实场景示例: 图像处理中的内存分配
图像数据实际大小: 148.32 MB
tracemalloc测量:  148.33 MB
psutil测量:       0.29 MB
测量差异:         148.03 M

In [2]:
import tracemalloc
import psutil
import os
import numpy as np
import gc
import time

class AccurateMemoryMonitor:
    """Context manager that prints tracemalloc and RSS growth for the wrapped block.

    On entry it prints the test name, settles the heap (three ``gc.collect()``
    passes and a short sleep), makes sure tracemalloc is tracing and records a
    baseline snapshot and RSS reading. On exit it prints the growth seen by
    tracemalloc and by psutil, their absolute difference and the largest
    allocation site. The two growth figures are also stored as
    ``tracemalloc_mb`` and ``psutil_mb`` (MiB) for programmatic use.
    Exceptions raised inside the block are not suppressed.
    """

    def __init__(self, test_name):
        self.test_name = test_name
        self.process = psutil.Process(os.getpid())

    def __enter__(self):
        print(f"\n{self.test_name}")
        print("-" * 50)

        # Collect several times and pause briefly so the baseline is stable.
        for _ in range(3):
            gc.collect()
        time.sleep(0.1)

        # Remember whether tracing was already on, so __exit__ only switches
        # off tracing that this monitor itself enabled.
        self._owns_tracing = not tracemalloc.is_tracing()
        tracemalloc.start()
        self.memory_before = self.process.memory_info().rss / 1024 / 1024
        self.snapshot_before = tracemalloc.take_snapshot()

        print(f"基准内存: {self.memory_before:.2f} MB")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self.snapshot_after = tracemalloc.take_snapshot()
            self.memory_after = self.process.memory_info().rss / 1024 / 1024

            # size_diff is the change between the two snapshots; ``size`` would
            # also include memory that was traced before the block was entered.
            top_stats = self.snapshot_after.compare_to(self.snapshot_before, 'lineno')
            tracemalloc_size = sum(stat.size_diff for stat in top_stats) / 1024 / 1024
            psutil_size = self.memory_after - self.memory_before

            self.tracemalloc_mb = tracemalloc_size
            self.psutil_mb = psutil_size

            print(f"结束内存: {self.memory_after:.2f} MB")
            print(f"tracemalloc测量: {tracemalloc_size:.2f} MB")
            print(f"psutil测量:      {psutil_size:.2f} MB")
            print(f"差异:           {abs(tracemalloc_size - psutil_size):.2f} MB")

            # compare_to() sorts by magnitude of change, so the first entry is
            # the largest allocation site.
            if top_stats:
                print(f"最大分配来源: {top_stats[0]}")
        finally:
            # Stop tracing even if reporting failed, but leave it running when
            # the caller had enabled it before entering the block.
            if getattr(self, "_owns_tracing", True):
                tracemalloc.stop()

def test_pure_python():
    """Measure the allocation of a pure-Python list of one million floats."""
    with AccurateMemoryMonitor("1. 纯Python列表分配"):
        # Medium-sized list of float objects. In the recorded run tracemalloc
        # attributed about 30.9 MiB to this line (list storage plus the floats).
        data = [i * 1.0 for i in range(1000000)]
        # Read part of the data so the list is actually used.
        _ = sum(data[:1000])

def test_numpy_array():
    """Measure the allocation of a single 1500 x 1500 float64 NumPy array."""
    with AccurateMemoryMonitor("2. NumPy数组分配"):
        # 1500 * 1500 * 8 bytes, i.e. about 17 MiB of array data.
        arr = np.random.random((1500, 1500)).astype(np.float64)
        print(f"NumPy数组实际大小: {arr.nbytes / 1024 / 1024:.2f} MB")
        # Read part of the array so it is actually used.
        _ = np.sum(arr[:100, :100])

def test_multiple_numpy_operations():
    """Measure three rounds of random array -> 2-D FFT -> magnitude."""
    with AccurateMemoryMonitor("3. 复杂NumPy操作"):
        arrays = []
        for i in range(3):
            arr = np.random.random((800, 800)).astype(np.float32)  # about 2.4 MiB each
            transformed = np.fft.fft2(arr)  # FFT produces a complex-valued array
            result = np.abs(transformed)    # magnitude, stored in a new array
            arrays.append(result)
        
        # Only the magnitude arrays are kept; the inputs and FFT results of
        # earlier iterations are released when their names are rebound.
        total_size = sum(arr.nbytes for arr in arrays) / 1024 / 1024
        print(f"所有数组实际大小: {total_size:.2f} MB") 

def test_ctypes_allocation():
    """Measure a 50 MB buffer allocated through ctypes."""
    import ctypes
    
    with AccurateMemoryMonitor("4. ctypes直接分配"):
        # A moderately sized block, chosen to be easy to observe.
        size_mb = 50
        size_bytes = size_mb * 1024 * 1024
        
        # Build the array type and instantiate it.
        ArrayType = ctypes.c_byte * size_bytes
        buffer = ArrayType()
        
        # Write one byte every 4 KB so the memory is actually touched.
        for i in range(0, size_bytes, 4096):
            buffer[i] = i % 256
            
        print(f"ctypes分配大小: {size_mb} MB")

def test_mixed_scenario():
    """Measure a mixed workload: a Python list followed by NumPy arrays derived from it."""
    with AccurateMemoryMonitor("5. 混合场景 (Python + NumPy)"):
        # Python list of squares.
        python_data = [i ** 2 for i in range(500000)]
        
        # The same values as a float64 NumPy array.
        numpy_data = np.array(python_data, dtype=np.float64)
        
        # Two further element-wise operations, each producing a new array.
        processed = numpy_data * 2 + 1
        final_result = np.sqrt(processed)
        
        # Rough estimate: counts 8 bytes per element only, i.e. the list's
        # references, not the int objects they point to.
        python_size = len(python_data) * 8 / 1024 / 1024
        numpy_size = (numpy_data.nbytes + processed.nbytes + final_result.nbytes) / 1024 / 1024
        
        print(f"Python数据估算: {python_size:.2f} MB")
        print(f"NumPy数据实际: {numpy_size:.2f} MB")
        print(f"总计估算: {python_size + numpy_size:.2f} MB")

def main():
    """Run the five monitored scenarios in order and print the closing notes.

    A garbage collection and a half-second pause follow the warm-up and each of
    the first four scenarios; the last scenario is followed directly by the
    fixed summary text.
    """
    rule = "=" * 70

    def settle():
        # Collect garbage and give the process a moment before the next test.
        gc.collect()
        time.sleep(0.5)

    print(rule)
    print("改进的内存监控对比测试")
    print(rule)
    print("注意：测试间会进行垃圾回收以获得更准确的结果")

    # Warm-up allocation so the interpreter is in a steady state.
    _ = list(range(1000))
    settle()

    # Scenarios that are each followed by a settle step.
    scenarios = (
        test_pure_python,
        test_numpy_array,
        test_multiple_numpy_operations,
        test_ctypes_allocation,
    )
    for scenario in scenarios:
        scenario()
        settle()

    # The final scenario runs without a trailing settle step.
    test_mixed_scenario()

    print("\n" + rule)
    print("关键观察点:")
    print(rule)
    print("""
1. tracemalloc准确的情况:
   - 纯Python对象分配
   - ctypes创建的Python包装对象

2. tracemalloc不准确的情况:
   - NumPy数组的底层数据存储
   - 通过C API直接分配的内存

3. psutil的优势:
   - 反映真实的系统内存使用
   - 包括所有类型的内存分配

4. 测量建议:
   - 开发调试: 使用tracemalloc分析Python代码
   - 性能监控: 使用psutil了解真实内存消耗
   - 生产环境: 结合两者使用
    """)

# Run the full comparison when this cell is executed as the main module.
if __name__ == "__main__":
    main()

改进的内存监控对比测试
注意：测试间会进行垃圾回收以获得更准确的结果

1. 纯Python列表分配
--------------------------------------------------
基准内存: 235.73 MB
结束内存: 372.82 MB
tracemalloc测量: 30.95 MB
psutil测量:      137.09 MB
差异:           106.14 MB
最大分配来源: /tmp/ipykernel_96966/2734209926.py:55: size=30.9 MiB (+30.9 MiB), count=1000002 (+1000002), average=32 B

2. NumPy数组分配
--------------------------------------------------
基准内存: 270.93 MB
NumPy数组实际大小: 17.17 MB
结束内存: 270.93 MB
tracemalloc测量: 17.17 MB
psutil测量:      0.00 MB
差异:           17.17 MB
最大分配来源: /tmp/ipykernel_96966/2734209926.py:63: size=17.2 MiB (+17.2 MiB), count=2 (+2), average=8789 KiB

3. 复杂NumPy操作
--------------------------------------------------
基准内存: 269.96 MB
所有数组实际大小: 14.65 MB
结束内存: 269.96 MB
tracemalloc测量: 26.87 MB
psutil测量:      0.00 MB
差异:           26.87 MB
最大分配来源: /tmp/ipykernel_96966/2734209926.py:75: size=14.6 MiB (+14.6 MiB), count=7 (+7), average=2143 KiB

4. ctypes直接分配
--------------------------------------------------
基准内存: 269.96 MB
ctypes分配大小: 5

In [1]:
import numpy as np
import tracemalloc
import os
import psutil

# Scratch experiment: compare tracemalloc and RSS around np.std on a large array.
# Tracing starts first so every later allocation is recorded.
tracemalloc.start()

# Handle on the current process, used for the RSS readings
process = psutil.Process(os.getpid())

# RSS (bytes) before anything large is allocated
rss_before = process.memory_info().rss

# Allocate a large array of random float64 values (10,000 x 10,000 ≈ 800 MB)
a = np.random.rand(10000, 10000)

# Compute the standard deviation. In the recorded run tracemalloc's peak was
# about twice the array size, consistent with one array-sized temporary
# (NOTE(review): inferred from the output, not from NumPy's source).
std_val = np.std(a)

# RSS (bytes) after the allocation and the reduction
rss_after = process.memory_info().rss

# Current and peak traced sizes in bytes
current, peak = tracemalloc.get_traced_memory()

print(f"[tracemalloc] Current: {current / (1024 ** 2):.2f} MiB; Peak: {peak / (1024 ** 2):.2f} MiB")
print(f"[psutil] Process RSS before: {rss_before / (1024 ** 2):.2f} MiB; after: {rss_after / (1024 ** 2):.2f} MiB; delta: {(rss_after - rss_before) / (1024 ** 2):.2f} MiB")

tracemalloc.stop()

[tracemalloc] Current: 762.95 MiB; Peak: 1525.96 MiB
[psutil] Process RSS before: 64.71 MiB; after: 827.93 MiB; delta: 763.22 MiB


In [1]:
import numpy as np
import tracemalloc
import psutil
import os

# Scratch experiment: compare tracemalloc and RSS for a bare np.zeros allocation.
tracemalloc.start()
process = psutil.Process(os.getpid())
rss_before = process.memory_info().rss  # bytes, before the allocation

# Only numpy array allocation
a = np.zeros((10000, 10000))  # ~800 MB

rss_after = process.memory_info().rss  # bytes, after the allocation
current, peak = tracemalloc.get_traced_memory()

# NOTE(review): the recorded run shows an RSS delta of 0.00 MiB although
# tracemalloc reports ~763 MiB — presumably the zero-filled pages were never
# touched and so were not yet resident. Confirm before drawing conclusions.
print(f"[tracemalloc] Current: {current / 1024 ** 2:.2f} MiB; Peak: {peak / 1024 ** 2:.2f} MiB")
print(f"[psutil] Process RSS before: {rss_before / 1024 ** 2:.2f} MiB; after: {rss_after / 1024 ** 2:.2f} MiB; delta: {(rss_after - rss_before) / 1024 ** 2:.2f} MiB")

tracemalloc.stop()

[tracemalloc] Current: 762.95 MiB; Peak: 762.98 MiB
[psutil] Process RSS before: 64.70 MiB; after: 64.70 MiB; delta: 0.00 MiB


In [1]:
import torch
import tracemalloc
import psutil
import os

# Scratch experiment: compare tracemalloc and RSS when a tensor is allocated
# on the GPU (this cell needs a CUDA device).
tracemalloc.start()
try:
    process = psutil.Process(os.getpid())
    rss_before = process.memory_info().rss  # bytes, before the allocation

    a = torch.zeros((10000, 10000), device='cuda')  # ~400 MB on GPU

    rss_after = process.memory_info().rss  # bytes, after the allocation
    current, peak = tracemalloc.get_traced_memory()

    print(f"[tracemalloc] Current: {current / 1024 ** 2:.2f} MiB; Peak: {peak / 1024 ** 2:.2f} MiB")
    print(f"[psutil] Process RSS before: {rss_before / 1024 ** 2:.2f} MiB; after: {rss_after / 1024 ** 2:.2f} MiB; delta: {(rss_after - rss_before) / 1024 ** 2:.2f} MiB")
finally:
    # Always switch tracing off again (also when CUDA is unavailable and the
    # allocation raises) so it does not leak into later cells.
    tracemalloc.stop()

[tracemalloc] Current: 0.02 MiB; Peak: 0.06 MiB
[psutil] Process RSS before: 323.82 MiB; after: 467.82 MiB; delta: 144.00 MiB


In [2]:
import numpy as np
import psutil
import os
import gc
import time
from skimage.color import rgb2gray
import cv2

def get_memory_usage():
    """Return the resident set size (RSS) of the current process in GiB."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / 1024**3

def manual_rgb2gray_float(image):
    """Convert an RGB image to grayscale by hand, leaving the result as floating point.

    The first three channels along the last axis are weighted with
    0.2125 / 0.7154 / 0.0721 and summed.
    """
    red = image[:, :, 0]
    green = image[:, :, 1]
    blue = image[:, :, 2]
    return 0.2125 * red + 0.7154 * green + 0.0721 * blue

def manual_rgb2gray_uint8(image):
    """Convert an RGB image to grayscale by hand and cast the result to uint8.

    Uses the same 0.2125 / 0.7154 / 0.0721 channel weights as the float
    variant; the cast drops the fractional part rather than rounding.
    """
    red = image[:, :, 0]
    green = image[:, :, 1]
    blue = image[:, :, 2]
    weighted = 0.2125 * red + 0.7154 * green + 0.0721 * blue
    return weighted.astype(np.uint8)

def opencv_rgb2gray(image):
    """Convert an RGB image to grayscale with OpenCV's COLOR_RGB2GRAY conversion."""
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    return gray

def _benchmark_gray_conversion(title, convert, image):
    """Time one RGB-to-gray conversion and record the RSS change around it.

    Args:
        title: label printed in the section header.
        convert: callable taking the RGB image and returning the gray image.
        image: the RGB input array.

    Returns:
        dict with ``memory_increase`` (GiB), ``peak_memory`` (GiB), ``time``
        (seconds) and ``dtype`` of the converted image.
    """
    print(f"\n--- 测试 {title} ---")
    gc.collect()  # start every measurement from a collected heap
    before = get_memory_usage()
    print(f"转换前: {before:.4f} GB")

    # perf_counter is monotonic and high-resolution, unlike wall-clock time.time().
    start_time = time.perf_counter()
    gray = convert(image)
    elapsed = time.perf_counter() - start_time

    after = get_memory_usage()
    print(f"转换后: {after:.4f} GB (+{after-before:.4f} GB)")
    print(f"转换时间: {elapsed:.3f} 秒")
    print(f"输出类型: {gray.dtype}, 形状: {gray.shape}")

    result = {
        'memory_increase': after - before,
        # RSS after the conversion; a transient peak during the call is not captured.
        'peak_memory': after,
        'time': elapsed,
        'dtype': gray.dtype,
    }

    # Release the converted image before the next method is measured.
    del gray
    gc.collect()
    return result


def test_rgb2gray_memory(height=9872, width=9312):
    """Benchmark memory growth and run time of four RGB-to-gray conversions.

    Builds a random ``(height, width, 3)`` uint8 image, runs skimage's
    ``rgb2gray``, the two manual implementations and the OpenCV wrapper on it,
    and prints per-method figures, a summary table and the methods with the
    smallest memory growth and the shortest run time. Returns nothing.

    Args:
        height: image height in pixels.
        width: image width in pixels.
    """
    print("=== RGB2Gray 内存使用测试 ===")
    print(f"图像尺寸: ({height}, {width}, 3)")
    print(f"理论内存大小: {height * width * 3 / 1024**2:.1f} MB")
    print("-" * 60)

    print("创建测试图像...")
    baseline = get_memory_usage()
    print(f"基线内存: {baseline:.4f} GB")

    # Random RGB image. randint's upper bound is exclusive, so 256 is needed
    # to cover the full uint8 range.
    image = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)
    after_create = get_memory_usage()
    print(f"创建图像后: {after_create:.4f} GB (+{after_create-baseline:.4f} GB)")

    # (result key, printed title, conversion function), in measurement order.
    methods = (
        ('skimage', "skimage.rgb2gray", rgb2gray),
        ('manual_float', "手动实现 (float64)", manual_rgb2gray_float),
        ('manual_uint8', "手动实现 (uint8)", manual_rgb2gray_uint8),
        ('opencv', "OpenCV", opencv_rgb2gray),
    )
    results = {}
    for key, title, convert in methods:
        results[key] = _benchmark_gray_conversion(title, convert, image)

    # Summary table.
    print("\n" + "="*80)
    print("汇总结果:")
    print("-"*80)
    print(f"{'方法':<15} {'内存增长(GB)':<12} {'时间(秒)':<10} {'输出类型':<10}")
    print("-"*80)

    for method, result in results.items():
        print(f"{method:<15} {result['memory_increase']:<12.4f} {result['time']:<10.3f} {str(result['dtype']):<10}")

    print("\n最优选择建议:")
    min_memory = min(results.values(), key=lambda x: x['memory_increase'])
    min_time = min(results.values(), key=lambda x: x['time'])

    # Identity, not equality: report exactly the entries min() selected.
    for method, result in results.items():
        if result is min_memory:
            print(f"- 最省内存: {method} ({result['memory_increase']:.4f} GB)")
        if result is min_time:
            print(f"- 最快速度: {method} ({result['time']:.3f} 秒)")

    # Clean up the test image and report the final RSS.
    del image
    gc.collect()
    final_memory = get_memory_usage()
    print(f"\n清理后内存: {final_memory:.4f} GB")

def test_step_by_step_skimage(height=5000, width=5000):
    """Print the RSS after each step of a hand-written RGB-to-gray conversion.

    The steps are: convert each colour channel to float64, form the weighted
    sum, then call ``np.ascontiguousarray`` on the result. The baseline is read
    after the test image has been created, so it already includes the image.

    Args:
        height: image height in pixels.
        width: image width in pixels.
    """
    print(f"\n=== 逐步分析 skimage rgb2gray 内存使用 ===")
    print(f"图像尺寸: ({height}, {width}, 3)")
    
    # Create the test image
    image = np.random.randint(0, 255, (height, width, 3), dtype=np.uint8)
    baseline = get_memory_usage()
    print(f"基线内存(含图像): {baseline:.4f} GB")
    
    # Re-create the conversion by hand, one step at a time
    print("\n逐步执行:")
    
    # Step 1: extract each channel as a float64 copy
    print("1. 提取RGB通道...")
    before = get_memory_usage()
    r = image[:,:,0].astype(np.float64)
    after1 = get_memory_usage()
    print(f"   提取R通道后: {after1:.4f} GB (+{after1-before:.4f} GB)")
    
    g = image[:,:,1].astype(np.float64) 
    after2 = get_memory_usage()
    print(f"   提取G通道后: {after2:.4f} GB (+{after2-after1:.4f} GB)")
    
    b = image[:,:,2].astype(np.float64)
    after3 = get_memory_usage()
    print(f"   提取B通道后: {after3:.4f} GB (+{after3-after2:.4f} GB)")
    
    # Step 2: weighted sum of the three channels
    print("2. 加权求和...")
    gray_calc = 0.2125 * r + 0.7154 * g + 0.0721 * b
    after4 = get_memory_usage()
    print(f"   加权求和后: {after4:.4f} GB (+{after4-after3:.4f} GB)")
    
    # Step 3: ascontiguousarray
    # NOTE(review): the original comment states skimage calls this internally — not verified here.
    print("3. ascontiguousarray...")
    gray_contiguous = np.ascontiguousarray(gray_calc)
    after5 = get_memory_usage()
    print(f"   连续化后: {after5:.4f} GB (+{after5-after4:.4f} GB)")
    
    print(f"\n总内存增长: {after5-baseline:.4f} GB")
    
    # Clean up
    del image, r, g, b, gray_calc, gray_contiguous
    gc.collect()

if __name__ == "__main__":
    # Run the main benchmark
    test_rgb2gray_memory()
    
    # Run the step-by-step analysis
    test_step_by_step_skimage()

=== RGB2Gray 内存使用测试 ===
图像尺寸: (9872, 9312, 3)
理论内存大小: 263.0 MB
------------------------------------------------------------
创建测试图像...
基线内存: 0.1847 GB
创建图像后: 0.4418 GB (+0.2571 GB)

--- 测试 skimage.rgb2gray ---
转换前: 0.4871 GB
转换后: 1.1721 GB (+0.6850 GB)
转换时间: 0.964 秒
输出类型: float64, 形状: (9872, 9312)

--- 测试 手动实现 (float64) ---
转换前: 0.4872 GB
转换后: 1.1726 GB (+0.6854 GB)
转换时间: 1.483 秒
输出类型: float64, 形状: (9872, 9312)

--- 测试 手动实现 (uint8) ---
转换前: 0.4877 GB
转换后: 0.5733 GB (+0.0856 GB)
转换时间: 1.325 秒
输出类型: uint8, 形状: (9872, 9312)

--- 测试 OpenCV ---
转换前: 0.4877 GB
转换后: 0.5701 GB (+0.0824 GB)
转换时间: 0.047 秒
输出类型: uint8, 形状: (9872, 9312)

汇总结果:
--------------------------------------------------------------------------------
方法              内存增长(GB)     时间(秒)      输出类型      
--------------------------------------------------------------------------------
skimage         0.6850       0.964      float64   
manual_float    0.6854       1.483      float64   
manual_uint8    0.0856       1.325      uint8 

In [7]:
import numpy as np
import psutil
import os
import gc
import sys

def get_memory_usage():
    """Return the resident set size (RSS) of the current process in GiB."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / 1024**3

def get_memory_info():
    """Return detailed memory figures for the current process.

    Returns:
        dict with ``rss`` (resident memory, GiB), ``vms`` (virtual memory, GiB)
        and ``percent`` (the value of ``Process.memory_percent()``).
    """
    gib = 1024**3
    proc = psutil.Process(os.getpid())
    mem = proc.memory_info()
    return dict(
        rss=mem.rss / gib,               # resident (physical) memory
        vms=mem.vms / gib,               # virtual memory
        percent=proc.memory_percent(),
    )

def debug_texture_analysis_memory():
    """Walk through a simulated texture-analysis run, printing process RSS per stage.

    Stages: library imports, synthetic image/mask creation, rgb2gray followed by
    the uint8 conversion, feature-map allocation, a GLCM loop over the top-left
    50x50 mask cells, DataFrame construction and colormap rendering. After each
    stage the RSS reported by get_memory_usage() is printed together with the
    signed change since the previous checkpoint. Returns None.
    """

    print("=== 纹理分析内存调试 ===")

    # Mimic the dimensions of the real data
    image_shape = (9872, 9312, 3)
    mask_shape = (617, 582)

    print(f"图像尺寸: {image_shape}")
    print(f"mask尺寸: {mask_shape}")
    print(f"循环次数: {mask_shape[0] * mask_shape[1]:,}")

    baseline = get_memory_usage()
    print(f"\n初始内存: {baseline:.4f} GB")

    # Step 1: import libraries (done inside the function on purpose, so the
    # RSS change caused by each import can be measured)
    print("\n--- 导入库的内存影响 ---")

    import pandas as pd
    after_pandas = get_memory_usage()
    print(f"导入pandas后: {after_pandas:.4f} GB ({after_pandas-baseline:+.4f} GB)")

    import matplotlib.pyplot as plt
    after_plt = get_memory_usage()
    print(f"导入matplotlib后: {after_plt:.4f} GB ({after_plt-after_pandas:+.4f} GB)")

    from skimage.feature import graycomatrix, graycoprops
    from skimage.color import rgb2gray
    after_skimage = get_memory_usage()
    print(f"导入skimage后: {after_skimage:.4f} GB ({after_skimage-after_plt:+.4f} GB)")

    import cv2
    after_cv2 = get_memory_usage()
    print(f"导入cv2后: {after_cv2:.4f} GB ({after_cv2-after_skimage:+.4f} GB)")

    # Step 2: create the data
    print("\n--- 创建数据 ---")

    # Synthetic image
    image = np.random.randint(0, 255, image_shape, dtype=np.uint8)
    after_image = get_memory_usage()
    print(f"创建image后: {after_image:.4f} GB ({after_image-after_cv2:+.4f} GB)")

    # Synthetic mask, roughly 10% True
    mask = np.random.choice([True, False], mask_shape, p=[0.1, 0.9])
    after_mask = get_memory_usage()
    print(f"创建mask后: {after_mask:.4f} GB ({after_mask-after_image:+.4f} GB)")

    # Step 3: rgb2gray
    print("\n--- RGB转灰度 ---")
    gray_image = rgb2gray(image)
    after_gray = get_memory_usage()
    print(f"rgb2gray后: {after_gray:.4f} GB ({after_gray-after_mask:+.4f} GB)")

    # Rebinding gray_image releases the float result, so this delta is usually
    # negative; the signed format avoids printing "+-".
    gray_image = (gray_image * 255).astype(np.uint8)
    after_convert = get_memory_usage()
    print(f"转换uint8后: {after_convert:.4f} GB ({after_convert-after_gray:+.4f} GB)")

    # Step 4: allocate the feature maps
    print("\n--- 创建特征图 ---")

    energy_map = np.full(mask_shape, np.nan)
    homogeneity_map = np.full(mask_shape, np.nan)
    entropy_map = np.full(mask_shape, np.nan)
    sharpness_map = np.full(mask_shape, np.nan)
    after_maps = get_memory_usage()
    print(f"创建4个特征图后: {after_maps:.4f} GB ({after_maps-after_convert:+.4f} GB)")

    # Step 5: simulate part of the patch loop
    print("\n--- 模拟循环处理 ---")

    patch_size = 16
    sample_interval = 100  # record RSS once every this many processed patches
    count = 0
    memory_samples = []

    for i in range(min(50, mask_shape[0])):  # first 50 rows only
        for j in range(min(50, mask_shape[1])):  # first 50 columns only
            if not mask[i, j]:
                continue
            count += 1

            rows = slice(i * patch_size, (i + 1) * patch_size)
            cols = slice(j * patch_size, (j + 1) * patch_size)
            patch_gray = gray_image[rows, cols]

            # Keep the values inside 0..63 so they are valid for levels=64
            patch_gray = np.clip(patch_gray, 0, 63)

            # GLCM features
            glcm = graycomatrix(patch_gray, distances=[1], angles=[0], levels=64, symmetric=True, normed=True)
            energy_map[i, j] = graycoprops(glcm, 'energy')[0, 0]
            homogeneity_map[i, j] = graycoprops(glcm, 'homogeneity')[0, 0]

            # Periodic memory sample
            if count % sample_interval == 0:
                current_mem = get_memory_usage()
                memory_samples.append(current_mem)
                print(f"  处理{count}个patch后: {current_mem:.4f} GB")

    if len(memory_samples) > 1:
        memory_growth = memory_samples[-1] - memory_samples[0]
        # The growth was observed between the first and the last sample, so it
        # must be divided by the patches processed in that window, not by the
        # total number of patches handled.
        patches_between_samples = (len(memory_samples) - 1) * sample_interval
        avg_per_patch = memory_growth / patches_between_samples * 1024  # MB
        print(f"  循环内存增长: {memory_growth:.4f} GB")
        print(f"  平均每patch: {avg_per_patch:.4f} MB")

        # Extrapolate to every True cell in the mask
        total_patches = np.sum(mask)
        estimated_growth = avg_per_patch * total_patches / 1024  # GB
        print(f"  估算全部循环增长: {estimated_growth:.4f} GB")

    # Step 6: DataFrame creation
    print("\n--- DataFrame创建 ---")

    # Stand-ins for the normalised feature maps
    energy_map_norm = np.random.rand(*mask_shape)
    homogeneity_map_norm = np.random.rand(*mask_shape)
    entropy_map_norm = np.random.rand(*mask_shape)

    before_df = get_memory_usage()

    features = pd.DataFrame({
        'homogeneity': homogeneity_map_norm.flatten(),
        'energy': energy_map_norm.flatten(),
        'entropy': entropy_map_norm.flatten(),
    })

    after_df = get_memory_usage()
    print(f"创建DataFrame后: {after_df:.4f} GB ({after_df-before_df:+.4f} GB)")
    print(f"DataFrame形状: {features.shape}")
    print(f"DataFrame内存使用: {features.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

    # Step 7: rendering for image export
    print("\n--- 图像保存 ---")

    from PIL import Image

    before_save = get_memory_usage()

    # Simulates save_colormapped_map
    colormap = plt.get_cmap("jet")
    colored = colormap(energy_map_norm)[:, :, :3]
    colored = (colored * 255).astype(np.uint8)

    after_colormap = get_memory_usage()
    print(f"颜色映射后: {after_colormap:.4f} GB ({after_colormap-before_save:+.4f} GB)")

    # Summary
    print("\n" + "="*60)
    print("内存使用总结:")
    print(f"初始: {baseline:.4f} GB")
    print(f"导入库: {after_cv2-baseline:+.4f} GB")
    print(f"创建数据: {after_mask-after_cv2:+.4f} GB")
    print(f"RGB转换: {after_convert-after_mask:+.4f} GB")
    print(f"特征图: {after_maps-after_convert:+.4f} GB")
    print(f"DataFrame: {after_df-before_df:+.4f} GB")
    print(f"颜色映射: {after_colormap-before_save:+.4f} GB")
    print(f"总计: {after_colormap:.4f} GB")

    # Release the large objects
    del image, mask, gray_image, features
    gc.collect()

def analyze_object_memory():
    """Print the ten most numerous and the ten largest object types tracked by the GC.

    Objects are grouped by ``type(obj).__name__``. Sizes are the sum of
    ``sys.getsizeof`` per type (shallow sizes); an object whose size cannot be
    determined is still counted but contributes no size. Returns None.
    """
    print("\n=== 内存对象分析 ===")

    # Per-type tallies over every object the garbage collector tracks
    objects = gc.get_objects()
    type_counts = {}
    type_sizes = {}

    for obj in objects:
        obj_type = type(obj).__name__
        type_counts[obj_type] = type_counts.get(obj_type, 0) + 1

        # Best-effort size lookup. Only ordinary errors are ignored, so that
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            size = sys.getsizeof(obj)
        except Exception:
            continue
        type_sizes[obj_type] = type_sizes.get(obj_type, 0) + size

    # Ranked by instance count
    print("对象数量前10:")
    sorted_counts = sorted(type_counts.items(), key=lambda x: x[1], reverse=True)
    for obj_type, count in sorted_counts[:10]:
        print(f"  {obj_type}: {count:,}")

    # Ranked by total shallow size
    print("\n对象大小前10:")
    sorted_sizes = sorted(type_sizes.items(), key=lambda x: x[1], reverse=True)
    for obj_type, size in sorted_sizes[:10]:
        print(f"  {obj_type}: {size/1024**2:.2f} MB")

if __name__ == "__main__":
    # Run the staged memory walkthrough, then summarise GC-tracked objects by type
    debug_texture_analysis_memory()
    analyze_object_memory()

=== 纹理分析内存调试 ===
图像尺寸: (9872, 9312, 3)
mask尺寸: (617, 582)
循环次数: 359,094

初始内存: 0.7268 GB

--- 导入库的内存影响 ---
导入pandas后: 0.7268 GB (+0.0000 GB)
导入matplotlib后: 0.7268 GB (+0.0000 GB)
导入skimage后: 0.7268 GB (+0.0000 GB)
导入cv2后: 0.7268 GB (+0.0000 GB)

--- 创建数据 ---
创建image后: 0.9837 GB (+0.2568 GB)
创建mask后: 0.9909 GB (+0.0072 GB)

--- RGB转灰度 ---
rgb2gray后: 1.6758 GB (+0.6849 GB)
转换uint8后: 1.0765 GB (+-0.5993 GB)

--- 创建特征图 ---
创建4个特征图后: 1.0792 GB (+0.0027 GB)

--- 模拟循环处理 ---
  处理100个patch后: 1.0793 GB
  处理200个patch后: 1.0793 GB
  循环内存增长: 0.0000 GB
  平均每patch: 0.0000 MB
  估算全部循环增长: 0.0000 GB

--- DataFrame创建 ---
创建DataFrame后: 1.1035 GB (+0.0163 GB)
DataFrame形状: (359094, 3)
DataFrame内存使用: 8.22 MB

--- 图像保存 ---
颜色映射后: 1.1047 GB (+0.0010 GB)

内存使用总结:
初始: 0.7268 GB
导入库: +0.0000 GB
创建数据: +0.2641 GB
RGB转换: +0.0856 GB
特征图: +0.0027 GB
DataFrame: +0.0163 GB
颜色映射: +0.0010 GB
总计: 1.1047 GB

=== 内存对象分析 ===
对象数量前10:
  dict: 59,926
  function: 49,285
  list: 36,659
  tuple: 33,632
  cell: 16,581
  builtin_func

In [1]:
import numpy as np
import psutil
import os
import time
import threading
from skimage.color import rgb2gray
import cv2

def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in GiB."""
    this_process = psutil.Process(os.getpid())
    rss_bytes = this_process.memory_info().rss
    return rss_bytes / 1024**3

def monitor_memory_continuously(duration, interval=0.001):
    """Poll process memory for ``duration`` seconds and report the peak.

    Sleeps ``interval`` seconds between polls. Returns ``(peak, samples)``
    where ``samples`` is a list of ``(elapsed_seconds, memory)`` tuples and
    ``peak`` is the largest memory value seen (0 if nothing was sampled).
    """
    t0 = time.time()
    highest = 0
    trace = []

    while True:
        if not (time.time() - t0 < duration):
            break
        usage = get_memory_usage()
        highest = max(highest, usage)
        trace.append((time.time() - t0, usage))
        time.sleep(interval)

    return highest, trace

def _measure_rgb2gray_peak(convert, image, poll_interval=0.0001):
    """Run ``convert(image)`` while a background thread polls RSS; print and return the stats.

    Returns a dict with 'peak_growth' and 'final_growth' (both relative to the
    RSS read just before the call, in the units of get_memory_usage) and
    'time' (seconds spent inside ``convert``).
    """
    import gc  # local import: this cell's import block does not bring gc into scope

    gc.collect()
    before = get_memory_usage()
    print(f"开始前: {before:.4f} GB")

    # Start the peak at `before` so the reported growth can never be negative.
    peak_memory = [before]
    stop_event = threading.Event()

    def continuous_monitor():
        while not stop_event.is_set():
            peak_memory[0] = max(peak_memory[0], get_memory_usage())
            stop_event.wait(poll_interval)  # poll roughly every 0.1 ms by default

    monitor_thread = threading.Thread(target=continuous_monitor, daemon=True)
    monitor_thread.start()

    try:
        start_time = time.perf_counter()
        gray = convert(image)
        end_time = time.perf_counter()
    finally:
        # Always stop the poller, even when the conversion raises; otherwise
        # the thread would keep spinning for the lifetime of the kernel.
        stop_event.set()
        monitor_thread.join()

    after = get_memory_usage()
    print(f"完成后: {after:.4f} GB")
    print(f"监控到的峰值: {peak_memory[0]:.4f} GB")
    print(f"峰值增长: +{peak_memory[0] - before:.4f} GB")
    print(f"用时: {end_time - start_time:.3f} 秒")
    print(f"输出类型: {gray.dtype}")

    stats = {
        'peak_growth': peak_memory[0] - before,
        'final_growth': after - before,
        'time': end_time - start_time
    }

    # Drop the result and let the allocator settle before the next measurement
    del gray
    gc.collect()
    time.sleep(0.1)

    return stats


def test_rgb2gray_alternatives():
    """Compare peak and final memory growth of three RGB-to-gray conversions.

    Builds a random (9872, 9312, 3) uint8 image, then measures skimage's
    rgb2gray, OpenCV's cvtColor and a manual float32 weighted sum, each under
    a background RSS poller, and prints a summary table. Returns None.
    """
    import gc  # local import: this cell's import block does not bring gc into scope

    # Create the test image
    print("创建测试图像...")
    image_shape = (9872, 9312, 3)
    image = np.random.randint(0, 255, image_shape, dtype=np.uint8)
    baseline = get_memory_usage()
    print(f"基线内存: {baseline:.4f} GB")
    print(f"图像大小: {image.nbytes/1024**2:.1f} MB")

    def manual_gray(img):
        # float32 channel copies keep the intermediates smaller than float64
        return (0.2125 * img[:,:,0].astype(np.float32) +
                0.7154 * img[:,:,1].astype(np.float32) +
                0.0721 * img[:,:,2].astype(np.float32)).astype(np.uint8)

    results = {}

    # Test 1: the original skimage conversion
    print("\n=== 测试1: skimage rgb2gray ===")
    results['skimage'] = _measure_rgb2gray_peak(rgb2gray, image)

    # Test 2: OpenCV
    print("\n=== 测试2: OpenCV ===")
    results['opencv'] = _measure_rgb2gray_peak(
        lambda img: cv2.cvtColor(img, cv2.COLOR_RGB2GRAY), image)

    # Test 3: manual weighted sum
    print("\n=== 测试3: 手动实现 ===")
    results['manual'] = _measure_rgb2gray_peak(manual_gray, image)

    # Summary
    print("\n" + "="*60)
    print("内存峰值对比:")
    print("-"*60)
    for method, result in results.items():
        print(f"{method:>10}: 峰值增长 {result['peak_growth']:+.3f}GB, "
              f"最终增长 {result['final_growth']:+.3f}GB, "
              f"用时 {result['time']:.3f}s")

    # Cleanup
    del image
    gc.collect()

if __name__ == "__main__":
    # gc is not in this cell's import block; bind it at module level before the
    # call, because test_rgb2gray_alternatives() uses gc.collect()
    import gc
    test_rgb2gray_alternatives()

创建测试图像...
基线内存: 0.3464 GB
图像大小: 263.0 MB

=== 测试1: skimage rgb2gray ===
开始前: 0.3464 GB
完成后: 1.0316 GB
监控到的峰值: 3.0859 GB
峰值增长: +2.7395 GB
用时: 1.253 秒
输出类型: float64

=== 测试2: OpenCV ===
开始前: 0.3466 GB
完成后: 0.4293 GB
监控到的峰值: 0.4293 GB
峰值增长: +0.0827 GB
用时: 0.094 秒
输出类型: uint8

=== 测试3: 手动实现 ===
开始前: 0.3439 GB
完成后: 0.4297 GB
监控到的峰值: 1.0290 GB
峰值增长: +0.6851 GB
用时: 1.049 秒
输出类型: uint8

内存峰值对比:
------------------------------------------------------------
   skimage: 峰值增长 +2.739GB, 最终增长 +0.685GB, 用时 1.253s
    opencv: 峰值增长 +0.083GB, 最终增长 +0.083GB, 用时 0.094s
    manual: 峰值增长 +0.685GB, 最终增长 +0.086GB, 用时 1.049s


# test

In [2]:
import numpy as np
from skimage.color import rgb2gray
import cv2
import time
import psutil
import os

def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in GiB."""
    pid = os.getpid()
    rss_bytes = psutil.Process(pid).memory_info().rss
    return rss_bytes / 1024**3

def efficient_rgb2gray(image):
    """Convert an RGB array to uint8 grayscale with weights 0.2125 / 0.7154 / 0.0721.

    The weighted channel sum is computed in floating point and then cast to
    uint8, which truncates the fractional part.
    """
    red = image[:, :, 0]
    green = image[:, :, 1]
    blue = image[:, :, 2]
    luminance = 0.2125 * red + 0.7154 * green + 0.0721 * blue
    return luminance.astype(np.uint8)

def verify_rgb2gray_consistency():
    """Check whether the manual weighted sum reproduces the skimage-based conversion.

    For each of four small test images (random, pure colours, gradient,
    boundary values) the function runs
      1. ``rgb2gray`` followed by ``* 255`` and a uint8 cast,
      2. ``efficient_rgb2gray``,
      3. ``cv2.cvtColor(..., cv2.COLOR_RGB2GRAY)``,
    prints timing / memory / per-pixel difference statistics, and finally a
    summary with a suggested replacement.

    Returns:
        dict: test-case name -> {'skimage' | 'efficient' | 'opencv' ->
        {'result': uint8 array, 'time': seconds, 'memory': RSS delta}}.
    """

    print("🔍 RGB2Gray方法一致性验证")
    print("="*60)

    # Several kinds of test image
    test_cases = {
        "随机图像": np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8),
        "纯色测试": np.array([[[255, 0, 0], [0, 255, 0], [0, 0, 255], [255, 255, 255]]], dtype=np.uint8),
        "渐变测试": np.array([[[i, j, (i+j)//2] for j in range(256)] for i in range(256)], dtype=np.uint8),
        "边界测试": np.array([[[0, 0, 0], [255, 255, 255], [128, 128, 128]]], dtype=np.uint8)
    }

    all_results = {}

    for test_name, test_image in test_cases.items():
        print(f"\n📊 测试: {test_name} (形状: {test_image.shape})")
        print("-" * 40)

        results = {}

        # Method 1: the original skimage path
        print("1️⃣ 原始skimage方法...")
        start_mem = get_memory_usage()
        start_time = time.perf_counter()

        gray_skimage = rgb2gray(test_image)
        gray_skimage_uint8 = (gray_skimage * 255).astype(np.uint8)

        end_time = time.perf_counter()
        end_mem = get_memory_usage()

        results['skimage'] = {
            'result': gray_skimage_uint8,
            'time': end_time - start_time,
            'memory': end_mem - start_mem
        }
        print(f"   时间: {results['skimage']['time']:.4f}s, 内存: +{results['skimage']['memory']:.4f}GB")

        # Method 2: the proposed replacement
        print("2️⃣ 高效替换方法...")
        start_mem = get_memory_usage()
        start_time = time.perf_counter()

        gray_efficient = efficient_rgb2gray(test_image)

        end_time = time.perf_counter()
        end_mem = get_memory_usage()

        results['efficient'] = {
            'result': gray_efficient,
            'time': end_time - start_time,
            'memory': end_mem - start_mem
        }
        print(f"   时间: {results['efficient']['time']:.4f}s, 内存: +{results['efficient']['memory']:.4f}GB")

        # Method 3: OpenCV default conversion (for comparison only)
        print("3️⃣ OpenCV默认方法（对比）...")
        start_time = time.perf_counter()

        gray_opencv = cv2.cvtColor(test_image, cv2.COLOR_RGB2GRAY)

        end_time = time.perf_counter()

        results['opencv'] = {
            'result': gray_opencv,
            'time': end_time - start_time,
            'memory': 0  # not measured; assumed to be small
        }
        print(f"   时间: {results['opencv']['time']:.4f}s")

        # Compare the outputs
        print("\n📈 结果分析:")

        # skimage vs efficient (int16 so the subtraction cannot wrap around)
        diff_efficient = np.abs(results['skimage']['result'].astype(np.int16) -
                                results['efficient']['result'].astype(np.int16))

        max_diff = diff_efficient.max()
        mean_diff = diff_efficient.mean()
        identical_ratio = (diff_efficient == 0).mean()

        print(f"   skimage vs 高效方法:")
        print(f"     最大差异: {max_diff}")
        print(f"     平均差异: {mean_diff:.6f}")
        print(f"     完全相同比例: {identical_ratio:.2%}")

        if max_diff == 0:
            print("     ✅ 结果完全一致!")
        elif max_diff <= 1:
            print("     ✅ 结果几乎一致 (差异≤1)")
        else:
            print("     ⚠️ 存在较大差异")

        # skimage vs OpenCV
        diff_opencv = np.abs(results['skimage']['result'].astype(np.int16) -
                             results['opencv']['result'].astype(np.int16))

        print(f"   skimage vs OpenCV:")
        print(f"     最大差异: {diff_opencv.max()}")
        print(f"     平均差异: {diff_opencv.mean():.3f}")
        print(f"     差异>5的像素: {(diff_opencv > 5).mean():.2%}")

        # Performance comparison
        print(f"\n⚡ 性能对比:")
        print(f"   skimage:   {results['skimage']['time']:.4f}s, {results['skimage']['memory']:+.4f}GB")
        print(f"   高效方法:   {results['efficient']['time']:.4f}s, {results['efficient']['memory']:+.4f}GB")
        print(f"   OpenCV:    {results['opencv']['time']:.4f}s")

        skimage_time = results['skimage']['time']
        efficient_time = results['efficient']['time']
        # Require a non-zero denominator: tiny images can time at 0.0 s.
        if 0 < efficient_time < skimage_time:
            speedup = skimage_time / efficient_time
            print(f"   🚀 高效方法快 {speedup:.1f}x")

        all_results[test_name] = results

        # Show raw values for very small images only
        if test_image.size <= 12:
            print(f"\n🔢 具体数值对比 (前几个像素):")
            flat_skimage = results['skimage']['result'].flatten()[:10]
            flat_efficient = results['efficient']['result'].flatten()[:10]
            flat_opencv = results['opencv']['result'].flatten()[:10]

            print(f"   skimage:  {flat_skimage}")
            print(f"   高效方法:  {flat_efficient}")
            print(f"   OpenCV:   {flat_opencv}")

    # Summary report
    print("\n" + "="*60)
    print("📋 总结报告")
    print("="*60)

    all_identical = True
    speedups = []  # only test cases with a measurable (non-zero) efficient time
    total_memory_saved = 0

    for test_name, results in all_results.items():
        diff = np.abs(results['skimage']['result'].astype(np.int16) -
                      results['efficient']['result'].astype(np.int16))
        is_identical = diff.max() == 0
        all_identical = all_identical and is_identical

        # Guard the actual denominator, and never reuse a value left over from
        # a previous iteration.
        efficient_time = results['efficient']['time']
        if efficient_time > 0:
            speedup = results['skimage']['time'] / efficient_time
            speedups.append(speedup)
            speedup_text = f"{speedup:.1f}x"
        else:
            speedup_text = "n/a"

        memory_saved = results['skimage']['memory'] - results['efficient']['memory']
        total_memory_saved += memory_saved

        status = "✅" if is_identical else "⚠️"
        print(f"{status} {test_name}: 数值差异={diff.max()}, 速度提升={speedup_text}, 内存节省={memory_saved:.4f}GB")

    print(f"\n🎯 最终结论:")
    if all_identical:
        print("✅ 所有测试用例的结果都完全一致!")
    else:
        print("⚠️ 部分测试用例存在微小差异")

    avg_speedup = sum(speedups) / len(speedups) if speedups else float('nan')
    avg_memory_saved = total_memory_saved / len(all_results)

    print(f"📊 平均性能提升:")
    print(f"   速度: {avg_speedup:.1f}x 更快")
    print(f"   内存: {avg_memory_saved:.4f}GB 节省")

    print(f"\n🔧 推荐替换:")
    print("将以下代码:")
    print("   gray_image = rgb2gray(image)")
    print("   gray_image = (gray_image * 255).astype(np.uint8)")
    print()
    print("替换为:")
    print("   gray_image = (0.2125 * image[:,:,0] + 0.7154 * image[:,:,1] + 0.0721 * image[:,:,2]).astype(np.uint8)")
    print()
    if all_identical:
        print("✅ 保证结果完全一致!")

    return all_results

def test_with_real_size():
    """Time the three conversions on a random (9872, 9312, 3) uint8 image.

    For every method the elapsed time and the RSS difference before/after the
    call are printed and recorded; a failing method is reported instead of
    raising. When both the skimage and the efficient method succeed, a fresh
    100x100 sample is used to compare their outputs, followed by a time and
    memory comparison. Returns None.
    """
    banner = "="*60
    print("\n" + banner)
    print("🔬 真实尺寸测试 (模拟你的图像大小)")
    print(banner)

    # Synthetic image close to the real image size
    print("创建测试图像 (9872, 9312, 3)...")
    test_image = np.random.randint(0, 255, (9872, 9312, 3), dtype=np.uint8)

    print(f"图像大小: {test_image.nbytes / 1024**2:.1f} MB")

    def via_skimage(img):
        return (rgb2gray(img) * 255).astype(np.uint8)

    def via_opencv(img):
        return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    methods = {
        'skimage': via_skimage,
        'efficient': efficient_rgb2gray,
        'opencv': via_opencv
    }

    results = {}

    for label, convert in methods.items():
        print(f"\n测试 {label} 方法...")

        mem_before = get_memory_usage()
        t_start = time.time()

        try:
            gray = convert(test_image)

            t_stop = time.time()
            mem_after = get_memory_usage()
            elapsed = t_stop - t_start
            mem_delta = mem_after - mem_before

            results[label] = {
                'success': True,
                'time': elapsed,
                'memory_peak': mem_delta,
                'result_shape': gray.shape,
                'result_dtype': gray.dtype,
                'result_mean': gray.mean(),
                'result_std': gray.std()
            }

            print(f"   ✅ 成功: {elapsed:.2f}s, 峰值内存: +{mem_delta:.3f}GB")
            print(f"   结果: {gray.shape}, {gray.dtype}, 均值={gray.mean():.1f}")

        except Exception as err:
            print(f"   ❌ 失败: {err}")
            results[label] = {'success': False, 'error': str(err)}

    # Nothing to compare unless both reference methods ran
    if not (results['skimage']['success'] and results['efficient']['success']):
        return

    print(f"\n🔍 结果对比:")

    # Fresh small sample for an exact element-wise comparison
    small_test = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
    small_skimage = (rgb2gray(small_test) * 255).astype(np.uint8)
    small_efficient = efficient_rgb2gray(small_test)

    diff = np.abs(small_skimage.astype(np.int16) - small_efficient.astype(np.int16))
    print(f"   数值差异 (100x100样本): 最大={diff.max()}, 平均={diff.mean():.6f}")

    if diff.max() == 0:
        print("   ✅ 方法完全等价!")

    # Performance comparison
    skimage_stats = results['skimage']
    efficient_stats = results['efficient']
    skimage_time = skimage_stats['time']
    efficient_time = efficient_stats['time']
    skimage_mem = skimage_stats['memory_peak']
    efficient_mem = efficient_stats['memory_peak']

    print(f"\n📊 性能对比:")
    print(f"   时间: skimage={skimage_time:.2f}s vs efficient={efficient_time:.2f}s (快{skimage_time/efficient_time:.1f}x)")
    print(f"   内存: skimage={skimage_mem:.3f}GB vs efficient={efficient_mem:.3f}GB (省{skimage_mem-efficient_mem:.3f}GB)")

if __name__ == "__main__":
    # Run the consistency verification
    verify_rgb2gray_consistency()
    
    # Run the test at realistic image size
    test_with_real_size()
    
    print("\n🎉 验证完成!")

🔍 RGB2Gray方法一致性验证

📊 测试: 随机图像 (形状: (100, 100, 3))
----------------------------------------
1️⃣ 原始skimage方法...
   时间: 0.0450s, 内存: +0.0007GB
2️⃣ 高效替换方法...
   时间: 0.0019s, 内存: +0.0000GB
3️⃣ OpenCV默认方法（对比）...
   时间: 0.0031s

📈 结果分析:
   skimage vs 高效方法:
     最大差异: 0
     平均差异: 0.000000
     完全相同比例: 100.00%
     ✅ 结果完全一致!
   skimage vs OpenCV:
     最大差异: 31
     平均差异: 9.749
     差异>5的像素: 67.39%

⚡ 性能对比:
   skimage:   0.0450s, +0.0007GB
   高效方法:   0.0019s, +0.0000GB
   OpenCV:    0.0031s
   🚀 高效方法快 23.6x

📊 测试: 纯色测试 (形状: (1, 4, 3))
----------------------------------------
1️⃣ 原始skimage方法...
   时间: 0.0010s, 内存: +0.0000GB
2️⃣ 高效替换方法...
   时间: 0.0000s, 内存: +0.0000GB
3️⃣ OpenCV默认方法（对比）...
   时间: 0.0000s

📈 结果分析:
   skimage vs 高效方法:
     最大差异: 0
     平均差异: 0.000000
     完全相同比例: 100.00%
     ✅ 结果完全一致!
   skimage vs OpenCV:
     最大差异: 32
     平均差异: 16.250
     差异>5的像素: 75.00%

⚡ 性能对比:
   skimage:   0.0010s, +0.0000GB
   高效方法:   0.0000s, +0.0000GB
   OpenCV:    0.0000s
   🚀 高效方法快 27.6x

🔢 具体数值对比 (前几

In [3]:
import numpy as np
import psutil
import os
import time
from skimage.color import rgb2gray
import cv2

def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in GiB."""
    mem = psutil.Process(os.getpid()).memory_info()
    gib = 1024**3
    return mem.rss / gib

def memory_efficient_skimage_rgb2gray(image):
    """Convert RGB to uint8 grayscale by accumulating weighted channels in float32.

    Each channel is converted to float32, scaled by its weight
    (0.2125, 0.7154, 0.0721) and added into a single float32 accumulator, so
    only one channel-sized temporary is alive at a time. The final uint8 cast
    truncates the fractional part.
    """
    height, width = image.shape[:2]
    accumulator = np.zeros((height, width), dtype=np.float32)

    # One channel at a time, in R, G, B order
    for channel, weight in enumerate((0.2125, 0.7154, 0.0721)):
        accumulator += image[:, :, channel].astype(np.float32) * weight

    return accumulator.astype(np.uint8)

def ultra_memory_efficient_skimage_rgb2gray(image):
    """Integer-only approximation of the weighted RGB-to-gray conversion.

    The weights 0.2125 / 0.7154 / 0.0721 are approximated by the integers
    54 / 183 / 18 (each roughly weight * 255; they sum to 255) and the
    weighted sum is floor-divided by 255, so results can differ slightly from
    the floating-point formula.

    For uint8 input the largest possible sum is 255 * 255 = 65025, which fits
    in uint16, so the accumulation is done in place in a uint16 buffer. Any
    other input dtype keeps the wider uint32 accumulator.

    Returns:
        uint8 array of shape ``image.shape[:2]``.
    """
    # Narrowest accumulator that cannot overflow for the given input dtype
    acc_dtype = np.uint16 if image.dtype == np.uint8 else np.uint32

    weighted = image[:, :, 0].astype(acc_dtype) * 54     # 0.2125 * 255 ≈ 54
    weighted += image[:, :, 1].astype(acc_dtype) * 183   # 0.7154 * 255 ≈ 183
    weighted += image[:, :, 2].astype(acc_dtype) * 18    # 0.0721 * 255 ≈ 18
    weighted //= 255

    return weighted.astype(np.uint8)

def chunked_skimage_rgb2gray(image, chunk_size=1000):
    """Convert RGB to uint8 grayscale one horizontal band of rows at a time.

    The image is processed in bands of ``chunk_size`` rows; each band is
    combined with the weights 0.2125 / 0.7154 / 0.0721, cast to uint8
    (truncating) and written into a preallocated uint8 output.
    """
    rows, cols = image.shape[:2]
    gray = np.zeros((rows, cols), dtype=np.uint8)

    for top in range(0, rows, chunk_size):
        bottom = top + chunk_size  # slicing clamps this to the last row
        band = image[top:bottom]

        red, green, blue = band[:, :, 0], band[:, :, 1], band[:, :, 2]
        weighted = 0.2125 * red + 0.7154 * green + 0.0721 * blue

        gray[top:bottom] = weighted.astype(np.uint8)

    return gray

def compare_memory_methods():
    """Benchmark six RGB-to-gray implementations on a random 2000x2000 image.

    For each method the elapsed time and the RSS difference (right after the
    call and again after a garbage collection) are printed and stored, along
    with the result array and its mean / standard deviation. Afterwards every
    successful method is compared against the skimage-based result, and a
    summary table is printed.

    Returns:
        dict: method name -> stats dict ({'success': False, 'error': ...} for
        a method that raised).
    """
    import gc

    rule = "="*60

    print("🔍 内存优化方法对比")
    print(rule)

    # Moderately sized synthetic test image
    test_shape = (2000, 2000, 3)
    print(f"测试图像: {test_shape}, 大小: {np.prod(test_shape)/1024**2:.1f}MB")

    test_image = np.random.randint(0, 255, test_shape, dtype=np.uint8)

    methods = {
        'skimage原版': lambda img: (rgb2gray(img) * 255).astype(np.uint8),
        'OpenCV': lambda img: cv2.cvtColor(img, cv2.COLOR_RGB2GRAY),
        '原始替换': lambda img: (0.2125 * img[:,:,0] + 0.7154 * img[:,:,1] + 0.0721 * img[:,:,2]).astype(np.uint8),
        '内存优化版': memory_efficient_skimage_rgb2gray,
        '极致优化版': ultra_memory_efficient_skimage_rgb2gray,
        '分块处理版': chunked_skimage_rgb2gray
    }

    results = {}

    for label, convert in methods.items():
        print(f"\n📊 测试: {label}")
        print("-" * 30)

        try:
            # Collect garbage before taking the starting measurement
            gc.collect()

            mem_start = get_memory_usage()
            t_start = time.time()

            gray = convert(test_image)

            t_stop = time.time()
            mem_after_call = get_memory_usage()

            # Collect again to see what remains allocated
            gc.collect()
            mem_after_gc = get_memory_usage()

            entry = {
                'success': True,
                'time': t_stop - t_start,
                'peak_memory': mem_after_call - mem_start,
                'final_memory': mem_after_gc - mem_start,
                'result': gray,
                'mean': gray.mean(),
                'std': gray.std()
            }
            results[label] = entry

            print(f"   时间: {entry['time']:.3f}s")
            print(f"   峰值内存: +{entry['peak_memory']:.3f}GB")
            print(f"   最终内存: +{entry['final_memory']:.3f}GB")
            print(f"   结果统计: 均值={entry['mean']:.1f}, 标准差={entry['std']:.1f}")

        except Exception as err:
            print(f"   ❌ 失败: {err}")
            results[label] = {'success': False, 'error': str(err)}

    # Accuracy relative to the skimage-based result
    print("\n" + rule)
    print("📈 精确度对比 (vs skimage)")
    print(rule)

    if results['skimage原版']['success']:
        reference = results['skimage原版']['result']

        for label, entry in results.items():
            if label == 'skimage原版' or not entry['success']:
                continue

            # int16 so the subtraction cannot wrap around
            diff = np.abs(reference.astype(np.int16) - entry['result'].astype(np.int16))
            max_diff = diff.max()
            mean_diff = diff.mean()
            identical_pct = (diff == 0).mean() * 100

            status = "✅" if max_diff <= 1 else "⚠️"
            print(f"{status} {label:>12}: 最大差异={max_diff:>2}, 平均差异={mean_diff:.3f}, 相同率={identical_pct:.1f}%")

    # Performance summary
    print("\n" + rule)
    print("⚡ 性能总结")
    print(rule)

    print(f"{'方法':>12} {'时间(s)':>8} {'峰值内存(GB)':>12} {'推荐度':>8}")
    print("-" * 50)

    recommendations = {
        'skimage原版': "❌",
        'OpenCV': "⚠️",  # fast, but results differ
        '原始替换': "⚠️",  # accurate, but uses more memory
        '内存优化版': "✅",
        '极致优化版': "🏆",
        '分块处理版': "🔧"  # suited to very large images
    }

    for label, entry in results.items():
        if not entry['success']:
            continue
        rec = recommendations.get(label, "?")
        print(f"{label:>12} {entry['time']:>8.3f} {entry['peak_memory']:>12.3f} {rec:>8}")

    return results

def get_recommended_implementation():
    """Print the recommended replacement snippet for the rgb2gray conversion.

    Output only: a banner, a fenced Python snippet defining
    ``efficient_skimage_rgb2gray``, and a short list of advantages.
    Returns None.
    """
    rule = "="*60

    lines = [
        "\n" + rule,
        "🎯 推荐的最终实现",
        rule,
        "对于你的使用场景，推荐使用以下代码替换：",
        "",
        "```python",
        "def efficient_skimage_rgb2gray(image):",
        '    """内存高效且与skimage结果一致的rgb2gray"""',
        "    h, w = image.shape[:2]",
        "    result = np.zeros((h, w), dtype=np.float32)",
        "    ",
        "    # 逐通道累积，避免同时存在多个大数组",
        "    result += image[:,:,0].astype(np.float32) * 0.2125",
        "    result += image[:,:,1].astype(np.float32) * 0.7154",
        "    result += image[:,:,2].astype(np.float32) * 0.0721",
        "    ",
        "    return result.astype(np.uint8)",
        "",
        "# 替换原来的两行：",
        "# gray_image = rgb2gray(image)",
        "# gray_image = (gray_image * 255).astype(np.uint8)",
        "# ",
        "# 改为：",
        "gray_image = efficient_skimage_rgb2gray(image)",
        "```",
        "",
        "优势：",
        "✅ 与skimage结果完全一致",
        "✅ 内存使用比原方法少很多",
        "✅ 比OpenCV的内存占用稍高，但保证数值一致性",
        "✅ 速度比原skimage快很多",
    ]

    # One print per line keeps the emitted text identical to separate print calls
    for line in lines:
        print(line)

if __name__ == "__main__":
    # Run the benchmark and keep its per-method results dict at module level
    results = compare_memory_methods()
    # Print the recommended replacement snippet
    get_recommended_implementation()

🔍 内存优化方法对比
测试图像: (2000, 2000, 3), 大小: 11.4MB

📊 测试: skimage原版
------------------------------
   时间: 0.084s
   峰值内存: +0.004GB
   最终内存: +0.004GB
   结果统计: 均值=126.5, 标准差=55.2

📊 测试: OpenCV
------------------------------
   时间: 0.001s
   峰值内存: +0.000GB
   最终内存: +0.000GB
   结果统计: 均值=127.0, 标准差=49.2

📊 测试: 原始替换
------------------------------
   时间: 0.039s
   峰值内存: +0.030GB
   最终内存: +0.030GB
   结果统计: 均值=126.5, 标准差=55.2

📊 测试: 内存优化版
------------------------------
   时间: 0.014s
   峰值内存: +0.000GB
   最终内存: +0.000GB
   结果统计: 均值=126.5, 标准差=55.2

📊 测试: 极致优化版
------------------------------
   时间: 0.018s
   峰值内存: +0.000GB
   最终内存: +0.000GB
   结果统计: 均值=126.5, 标准差=55.3

📊 测试: 分块处理版
------------------------------
   时间: 0.022s
   峰值内存: +0.000GB
   最终内存: +0.000GB
   结果统计: 均值=126.5, 标准差=55.2

📈 精确度对比 (vs skimage)
⚠️       OpenCV: 最大差异=33, 平均差异=9.726, 相同率=3.0%
✅         原始替换: 最大差异= 1, 平均差异=0.000, 相同率=100.0%
✅        内存优化版: 最大差异= 1, 平均差异=0.000, 相同率=100.0%
✅        极致优化版: 最大差异= 1, 平均差异=0.170, 相同率=83.0%
✅      