In [9]:
from PIL import Image
import io
import base64

def compress_image(image_path, quality=75, max_size=(1024, 1024)):
    """Downscale an image and re-encode it as an in-memory JPEG.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        quality: JPEG quality value forwarded to ``Image.save``.
        max_size: ``(width, height)`` bounding box handed to
            ``Image.thumbnail``; the image is resized in place to fit it.

    Returns:
        io.BytesIO: Buffer containing the JPEG bytes, rewound to offset 0.
    """
    # Modes the JPEG encoder can write directly. Anything else (RGBA, LA, P,
    # I;16, ...) makes ``save(format="JPEG")`` raise, so it is converted to RGB
    # first. The original only handled RGBA and failed on palette PNGs.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(image_path) as img:
        if img.mode not in jpeg_modes:
            img = img.convert("RGB")

        img.thumbnail(max_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", quality=quality)
        # Rewind so the caller can read() the encoded bytes from the start.
        buffer.seek(0)
        return buffer

In [16]:
import tracemalloc
from functools import wraps

def measure_memory(func):
    """Decorator that reports the memory allocated while ``func`` runs.

    The wrapped function prints the total allocation difference and the ten
    largest per-line differences recorded by ``tracemalloc``.

    Returns:
        A wrapper whose return value is the tuple
        ``(func's return value, memory increase in MB)``.

    Notes:
        Tracing is stopped in a ``finally`` block, so an exception raised by
        ``func`` no longer leaves ``tracemalloc`` running. If tracing was
        already active when the wrapper was entered, it is left active.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Only stop tracing on exit if this call is the one that started it.
        started_here = not tracemalloc.is_tracing()
        if started_here:
            tracemalloc.start()

        try:
            before = tracemalloc.take_snapshot()
            result = func(*args, **kwargs)
            after = tracemalloc.take_snapshot()

            top_stats = after.compare_to(before, 'lineno')

            # Total memory increase, converted from bytes to MB.
            total_mem_diff = sum(stat.size_diff for stat in top_stats) / (1024 * 1024)

            print(f"函数 {func.__name__} 内存增加: {total_mem_diff:.2f} MB")
            print("[ 内存占用前10的项 ]")

            for stat in top_stats[:10]:
                # StatisticDiff exposes its location through ``traceback``;
                # with the 'lineno' key the first frame is the allocating line.
                if stat.traceback:
                    frame = stat.traceback[0]
                    line_info = f"{frame.filename}:{frame.lineno}"
                else:
                    line_info = "未知位置"

                print(f"{line_info}: {stat.size_diff / (1024):.2f} KB")
        finally:
            if started_here:
                tracemalloc.stop()

        return result, total_mem_diff

    return wrapper



In [19]:
import time
import requests
import json
@measure_memory
def run_gpt_api(prompt, base64_image, api_key=None, timeout=120):
    """Send a text prompt plus one JPEG image to the chat-completions endpoint.

    Args:
        prompt: Text part of the user message.
        base64_image: Base64-encoded JPEG bytes, embedded as a ``data:`` URL.
        api_key: API key to send. When omitted, it is read from the
            ``CHATANYWHERE_API_KEY`` environment variable.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The generated text on success, or ``None`` if the request fails, the
        server answers with a non-200 status, or the response body does not
        have the expected shape. Because of the ``@measure_memory`` decorator,
        callers actually receive ``(text_or_None, memory_increase_mb)``.

    Raises:
        RuntimeError: If no API key is supplied and the environment variable
            is not set.
    """
    import os  # local import: the imports of this cell do not include os

    # Never keep the key in the notebook. The key that used to be hardcoded
    # here has been exposed and should be revoked.
    if api_key is None:
        api_key = os.environ.get("CHATANYWHERE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "No API key: pass api_key=... or set the CHATANYWHERE_API_KEY "
            "environment variable."
        )

    # API endpoint
    url = "https://api.chatanywhere.tech/v1/chat/completions"

    # Request headers. The key is sent as-is, exactly as the original cell did.
    # NOTE(review): OpenAI-compatible endpoints usually expect
    # "Bearer <key>" here -- confirm against the provider's documentation.
    headers = {
        "Content-Type": "application/json",
        "Authorization": api_key,
    }

    # Request body: one user message carrying the prompt and the image.
    data = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "user", "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                }
            ]},
        ],
        "max_tokens": 2000,
        "temperature": 0.7,
        "n": 1
    }

    # Without a timeout, requests can block forever on a stalled connection.
    try:
        response = requests.post(
            url, headers=headers, data=json.dumps(data), timeout=timeout
        )
    except requests.RequestException as exc:
        print("请求失败，异常：", exc)
        return None

    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        print("错误信息：", response.text)
        return None

    # A 200 answer can still carry an unexpected body; report it instead of
    # raising so the failure path stays "print and return None".
    try:
        generated_text = response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError):
        print("响应格式异常：", response.text)
        return None

    print("生成的文本：", generated_text)
    return generated_text

In [21]:
# Usage example: score one figure and report how long the request took.
# The path is relative (resolved against the kernel's working directory), the
# same way the later cells walk "./image_analysis/"; the previous absolute path
# only existed on one machine.
image_path = "./image_analysis/page102.png"
buffer = compress_image(image_path, quality=100)
base64_image = base64.b64encode(buffer.read()).decode('utf-8')
prompt = '''
SYSTEM
You are a senior single-cell bioinformatician.

INPUT
• One composite figure.  Upper row: projection coloured by BATCH.  Lower row: projection coloured by CELL TYPE.  
• The left-most tile is “Unintegrated”; the remaining tiles are candidate methods.

VISUAL RUBRIC  (0 = poor, 5 = excellent)
M1  Batch mixing          – batch colours well intermixed inside clusters.
M2  Graph connectivity    – biologically related clusters remain weakly bridged.
M3  Cell-type silhouette  – cluster borders are crisp, single-coloured.
M4  Cell-type continuity  – each cell type forms one contiguous island.

Score each method.  
Compute total = 0.25(M1+M2+M3+M4).  

### output
only return the sorted json for all methods and its score. {"best method": 4, "2nd method": 3.5, "3rd method": 3...}
###Do not return any details of score, only output the json
'''
# perf_counter is monotonic, so the elapsed time cannot be skewed by system
# clock adjustments the way time.time() differences can.
start_time = time.perf_counter()
# run_gpt_api is wrapped by @measure_memory and returns (reply, memory_mb).
data, mem_usage = run_gpt_api(prompt, base64_image)
end_time = time.perf_counter()

execution_time = end_time - start_time
print(f"代码执行时间: {execution_time:.6f} 秒")


生成的文本： ```json
{"SATURN": 4, "scGen": 3.5, "Harmony": 3, "SAMap": 2.5, "scVI": 2.5, "Seurat v4 CCA": 2, "Scanorama": 2, "fastMNN": 1.5, "BBKNN": 1.5, "Unintegrated": 1}
```
函数 run_gpt_api 内存增加: 0.01 MB
[ 内存占用前10的项 ]
/opt/homebrew/anaconda3/envs/myenv/lib/python3.9/site-packages/urllib3/_collections.py:325: 0.71 KB
/opt/homebrew/anaconda3/envs/myenv/lib/python3.9/json/decoder.py:353: 0.64 KB
/opt/homebrew/anaconda3/envs/myenv/lib/python3.9/site-packages/urllib3/poolmanager.py:214: 0.39 KB
/opt/homebrew/anaconda3/envs/myenv/lib/python3.9/threading.py:231: 0.19 KB
/opt/homebrew/anaconda3/envs/myenv/lib/python3.9/site-packages/urllib3/_request_methods.py:52: 0.19 KB
/opt/homebrew/anaconda3/envs/myenv/lib/python3.9/site-packages/requests/hooks.py:16: 0.18 KB
/opt/homebrew/anaconda3/envs/myenv/lib/python3.9/site-packages/urllib3/poolmanager.py:384: 0.16 KB
/opt/homebrew/anaconda3/envs/myenv/lib/python3.9/site-packages/urllib3/poolmanager.py:118: 0.16 KB
/opt/homebrew/anaconda3/envs/myenv/lib

In [44]:
import json
def generate_json_str(result):
    """Extract and parse the JSON object embedded in a reply string.

    The reply may carry extra text around the JSON (for example a Markdown
    code fence). Everything from the first ``{`` through the last ``}`` is
    handed to ``json.loads``.

    Args:
        result: Reply text containing a JSON object.

    Returns:
        The value produced by ``json.loads`` for the extracted span.

    Raises:
        TypeError: If ``result`` is not a string (e.g. ``None`` after a
            failed request).
        ValueError: If the text contains no ``{``. ``json.JSONDecodeError``
            (a ``ValueError`` subclass) is raised when the extracted span is
            not valid JSON, including when there is no closing ``}``.
    """
    if not isinstance(result, str):
        raise TypeError(
            f"expected the reply text as str, got {type(result).__name__}"
        )

    start = result.find("{")
    if start == -1:
        raise ValueError("no JSON object found in reply text")

    # rfind returns -1 when there is no "}", which makes the slice empty and
    # lets json.loads report the problem, as the original loop did.
    end = result.rfind("}")
    return json.loads(result[start:end + 1])

In [45]:
# Parse the reply text held in `data`; as the last expression of the cell, the
# parsed result is displayed as the cell output.
# NOTE(review): the output recorded below differs from the reply printed by the
# example cell (e.g. 'BBKN' vs 'BBKNN'), so `data` presumably came from a
# different run -- confirm by re-running the cells in order.
generate_json_str(data)

{'SATURN': 4.0,
 'scGen': 3.5,
 'fastMNN': 3.5,
 'scVI': 3.5,
 'SAMap': 3.0,
 'Harmony': 3.0,
 'Scanorama': 3.0,
 'Seurat v4 CCA': 3.0,
 'BBKN': 2.5,
 'Unintegrated': 1.0}

In [46]:
import os
# Gather every file below ./image_analysis/ whose joined path contains the
# "./image_analysis/page" marker, in the order os.walk yields them.
PAGE_MARKER = "./image_analysis/page"
all_file = []
for dirpath, _subdirs, names in os.walk("./image_analysis/"):
    candidates = (os.path.join(dirpath, entry) for entry in names)
    all_file += [path for path in candidates if PAGE_MARKER in path]

In [47]:
# Number of page images collected into `all_file` by the directory walk.
len(all_file)

77

In [48]:
# Display the collected paths. The list is in os.walk order, which is not
# sorted, as the output below shows.
all_file

['./image_analysis/page21.png',
 './image_analysis/page34.png',
 './image_analysis/page36.png',
 './image_analysis/page157158.png',
 './image_analysis/page163164.png',
 './image_analysis/page148.png',
 './image_analysis/page32.png',
 './image_analysis/page26.png',
 './image_analysis/page8.png',
 './image_analysis/page30.png',
 './image_analysis/page24.png',
 './image_analysis/page19.png',
 './image_analysis/page42.png',
 './image_analysis/page104.png',
 './image_analysis/page110.png',
 './image_analysis/page138.png',
 './image_analysis/page154155.png',
 './image_analysis/page43.png',
 './image_analysis/page57.png',
 './image_analysis/page80.png',
 './image_analysis/page94.png',
 './image_analysis/page82.png',
 './image_analysis/page96.png',
 './image_analysis/page69.png',
 './image_analysis/page55.png',
 './image_analysis/page107.png',
 './image_analysis/page112.png',
 './image_analysis/page40.png',
 './image_analysis/page78.png',
 './image_analysis/page116.png',
 './image_analysis/pag

In [49]:
# Batch run: send every page image under ./image_analysis/ to the model and
# append one "<page id>\t<parsed scores>" line per image to the result file.

# The rubric prompt does not change between images, so it is built once.
# Its leading whitespace is part of the text sent to the model and is kept
# exactly as it was when the literal lived inside the loop.
prompt = '''
            SYSTEM
            You are a senior single-cell bioinformatician.
            
            INPUT
            • One composite figure.  Upper row: projection coloured by BATCH.  Lower row: projection coloured by CELL TYPE.  
            • The left-most tile is “Unintegrated”; the remaining tiles are candidate methods.
            
            VISUAL RUBRIC  (0 = poor, 5 = excellent)
            M1  Batch mixing          – batch colours well intermixed inside clusters.
            M2  Graph connectivity    – biologically related clusters remain weakly bridged.
            M3  Cell-type silhouette  – cluster borders are crisp, single-coloured.
            M4  Cell-type continuity  – each cell type forms one contiguous island.
            
            Score each method.  
            Compute total = 0.25(M1+M2+M3+M4).  
            
            ### output
            only return the sorted json for all methods and its score. {"best method": 4, "2nd method": 3.5, "3rd method": 3...}
            ###Do not return any details of score, only output the json
            '''

for root, dirs, files in os.walk("./image_analysis/"):
    for filename in files:
        file_path = os.path.join(root, filename)
        if "./image_analysis/page" not in file_path:
            continue

        # Page identifier: the path with its prefix and the .png suffix removed.
        name = file_path.replace("./image_analysis/page", "").replace(".png", "")
        buffer = compress_image(file_path, quality=100)
        base64_image = base64.b64encode(buffer.read()).decode('utf-8')

        # run_gpt_api is wrapped by @measure_memory, so it returns
        # (reply_text, memory_increase_mb); only the text is parsed. Passing
        # the whole tuple to generate_json_str, as before, fails.
        reply_text, _mem_mb = run_gpt_api(prompt, base64_image)
        if reply_text is None:
            # The request failed (already reported by run_gpt_api); keep going
            # so one bad call does not abort the whole batch.
            print(f"跳过 {file_path}：请求失败")
            continue

        try:
            data = generate_json_str(reply_text)
        except ValueError as exc:
            # Covers a reply with no JSON object and json.JSONDecodeError.
            print(f"跳过 {file_path}：无法解析返回的JSON（{exc}）")
            continue

        # Explicit encoding so non-ASCII method names are written the same way
        # on every platform.
        with open("gpt4o_mini_image_result.txt", "a+", encoding="utf-8") as k:
            k.write(name+"\t"+str(data) + "\n")
            

生成的文本： ```json
{"Harmony": 4.0, "scGate": 3.5, "Semi-supervised STACAS (2)": 3.5, "Semi-supervised STACAS (1)": 3.0, "LIGER clusters": 2.5, "Uncorrected": 1.5}
```
生成的文本： ```json
{"Harmony": 4, "scGen": 3.5, "MNN Correct": 3.5, "ComBat": 3, "limma": 3, "fastMNN": 2.5, "scMerge": 2.5, "LIGER": 2, "BBKNN": 2, "Seurat 3": 1.5, "Seurat 2": 1.5, "ZINB-WaVE": 1, "Scanorama": 1, "Raw": 0.5}
```
生成的文本： ```json
{"Harmony": 4, "scGen": 3.5, "MNN Correct": 3.5, "ComBat": 3.5, "scMerge": 3, "LIGER": 3, "ZINB-WaVE": 2.5, "fastMNN": 2.5, "Seurat 2": 2, "Seurat 3": 2, "limma": 2, "BBKNN": 1.5, "Raw": 1}
```
生成的文本： ```json
{"SAUCIE(Embedding)": 4, "Harmony": 3.5, "scANVI": 3, "scVI": 3, "Scanorama(Embedding)": 3, "FastMNN(Embedding)": 3, "Conos": 2.5, "MNN": 2.5, "Liger": 2, "BBKNN": 2, "DESC": 2, "SAUCIE(Features)": 2, "Scanorama(Features)": 2, "FastMNN(Features)": 2, "Combat": 1.5, "Unintegrated": 1}
```
生成的文本： ```json
{"SAUCIE (Features)": 4, "SAUCIE(Embedding)": 4, "scANVI": 3.5, "FastMNN(Embeddin