In [171]:
import os
from PIL import Image
import torch


In [98]:
base_dir = "test"
gen_dir = os.path.join(base_dir, "generated")
ref_dir = os.path.join(base_dir, "ref")
text_dir = os.path.join(base_dir, "text")

all_ids = sorted([
    fname.split('.')[0]
    for fname in os.listdir(gen_dir)
    if fname.endswith('.png')
])

results = []

for id_ in all_ids:
    gen_path = os.path.join(gen_dir, f"{id_}.png")
    ref_path = os.path.join(ref_dir, f"{id_}.png")
    text_path = os.path.join(text_dir, f"{id_}.txt")

    image_gen = Image.open(gen_path).convert("RGB")
    image_ref = Image.open(ref_path).convert("RGB")

    with open(text_path, 'r') as f:
        text = f.read().strip()

    inputs_gen = processor(text=[text], images=image_gen, return_tensors="pt", padding=True)
    inputs_ref = processor(text=[text], images=image_ref, return_tensors="pt", padding=True)

    with torch.no_grad():
        outputs_gen = model(**inputs_gen)
        outputs_ref = model(**inputs_ref)

    sim_gen = torch.cosine_similarity(outputs_gen.image_embeds, outputs_gen.text_embeds).item()
    sim_ref = torch.cosine_similarity(outputs_ref.image_embeds, outputs_ref.text_embeds).item()

    results.append((id_, sim_gen, sim_ref))


In [14]:
generated_avg = sum(sim_gen for _, sim_gen, _ in results) / len(results)
reference_avg = sum(sim_ref for _, _, sim_ref in results) / len(results)
print(f"Average Similarity - Generated: {generated_avg:.4f}, Reference: {reference_avg:.4f}")

for id_, sim_gen, sim_ref in results:
    print(f"ID: {id_} | Generated: {sim_gen:.4f} | Reference: {sim_ref:.4f}")
for id_, sim_gen, sim_ref in results:
    print(f"Difference: {(sim_gen - sim_ref):.4f}")


Average Similarity - Generated: 0.2305, Reference: 0.2297
ID: 001 | Generated: 0.2789 | Reference: 0.2019
ID: 002 | Generated: 0.1819 | Reference: 0.1859
ID: 003 | Generated: 0.2240 | Reference: 0.2321
ID: 004 | Generated: 0.1824 | Reference: 0.1743
ID: 005 | Generated: 0.2069 | Reference: 0.2138
ID: 006 | Generated: 0.2083 | Reference: 0.2039
ID: 007 | Generated: 0.2316 | Reference: 0.2304
ID: 008 | Generated: 0.2095 | Reference: 0.2138
ID: 009 | Generated: 0.2136 | Reference: 0.2202
ID: 010 | Generated: 0.2250 | Reference: 0.2195
ID: 011 | Generated: 0.2647 | Reference: 0.2790
ID: 012 | Generated: 0.2030 | Reference: 0.2138
ID: 013 | Generated: 0.2034 | Reference: 0.2168
ID: 014 | Generated: 0.2492 | Reference: 0.2546
ID: 015 | Generated: 0.2699 | Reference: 0.2776
ID: 016 | Generated: 0.2809 | Reference: 0.2768
ID: 017 | Generated: 0.2856 | Reference: 0.2899
Difference: 0.0770
Difference: -0.0040
Difference: -0.0081
Difference: 0.0082
Difference: -0.0069
Difference: 0.0044
Differenc

In [178]:
import cv2
import numpy as np
from scipy import ndimage
import matplotlib.pyplot as plt
from typing import Dict, List, Tuple, Any
import re
import os
from PIL import Image

class SimplifiedSpatialEvaluator:
    def __init__(self, image_size=(512, 512)):
        self.image_size = image_size
        self.regions = self._define_spatial_regions()
        
    def _define_spatial_regions(self) -> Dict[str, Tuple[slice, slice]]:
        h, w = self.image_size
        return {
            'north': (slice(0, h//3), slice(0, w)),
            'south': (slice(2*h//3, h), slice(0, w)),
            'east': (slice(0, h), slice(2*w//3, w)),
            'west': (slice(0, h), slice(0, w//3)),
            'center': (slice(h//3, 2*h//3), slice(w//3, 2*w//3)),
            'northeast': (slice(0, h//2), slice(w//2, w)),
            'northwest': (slice(0, h//2), slice(0, w//2)),
            'southeast': (slice(h//2, h), slice(w//2, w)),
            'southwest': (slice(h//2, h), slice(0, w//2)),
            'northern': (slice(0, h//2), slice(0, w)),
            'southern': (slice(h//2, h), slice(0, w)),
            'eastern': (slice(0, h), slice(w//2, w)),
            'western': (slice(0, h), slice(0, w//2)),
            'entire_west': (slice(0, h), slice(0, w//2)),
            'entire_east': (slice(0, h), slice(w//2, w)),
            'entire_north': (slice(0, h//2), slice(0, w)),
            'entire_south': (slice(h//2, h), slice(0, w)),
        }

class TerrainDetector:
    @staticmethod
    def detect_mountains(image_region: np.ndarray) -> float:
        if len(image_region.shape) == 3:
            gray = cv2.cvtColor(image_region, cv2.COLOR_RGB2GRAY)
            hsv = cv2.cvtColor(image_region, cv2.COLOR_RGB2HSV)
            rgb = image_region
        else:
            gray = image_region
            hsv = None
            rgb = None
        white_peak_score = 0
        if rgb is not None:
            white_mask = ((rgb[:,:,0] >= 180) & (rgb[:,:,1] >= 180) & (rgb[:,:,2] >= 180))
            light_gray_mask = ((rgb[:,:,0] >= 150) & (rgb[:,:,1] >= 150) & (rgb[:,:,2] >= 150) &
                            (np.abs(rgb[:,:,0] - rgb[:,:,1]) <= 30) & 
                            (np.abs(rgb[:,:,1] - rgb[:,:,2]) <= 30))
            
            peak_mask = white_mask | light_gray_mask
            white_peak_score = np.sum(peak_mask) / peak_mask.size
            
            if white_peak_score >= 0.15:
                white_peak_score = min(white_peak_score * 2.0, 1.0)
        
        edges = cv2.Canny(gray, 30, 120)
        edge_density = np.sum(edges > 0) / edges.size
        
        texture_score = np.std(gray) / 255.0

        mountain_color_score = 0
        if hsv is not None:
            # 棕色山脈
            brown_mask = ((hsv[:,:,0] >= 5) & (hsv[:,:,0] <= 25) & 
                        (hsv[:,:,1] >= 30) & (hsv[:,:,2] >= 40))
            # 灰色山脈
            gray_mask = ((hsv[:,:,1] <= 60) & (hsv[:,:,2] >= 60) & (hsv[:,:,2] <= 220))
            # 綠褐色山脈
            green_brown_mask = ((hsv[:,:,0] >= 20) & (hsv[:,:,0] <= 60) & 
                            (hsv[:,:,1] >= 20) & (hsv[:,:,2] >= 50))
            
            mountain_color_score = (np.sum(brown_mask) + np.sum(gray_mask) + 
                                np.sum(green_brown_mask)) / (hsv.shape[0] * hsv.shape[1])
        
        # 5. 高度變化檢測
        laplacian = cv2.Laplacian(gray, cv2.CV_64F)
        height_variation = np.std(laplacian) / 255.0
        
        # 6. 山峰形狀檢測
        peak_shape_score = 0
        if rgb is not None:
            # 使用形態學操作檢測山峰形狀
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
            peaks = cv2.morphologyEx(peak_mask.astype(np.uint8), cv2.MORPH_OPEN, kernel)
            
            # 檢測三角形或尖峰狀結構
            contours, _ = cv2.findContours(peaks, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            for contour in contours:
                if cv2.contourArea(contour) > 100:  # 足夠大的區域
                    # 計算凸包
                    hull = cv2.convexHull(contour)
                    hull_area = cv2.contourArea(hull)
                    contour_area = cv2.contourArea(contour)
                    if hull_area > 0:
                        convexity = contour_area / hull_area
                        if 0.6 <= convexity <= 0.95:  # 接近三角形
                            peak_shape_score = max(peak_shape_score, convexity)
        
        # 綜合評分 - 重點強化白色山峰檢測
        mountain_score = (
            white_peak_score * 0.35 +      # 白色山峰是關鍵特徵
            edge_density * 0.2 +
            texture_score * 0.15 +
            mountain_color_score * 0.15 +
            height_variation * 0.1 +
            peak_shape_score * 0.05
        )
        
        return min(mountain_score, 1.0)
    
    @staticmethod
    def detect_seas(image_region: np.ndarray) -> float:
        """檢測海洋特徵"""
        if len(image_region.shape) == 3:
            hsv = cv2.cvtColor(image_region, cv2.COLOR_RGB2HSV)
            gray = cv2.cvtColor(image_region, cv2.COLOR_RGB2GRAY)
        else:
            gray = image_region
            hsv = None
            
        # 1. 藍色檢測
        if hsv is not None:
            blue_mask1 = ((hsv[:,:,0] >= 90) & (hsv[:,:,0] <= 130) & 
                         (hsv[:,:,1] >= 50) & (hsv[:,:,2] >= 50))
            blue_mask2 = ((hsv[:,:,0] >= 100) & (hsv[:,:,0] <= 120) & 
                         (hsv[:,:,1] >= 100) & (hsv[:,:,2] >= 30))
            blue_ratio = (np.sum(blue_mask1) + np.sum(blue_mask2)) / (hsv.shape[0] * hsv.shape[1])
        else:
            blue_ratio = np.sum(gray <= 120) / gray.size
            
        # 2. 平滑度
        smoothness = 1.0 - (np.std(gray) / 255.0)
        
        # 3. 大面積連通性
        if hsv is not None:
            water_mask = blue_mask1 | blue_mask2
        else:
            water_mask = gray <= 120
            
        labeled, num_features = ndimage.label(water_mask)
        if num_features > 0:
            sizes = ndimage.sum(water_mask, labeled, range(num_features + 1))
            max_size = np.max(sizes[1:]) if len(sizes) > 1 else 0
            connectivity_score = max_size / water_mask.size
        else:
            connectivity_score = 0
            
        # 4. 邊緣特徵
        edges = cv2.Canny(gray, 30, 100)
        edge_simplicity = 1.0 - (np.sum(edges > 0) / edges.size)
        
        sea_score = (
            blue_ratio * 0.4 +
            smoothness * 0.2 +
            connectivity_score * 0.3 +
            edge_simplicity * 0.1
        )
        
        return min(sea_score, 1.0)
    
    @staticmethod
    def detect_lakes(image_region: np.ndarray) -> float:
        """檢測湖泊特徵"""
        if len(image_region.shape) == 3:
            hsv = cv2.cvtColor(image_region, cv2.COLOR_RGB2HSV)
            gray = cv2.cvtColor(image_region, cv2.COLOR_RGB2GRAY)
        else:
            gray = image_region
            hsv = None
            
        # 1. 藍色檢測
        if hsv is not None:
            blue_mask = ((hsv[:,:,0] >= 90) & (hsv[:,:,0] <= 130) & 
                        (hsv[:,:,1] >= 30) & (hsv[:,:,2] >= 40))
            blue_ratio = np.sum(blue_mask) / (hsv.shape[0] * hsv.shape[1])
        else:
            blue_ratio = np.sum(gray <= 130) / gray.size
            
        # 2. 中等大小的連通區域
        if hsv is not None:
            water_mask = blue_mask
        else:
            water_mask = gray <= 130
            
        labeled, num_features = ndimage.label(water_mask)
        medium_size_score = 0
        if num_features > 0:
            sizes = ndimage.sum(water_mask, labeled, range(num_features + 1))
            if len(sizes) > 1:
                valid_sizes = sizes[1:]
                total_area = water_mask.size
                for size in valid_sizes:
                    ratio = size / total_area
                    if 0.05 <= ratio <= 0.6:
                        medium_size_score = max(medium_size_score, ratio)
            
        # 3. 平滑度
        smoothness = 1.0 - (np.std(gray) / 255.0)
        
        # 4. 形狀規則性
        shape_score = 0
        if np.sum(water_mask) > 100:
            contours, _ = cv2.findContours(water_mask.astype(np.uint8), 
                                         cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if contours:
                largest_contour = max(contours, key=cv2.contourArea)
                area = cv2.contourArea(largest_contour)
                perimeter = cv2.arcLength(largest_contour, True)
                if perimeter > 0:
                    circularity = 4 * np.pi * area / (perimeter * perimeter)
                    shape_score = min(circularity, 1.0)
            
        lake_score = (
            blue_ratio * 0.3 +
            medium_size_score * 0.4 +
            smoothness * 0.2 +
            shape_score * 0.1
        )
        
        return min(lake_score, 1.0)
    
    @staticmethod
    def detect_rivers(image_region: np.ndarray) -> float:
        """檢測河流特徵"""
        if len(image_region.shape) == 3:
            hsv = cv2.cvtColor(image_region, cv2.COLOR_RGB2HSV)
            gray = cv2.cvtColor(image_region, cv2.COLOR_RGB2GRAY)
        else:
            gray = image_region
            hsv = None
            
        # 1. 藍色檢測
        if hsv is not None:
            blue_mask = ((hsv[:,:,0] >= 90) & (hsv[:,:,0] <= 130) & 
                        (hsv[:,:,1] >= 30) & (hsv[:,:,2] >= 30))
            blue_ratio = np.sum(blue_mask) / (hsv.shape[0] * hsv.shape[1])
        else:
            blue_ratio = np.sum(gray <= 120) / gray.size
            
        # 2. 細長形狀檢測
        if hsv is not None:
            water_mask = blue_mask
        else:
            water_mask = gray <= 120
            
        # 形態學操作
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        cleaned_mask = cv2.morphologyEx(water_mask.astype(np.uint8), 
                                      cv2.MORPH_CLOSE, kernel)
        
        # 骨架化檢測線性結構
        try:
            skeleton = cv2.ximgproc.thinning(cleaned_mask)
            skeleton_ratio = np.sum(skeleton > 0) / max(np.sum(cleaned_mask > 0), 1)
        except:
            # 如果沒有ximgproc，使用替代方法
            skeleton_ratio = 0.5
        
        # 3. 長寬比檢測
        contours, _ = cv2.findContours(cleaned_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        elongation_score = 0
        if contours:
            for contour in contours:
                if cv2.contourArea(contour) > 50:
                    rect = cv2.minAreaRect(contour)
                    width, height = rect[1]
                    if min(width, height) > 0:
                        aspect_ratio = max(width, height) / min(width, height)
                        if aspect_ratio > 3:
                            elongation_score = max(elongation_score, 
                                                 min(aspect_ratio / 10, 1.0))
        
        # 4. 連續性檢測
        labeled, num_features = ndimage.label(water_mask)
        continuity_score = 1.0 / max(num_features, 1) if num_features > 0 else 0
            
        river_score = (
            blue_ratio * 0.2 +
            skeleton_ratio * 0.3 +
            elongation_score * 0.3 +
            continuity_score * 0.2
        )
        
        return min(river_score, 1.0)

class TextParser:
    def __init__(self):
        self.direction_patterns = {
            'north': r'\b(north|northern|top|upper)\b',
            'south': r'\b(south|southern|bottom|lower)\b', 
            'east': r'\b(east|eastern|right)\b',
            'west': r'\b(west|western|left)\b',
            'center': r'\b(center|central|middle)\b',
            'northeast': r'\b(northeast|north-east|upper right)\b',
            'northwest': r'\b(northwest|north-west|upper left)\b',
            'southeast': r'\b(southeast|south-east|lower right)\b',
            'southwest': r'\b(southwest|south-west|lower left)\b',
            'entire_west': r'\b(entire west|whole west|throughout west)\b',
            'entire_east': r'\b(entire east|whole east|throughout east)\b',
            'entire_north': r'\b(entire north|whole north|throughout north)\b',
            'entire_south': r'\b(entire south|whole south|throughout south)\b',
        }
        
        self.terrain_patterns = {
            'mountains': r'\b(mountain|mountains|mountainous|peak|peaks|hill|hills|highland|range)\b',
            'seas': r'\b(sea|seas|ocean|oceans)\b',
            'lakes': r'\b(lake|lakes|pond|ponds)\b',
            'rivers': r'\b(river|rivers|stream|streams|waterway|waterways)\b',
        }
    
    def parse_description(self, text: str) -> List[Dict[str, Any]]:
        text = text.lower()
        spatial_relations = []
        
        # 1. 先解析複合描述
        compound_relations = self._parse_compound_descriptions(text)
        spatial_relations.extend(compound_relations)
        
        # 2. 解析單獨描述，按句子分割避免錯誤匹配
        individual_relations = self._parse_individual_descriptions(text)
        
        # 3. 合併結果，避免重複
        for rel in individual_relations:
            if not self._is_duplicate_relation(rel, spatial_relations):
                spatial_relations.append(rel)
        
        return spatial_relations
    
    def _parse_compound_descriptions(self, text: str) -> List[Dict[str, Any]]:
        """解析複合描述，如 'center and east are all lake'"""
        relations = []
        
        # 模式1: "A and B are all X" 或 "A and B are X"
        patterns = [
            r'(\w+)\s+and\s+(\w+)\s+are\s+all\s+(\w+)',
            r'(\w+)\s+and\s+(\w+)\s+are\s+(\w+)',
        ]
        
        for pattern in patterns:
            matches = re.finditer(pattern, text)
            for match in matches:
                dir1, dir2, terrain = match.groups()
                
                if (self._is_valid_direction(dir1) and 
                    self._is_valid_direction(dir2) and 
                    self._is_valid_terrain(terrain)):
                    
                    relations.append({
                        'terrain': self._normalize_terrain(terrain),
                        'direction': dir1,
                        'coverage': 'extensive' if 'all' in match.group(0) else 'partial',
                        'confidence': 0.9,
                        'context': match.group(0)
                    })
                    relations.append({
                        'terrain': self._normalize_terrain(terrain),
                        'direction': dir2,
                        'coverage': 'extensive' if 'all' in match.group(0) else 'partial',
                        'confidence': 0.9,
                        'context': match.group(0)
                    })
        
        # 模式2: "X in the A and B"
        pattern2 = r'(\w+)\s+in\s+the\s+(\w+)\s+and\s+(\w+)'
        matches = re.finditer(pattern2, text)
        for match in matches:
            terrain, dir1, dir2 = match.groups()
            
            if (self._is_valid_terrain(terrain) and
                self._is_valid_direction(dir1) and 
                self._is_valid_direction(dir2)):
                
                relations.append({
                    'terrain': self._normalize_terrain(terrain),
                    'direction': dir1,
                    'coverage': 'partial',
                    'confidence': 0.8,
                    'context': match.group(0)
                })
                relations.append({
                    'terrain': self._normalize_terrain(terrain),
                    'direction': dir2,
                    'coverage': 'partial',
                    'confidence': 0.8,
                    'context': match.group(0)
                })
        
        return relations
    
    def _parse_individual_descriptions(self, text: str) -> List[Dict[str, Any]]:
        """解析單獨的地形描述 - 按句子分割"""
        spatial_relations = []
        
        # 按標點符號分割句子，避免跨句匹配
        sentences = re.split(r'[,;]|\s+while\s+|\s+and\s+(?=the)', text)
        
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue
            
            # 在每個句子中尋找地形和對應的方向
            sentence_relations = self._parse_single_sentence(sentence)
            spatial_relations.extend(sentence_relations)
        
        return spatial_relations
    
    def _parse_single_sentence(self, sentence: str) -> List[Dict[str, Any]]:
        """解析單個句子中的地形描述"""
        relations = []
        
        for terrain, terrain_pattern in self.terrain_patterns.items():
            terrain_matches = re.finditer(terrain_pattern, sentence)
            
            for terrain_match in terrain_matches:
                # 在同一句子中尋找方向
                direction_found = False
                
                for direction, direction_pattern in self.direction_patterns.items():
                    if re.search(direction_pattern, sentence):
                        # 確定覆蓋程度
                        coverage = 'partial'
                        if re.search(r'\b(all|entire|whole|cover|covering)\b', sentence):
                            coverage = 'extensive'
                        
                        relations.append({
                            'terrain': terrain,
                            'direction': direction,
                            'coverage': coverage,
                            'confidence': self._calculate_confidence(sentence, terrain, direction),
                            'context': sentence
                        })
                        direction_found = True
                        break
                
                # 如果沒有找到明確方向，標記為一般性描述
                if not direction_found:
                    relations.append({
                        'terrain': terrain,
                        'direction': 'general',
                        'coverage': 'partial',
                        'confidence': 0.3,
                        'context': sentence
                    })
        
        return relations
    
    def _is_valid_direction(self, direction: str) -> bool:
        """檢查是否是有效的方向詞"""
        direction_words = ['north', 'south', 'east', 'west', 'center', 'central', 'middle',
                          'northeast', 'northwest', 'southeast', 'southwest',
                          'northern', 'southern', 'eastern', 'western']
        return direction in direction_words
    
    def _is_valid_terrain(self, terrain: str) -> bool:
        """檢查是否是有效的地形詞"""
        terrain_words = ['mountain', 'mountains', 'mountainous', 'lake', 'lakes', 
                        'sea', 'seas', 'ocean', 'oceans', 'river', 'rivers',
                        'peak', 'peaks', 'hill', 'hills', 'pond', 'ponds']
        return terrain in terrain_words
    
    def _normalize_terrain(self, terrain: str) -> str:
        """標準化地形名稱"""
        if terrain in ['mountain', 'mountains', 'mountainous', 'peak', 'peaks', 'hill', 'hills']:
            return 'mountains'
        elif terrain in ['lake', 'lakes', 'pond', 'ponds']:
            return 'lakes'
        elif terrain in ['sea', 'seas', 'ocean', 'oceans']:
            return 'seas'
        elif terrain in ['river', 'rivers']:
            return 'rivers'
        return terrain
    
    def _is_duplicate_relation(self, new_rel: Dict, existing_rels: List[Dict]) -> bool:
        """檢查是否是重複的關係"""
        for existing in existing_rels:
            if (existing['terrain'] == new_rel['terrain'] and 
                existing['direction'] == new_rel['direction']):
                return True
        return False
    
    def _calculate_confidence(self, context: str, terrain: str, direction: str) -> float:
        """計算關係的可信度"""
        confidence = 0.6  # 基礎信心度
        
        # 距離因子
        terrain_pos = context.find(terrain)
        direction_pos = context.find(direction.replace('_', ' '))
        if terrain_pos != -1 and direction_pos != -1:
            distance = abs(terrain_pos - direction_pos)
            confidence += max(0, (20 - distance) / 20 * 0.2)
        
        # 關鍵詞加分
        if 'all' in context:
            confidence += 0.15
        if 'cover' in context or 'covering' in context:
            confidence += 0.1
        if 'are' in context:
            confidence += 0.05
            
        return min(confidence, 1.0)

class GeographicEvaluator:
    """地理特徵評估器"""
    
    def __init__(self):
        self.spatial_evaluator = SimplifiedSpatialEvaluator()
        self.terrain_detector = TerrainDetector()
        self.text_parser = TextParser()
    
    def evaluate_geographic_accuracy(self, image: np.ndarray, description: str) -> Dict[str, Any]:
        """評估地理描述的準確性"""
        
        # 調整圖像大小
        if image.shape[:2] != self.spatial_evaluator.image_size:
            image = cv2.resize(image, self.spatial_evaluator.image_size)
        
        # 解析文字描述
        spatial_relations = self.text_parser.parse_description(description)
        
        # 評估每個空間關係
        relation_scores = []
        detailed_results = {}
        
        for relation in spatial_relations:
            terrain = relation['terrain']
            direction = relation['direction']
            coverage = relation['coverage']
            
            if direction == 'general':
                terrain_score = self._evaluate_terrain_globally(image, terrain)
            else:
                if direction in self.spatial_evaluator.regions:
                    region_slice = self.spatial_evaluator.regions[direction]
                    region = image[region_slice]
                    
                    if terrain == 'mountains':
                        terrain_score = self.terrain_detector.detect_mountains(region)
                    elif terrain == 'seas':
                        terrain_score = self.terrain_detector.detect_seas(region)
                    elif terrain == 'lakes':
                        terrain_score = self.terrain_detector.detect_lakes(region)
                    elif terrain == 'rivers':
                        terrain_score = self.terrain_detector.detect_rivers(region)
                    else:
                        terrain_score = 0.0
                else:
                    terrain_score = 0.0
            
            if coverage == 'extensive' and direction != 'general':
                coverage_score = self._evaluate_coverage(region, terrain)
                final_score = terrain_score * coverage_score
            else:
                final_score = terrain_score
            
            relation_score = final_score * relation['confidence']
            relation_scores.append(relation_score)
            
            detailed_results[f"{terrain}_{direction}"] = {
                'terrain_score': terrain_score,
                'coverage_score': coverage_score if coverage == 'extensive' else 1.0,
                'confidence': relation['confidence'],
                'final_score': relation_score,
                'context': relation['context']
            }
        
        overall_score = np.mean(relation_scores) if relation_scores else 0.0
        
        return {
            'overall_score': overall_score,
            'individual_relations': detailed_results,
            'parsed_relations': spatial_relations,
            'total_relations_found': len(spatial_relations)
        }
    
    def _evaluate_terrain_globally(self, image: np.ndarray, terrain: str) -> float:
        """全局評估地形特徵"""
        if terrain == 'mountains':
            return self.terrain_detector.detect_mountains(image)
        elif terrain == 'seas':
            return self.terrain_detector.detect_seas(image)
        elif terrain == 'lakes':
            return self.terrain_detector.detect_lakes(image)
        elif terrain == 'rivers':
            return self.terrain_detector.detect_rivers(image)
        return 0.0
    
    def _evaluate_coverage(self, region: np.ndarray, terrain: str) -> float:
        """評估地形覆蓋程度"""
        h, w = region.shape[:2]
        grid_size = 32
        coverage_count = 0
        total_grids = 0
        
        for i in range(0, h, grid_size):
            for j in range(0, w, grid_size):
                grid = region[i:i+grid_size, j:j+grid_size]
                if grid.size > 0:
                    if terrain == 'mountains':
                        score = self.terrain_detector.detect_mountains(grid)
                    elif terrain == 'seas':
                        score = self.terrain_detector.detect_seas(grid)
                    elif terrain == 'lakes':
                        score = self.terrain_detector.detect_lakes(grid)
                    elif terrain == 'rivers':
                        score = self.terrain_detector.detect_rivers(grid)
                    else:
                        score = 0.0
                    
                    if score > 0.3:
                        coverage_count += 1
                    total_grids += 1
        
        return coverage_count / total_grids if total_grids > 0 else 0.0

# 適配您的檔案結構的主要評估函數
def evaluate_geographic_spatial_relations():
    """評估地理空間關係 - 適配您的檔案結構"""
    
    # 初始化評估器
    evaluator = GeographicEvaluator()
    
    # 設定目錄路徑
    base_dir = "test"
    gen_dir = os.path.join(base_dir, "generated")
    ref_dir = os.path.join(base_dir, "ref")
    text_dir = os.path.join(base_dir, "text")
    
    # 獲取所有ID
    all_ids = sorted([
        fname.split('.')[0]
        for fname in os.listdir(gen_dir)
        if fname.endswith('.png')
    ])
    
    results = []
    
    print(f"開始評估 {len(all_ids)} 組圖片的地理空間關係...")
    
    for i, id_ in enumerate(all_ids):
        gen_path = os.path.join(gen_dir, f"{id_}.png")
        ref_path = os.path.join(ref_dir, f"{id_}.png")
        text_path = os.path.join(text_dir, f"{id_}.txt")
        
        try:
            # 載入圖片
            image_gen = Image.open(gen_path).convert("RGB")
            image_ref = Image.open(ref_path).convert("RGB")
            
            # 轉換為numpy陣列
            image_gen_np = np.array(image_gen)
            image_ref_np = np.array(image_ref)
            
            # 讀取文字描述
            with open(text_path, 'r', encoding='utf-8') as f:
                text = f.read().strip()
            
            # 評估生成圖片的地理準確性
            result_gen = evaluator.evaluate_geographic_accuracy(image_gen_np, text)
            
            # 評估參考圖片的地理準確性
            result_ref = evaluator.evaluate_geographic_accuracy(image_ref_np, text)
            
            # 計算分數
            score_gen = result_gen['overall_score']
            score_ref = result_ref['overall_score']
            
            # 決定勝出者
            winner = 'generated' if score_gen > score_ref else 'reference'
            score_diff = abs(score_gen - score_ref)
            
            results.append({
                'id': id_,
                'score_gen': score_gen,
                'score_ref': score_ref,
                'winner': winner,
                'score_difference': score_diff,
                'text': text,
                'details_gen': result_gen,
                'details_ref': result_ref
            })
            
            # 進度顯示
            if (i + 1) % 10 == 0 or i == len(all_ids) - 1:
                print(f"已處理: {i + 1}/{len(all_ids)}")
        
        except Exception as e:
            print(f"處理 {id_} 時發生錯誤: {e}")
            continue
    
    return results

def analyze_results(results):
    """分析評估結果"""
    if not results:
        print("沒有結果可分析")
        return
    
    # 統計資訊
    total_count = len(results)
    gen_wins = sum(1 for r in results if r['winner'] == 'generated')
    ref_wins = sum(1 for r in results if r['winner'] == 'reference')
    
    avg_score_gen = np.mean([r['score_gen'] for r in results])
    avg_score_ref = np.mean([r['score_ref'] for r in results])
    avg_score_diff = np.mean([r['score_difference'] for r in results])
    
    print("=== 地理空間關係評估結果 ===")
    print(f"總數量: {total_count}")
    print(f"生成圖片較高: {gen_wins} ({gen_wins/total_count*100:.1f}%)")
    print(f"原始圖片較高: {ref_wins} ({ref_wins/total_count*100:.1f}%)")
    print(f"平均分數 - 生成: {avg_score_gen:.4f}")
    print(f"平均分數 - 原始: {avg_score_ref:.4f}")
    print(f"平均分數差距: {avg_score_diff:.4f}")
    
def save_detailed_results(results, output_file='geographic_evaluation_results.txt'):
    """保存詳細結果到檔案"""
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("地理空間關係評估詳細結果\n")
        f.write("=" * 50 + "\n\n")
        
        for result in results:
            f.write(f"ID: {result['id']}\n")
            f.write(f"描述: {result['text']}\n")
            f.write(f"生成圖片分數: {result['score_gen']:.4f}\n")
            f.write(f"原始圖片分數: {result['score_ref']:.4f}\n")
            f.write(f"較高者: {result['winner']}\n")
            f.write(f"分數差距: {result['score_difference']:.4f}\n")
            f.write("\n生成圖片分析:\n")
            for relation, details in result['details_gen']['individual_relations'].items():
                f.write(f"  {relation}: {details['final_score']:.4f}\n")
            
            f.write("\n原始圖片分析:\n")
            for relation, details in result['details_ref']['individual_relations'].items():
                f.write(f"  {relation}: {details['final_score']:.4f}\n")
            f.write("\n" + "-" * 50 + "\n\n")
    
    print(f"詳細結果已保存至: {output_file}")

# 主執行函數
def main():
    results = evaluate_geographic_spatial_relations()
    analyze_results(results)
    save_detailed_results(results)
    
    return results

if __name__ == "__main__":
    results = main()


開始評估 30 組圖片的地理空間關係...
已處理: 10/30
已處理: 20/30
已處理: 30/30
=== 地理空間關係評估結果 ===
總數量: 30
生成圖片較高: 27 (90.0%)
原始圖片較高: 3 (10.0%)
平均分數 - 生成: 0.3544
平均分數 - 原始: 0.2710
平均分數差距: 0.0853
詳細結果已保存至: geographic_evaluation_results.txt
