In [8]:
import numpy as np
import os

def load_3dgs_merged_data(merged_dir):
    """
    Load merged 3DGS data and rename its fields to the Pointcept key names.

    Args:
        merged_dir (str): Path handed straight to ``np.load``. Despite the
            parameter name, the code reads it as a single ``.npy`` file that
            holds a pickled dictionary.

    Returns:
        dict: Dictionary with the keys coord, color, normal, segment20,
            segment200 and instance, taken from the ``*_3dgs`` entries of the
            stored dictionary.
    """
    # NOTE(review): allow_pickle=True unpickles arbitrary objects, so this
    # must only be pointed at files from a trusted source.
    stored = np.load(merged_dir, allow_pickle=True).item()

    # (output key, key inside the stored dictionary), in output order.
    field_map = (
        ('coord', 'points_3dgs'),  # 3DGS point coordinates
        ('color', 'colors_3dgs'),
        ('normal', 'normals_3dgs'),
        ('segment20', 'labels_3dgs'),
        ('segment200', 'labels200_3dgs'),
        ('instance', 'instances_3dgs'),
    )
    return {target: stored[source] for target, source in field_map}
def load_pointcept_data(pointcept_dir):
    """
    Load one Pointcept scene from its per-attribute ``.npy`` files.

    Args:
        pointcept_dir (str): Scene directory holding ``<key>.npy`` files
            (e.g. scannet/train/scene0000_00).

    Returns:
        dict: Arrays keyed by coord, color, normal, segment20, segment200 and
            instance. segment200 and instance are read when their files exist;
            otherwise they are filled with an int64 array of -1 shaped like
            segment20.

    Raises:
        FileNotFoundError: If coord, color, normal or segment20 is missing.
    """
    def _npy_path(name):
        return os.path.join(pointcept_dir, f"{name}.npy")

    def _report_negatives(name, values):
        # Negative labels are only reported; they are neither filtered nor remapped.
        negative_mask = values < 0
        if np.any(negative_mask):
            print(f"Warning: Negative values found in {name}: {values[negative_mask]}")
            print(f"Keeping negative values as ignore_index (-1) following Pointcept's approach")

    loaded = {}

    # Mandatory attributes: abort on the first one that is missing.
    for name in ('coord', 'color', 'normal', 'segment20'):
        source = _npy_path(name)
        if not os.path.exists(source):
            raise FileNotFoundError(f"{name}.npy not found in {pointcept_dir}")
        loaded[name] = np.load(source)

    _report_negatives('segment20', loaded['segment20'])

    # Optional attributes: fall back to an all-ignore array when absent.
    for name in ('segment200', 'instance'):
        source = _npy_path(name)
        if not os.path.exists(source):
            print(f"Warning: {name}.npy not found in {pointcept_dir}, setting to ignore_index (-1)")
            loaded[name] = np.full_like(loaded['segment20'], -1, dtype=np.int64)
            continue
        loaded[name] = np.load(source)
        _report_negatives(name, loaded[name])

    point_total = loaded['coord'].shape[0]
    label_min = loaded['segment20'].min()
    label_max = loaded['segment20'].max()
    print(f"Loaded Pointcept data from {pointcept_dir}: {point_total} points")
    print(f"Segment20 label distribution: min={label_min}, max={label_max}")
    return loaded

In [11]:
import numpy as np

def validate_pointcept_data(data, scene_name="unknown"):
    """
    Validate the format and value ranges of a Pointcept-style data dictionary.

    Every problem is collected into ``format_errors`` instead of being raised.
    A missing key is reported and the checks that depend on it are skipped,
    so the function still returns a complete report for malformed input.

    Args:
        data (dict): Pointcept or merged data dictionary of numpy arrays.
        scene_name (str): Scene name, used for logging only.

    Returns:
        dict: ``scene_name``, ``point_count``, ``format_errors`` (list of
            messages) and ``stats`` (summary statistics for each attribute
            that is present).
    """
    print(f"\nValidating data for scene: {scene_name}")
    required_keys = ['coord', 'color', 'normal', 'segment20']
    optional_keys = ['segment200', 'instance']
    all_keys = required_keys + optional_keys

    # Container for the validation outcome.
    validation_results = {
        'scene_name': scene_name,
        'point_count': 0,
        'format_errors': [],
        'stats': {}
    }
    errors = validation_results['format_errors']
    stats = validation_results['stats']

    # 1. Key presence.
    for key in required_keys:
        if key not in data:
            errors.append(f"Required key '{key}' not found")
    for key in data:
        if key not in all_keys:
            errors.append(f"Unknown key '{key}' found")

    # 2. dtype and shape.
    # The point count normally comes from 'coord'. When 'coord' is missing
    # (already reported above), use the first attribute that is present so the
    # remaining shape checks stay meaningful.
    if 'coord' in data:
        num_points = len(data['coord'])
    else:
        num_points = next((len(data[key]) for key in all_keys if key in data), 0)
    validation_results['point_count'] = num_points
    checks = {
        'coord': {'dtype': np.float32, 'shape': (num_points, 3)},
        'color': {'dtype': np.uint8, 'shape': (num_points, 3)},
        'normal': {'dtype': np.float32, 'shape': (num_points, 3)},
        'segment20': {'dtype': np.int64, 'shape': (num_points,)},
        'segment200': {'dtype': np.int64, 'shape': (num_points,)},
        'instance': {'dtype': np.int64, 'shape': (num_points,)}
    }

    for key, check in checks.items():
        if key not in data:
            continue
        d = data[key]
        if d.dtype != check['dtype']:
            errors.append(
                f"'{key}' has incorrect dtype. Expected {check['dtype']}, got {d.dtype}")
        if d.shape != check['shape']:
            errors.append(
                f"'{key}' has incorrect shape. Expected {check['shape']}, got {d.shape}")

    # 3. Value ranges and statistics (each section only runs if its key exists).
    # coord: every value must be finite.
    if 'coord' in data:
        coord = data['coord']
        if not np.all(np.isfinite(coord)):
            errors.append("'coord' contains non-finite values (inf or nan)")
        stats['coord_mean'] = np.mean(coord, axis=0).tolist()
        stats['coord_std'] = np.std(coord, axis=0).tolist()

    # color: values must lie in 0~255.
    if 'color' in data:
        color = data['color']
        if np.any(color < 0) or np.any(color > 255):
            errors.append(
                f"'color' values must be between 0 and 255. Min: {color.min()}, Max: {color.max()}")
        stats['color_mean'] = np.mean(color, axis=0).tolist()
        stats['color_std'] = np.std(color, axis=0).tolist()

    # normal: vector norms should be close to 1.
    # The norm is taken along axis 1, so an array with fewer than two
    # dimensions is skipped here; its shape error was recorded in step 2.
    if 'normal' in data and np.ndim(data['normal']) >= 2:
        norms = np.linalg.norm(data['normal'], axis=1)
        norm_deviation = np.abs(norms - 1.0)
        if np.any(norm_deviation > 0.1):
            errors.append(
                f"'normal' vectors should be unit vectors. Max deviation: {norm_deviation.max():.4f}")
        stats['normal_norm_mean'] = float(np.mean(norms))
        stats['normal_norm_std'] = float(np.std(norms))

    # segment20 / segment200: labels must be -1 (ignore) or 0..num_classes-1.
    for key, num_classes in (('segment20', 20), ('segment200', 200)):
        if key not in data:
            continue
        labels = data[key]
        if np.any((labels < -1) | (labels >= num_classes)):
            errors.append(
                f"'{key}' contains invalid values. Expected -1 or 0~{num_classes - 1}, "
                f"got min={labels.min()}, max={labels.max()}")
        stats[f'{key}_ignore_ratio'] = float(np.mean(labels == -1))

    # instance: -1 (ignore) or any non-negative id.
    if 'instance' in data:
        instance = data['instance']
        if np.any(instance < -1):
            errors.append(
                f"'instance' contains invalid values. Expected -1 or >=0, got min={instance.min()}")
        stats['instance_ignore_ratio'] = float(np.mean(instance == -1))

    # Report.
    if errors:
        print("Validation failed with the following errors:")
        for error in errors:
            print(f"  - {error}")
    else:
        print("Validation passed successfully.")
    print(f"Point count: {validation_results['point_count']}")
    print("Statistics:")
    for key, value in stats.items():
        print(f"  {key}: {value}")

    return validation_results

# Usage example
# Validate the original Pointcept scene.
pointcept_dir = "/home/knuvi/Desktop/song/Pointcept/data/scannet/train/scene0010_00"
pointcept_data = load_pointcept_data(pointcept_dir)
pointcept_results = validate_pointcept_data(pointcept_data, "pointcept_scene0010_00")

# Validate the merged 3DGS scene (the path must be set by the user).
merged_dir = "/home/knuvi/Desktop/song/Pointcept/data/pdistance00005_scale05_keep-dup/train/scene0010_00"
three_dgs_merged_data = load_pointcept_data(merged_dir)
merged_results = validate_pointcept_data(three_dgs_merged_data, "3dgs_merged_scene0010_00")

# Side-by-side comparison of both validation reports.
print("\nComparison of Pointcept vs 3DGS Merged Data:")
print(f"Point count: Pointcept={pointcept_results['point_count']}, 3DGS Merged={merged_results['point_count']}")

print("Format Errors:")
pointcept_errors = pointcept_results['format_errors']
merged_errors = merged_results['format_errors']
print(f"  Pointcept: {len(pointcept_errors)} errors")
for error in pointcept_errors:
    print(f"    - {error}")
print(f"  3DGS Merged: {len(merged_errors)} errors")
for error in merged_errors:
    print(f"    - {error}")

# Only statistics present in the Pointcept report are listed; a statistic
# missing on the merged side is shown as "N/A".
print("Statistics Comparison:")
for key, pointcept_val in pointcept_results['stats'].items():
    merged_val = merged_results['stats'].get(key, "N/A")
    print(f"  {key}: Pointcept={pointcept_val}, 3DGS Merged={merged_val}")

Keeping negative values as ignore_index (-1) following Pointcept's approach
Keeping negative values as ignore_index (-1) following Pointcept's approach
Keeping negative values as ignore_index (-1) following Pointcept's approach
Loaded Pointcept data from /home/knuvi/Desktop/song/Pointcept/data/scannet/train/scene0010_00: 162573 points
Segment20 label distribution: min=-1, max=19

Validating data for scene: pointcept_scene0010_00
Validation passed successfully.
Point count: 162573
Statistics:
  coord_mean: [3.0450615882873535, 2.604457378387451, 0.7587577700614929]
  coord_std: [1.445813536643982, 1.5191539525985718, 0.5272013545036316]
  color_mean: [104.65806745277506, 86.74749189594829, 65.9369944578743]
  color_std: [80.04755646823514, 74.4538527526319, 67.06124811205638]
  normal_norm_mean: 0.9999808073043823
  normal_norm_std: 2.5010729586938396e-05
  segment20_ignore_ratio: 0.4307418821083452
  segment200_ignore_ratio: 0.09110983988731217
  instance_ignore_ratio: 0.09110983988731

In [None]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

def compute_local_consistency(data, k=10):
    """
    Compute local-consistency metrics over each point's nearest neighbours.

    For every point the ``k`` nearest entries are queried from the same point
    set and the point itself is removed, so up to ``k - 1`` neighbours are
    compared. Self is removed by index rather than by assuming it is always
    the first result, because a point that shares its coordinates with other
    points is not guaranteed to come back first.

    Args:
        data (dict): Data dictionary with ``coord``, ``color``, ``normal`` and
            ``segment20`` arrays that share their first dimension.
        k (int): Number of nearest entries queried per point (including the
            point itself). Clamped to the number of points.

    Returns:
        dict: Averages over all points:
            ``color_variance`` - per-channel colour variance among the
                neighbours, averaged over channels.
            ``normal_cosine_similarity`` - mean cosine similarity between a
                point's normal and its neighbours' normals; pairs with a
                non-positive norm product are skipped and a point with no
                usable pair contributes 0.
            ``label_consistency`` - fraction of neighbours sharing the point's
                segment20 label, averaged over points whose label is >= 0
                (0 when no such point exists).

    Raises:
        ValueError: If ``data['coord']`` contains no points.
    """
    coords = np.asarray(data['coord'])
    normals = np.asarray(data['normal'])
    labels = np.asarray(data['segment20'])

    num_points = len(coords)
    if num_points == 0:
        raise ValueError("compute_local_consistency requires at least one point")

    # Flatten trailing colour dimensions so the per-channel variance below
    # works for (N, C) as well as (N,) colour arrays.
    colors = np.asarray(data['color']).reshape(num_points, -1)

    # Never ask for more neighbours than there are points.
    n_query = min(k, num_points)
    nbrs = NearestNeighbors(n_neighbors=n_query, algorithm='auto').fit(coords)
    _, indices = nbrs.kneighbors(coords)

    # Drop exactly one entry per row: the point itself when it appears, or the
    # farthest entry when duplicates pushed the point out of its own result.
    drop = indices == np.arange(num_points)[:, None]
    self_missing = ~drop.any(axis=1)
    drop[self_missing, -1] = True
    neighbor_idx = indices[~drop].reshape(num_points, n_query - 1)

    # Colour variance: variance across neighbours per channel, then mean over channels.
    color_variance = np.var(colors[neighbor_idx], axis=1).mean(axis=1)

    # Normal cosine similarity, skipping pairs whose norm product is not positive.
    normal_lengths = np.linalg.norm(normals, axis=1)
    dots = np.einsum('ij,ikj->ik', normals, normals[neighbor_idx])
    norm_products = normal_lengths[:, None] * normal_lengths[neighbor_idx]
    usable = norm_products > 0
    cos_sims = np.divide(
        dots, norm_products,
        out=np.zeros(dots.shape, dtype=np.float64), where=usable)
    usable_counts = usable.sum(axis=1)
    mean_cos = np.divide(
        cos_sims.sum(axis=1), usable_counts,
        out=np.zeros(num_points, dtype=np.float64), where=usable_counts > 0)

    # Label agreement, evaluated only for points that carry a valid label.
    has_label = labels >= 0
    if has_label.any():
        matches = labels[neighbor_idx] == labels[:, None]
        label_consistency = np.mean(matches[has_label].mean(axis=1))
    else:
        label_consistency = 0

    return {
        'color_variance': np.mean(color_variance),
        'normal_cosine_similarity': np.mean(mean_cos),
        'label_consistency': label_consistency
    }

def validate_pattern_consistency(pointcept_dir, merged_dir, pointcept_count, scene_name="scene0010_00", k=10):
    """
    Compare local-consistency metrics between Pointcept and merged data.

    Metrics are computed for three point sets: the original Pointcept scene,
    the whole merged scene, and the part of the merged scene that follows the
    first ``pointcept_count`` points (treated as the 3DGS part). The results
    are printed; nothing is returned.

    Args:
        pointcept_dir (str): Path of the Pointcept scene directory.
        merged_dir (str): Path of the merged scene directory.
        pointcept_count (int): Number of leading points in the merged data
            that come from Pointcept.
        scene_name (str): Scene name, used in the printed header.
        k (int): Neighbour count passed to ``compute_local_consistency``.

    Raises:
        ValueError: If ``pointcept_count`` is negative or leaves no points in
            the 3DGS part of the merged data.
    """
    # Load both scenes.
    pointcept_data = load_pointcept_data(pointcept_dir)
    merged_data = load_pointcept_data(merged_dir)

    # Fail before the expensive neighbour searches: a negative count would
    # silently select the wrong slice and a too-large count leaves an empty
    # 3DGS part.
    merged_total = len(merged_data['coord'])
    if not 0 <= pointcept_count < merged_total:
        raise ValueError(
            f"pointcept_count must be in [0, {merged_total}) so that the 3DGS part "
            f"of the merged data is non-empty, got {pointcept_count}")

    # Everything after the Pointcept points is the 3DGS part.
    merged_3dgs_data = {
        key: merged_data[key][pointcept_count:]
        for key in ('coord', 'color', 'normal', 'segment20')
    }

    # Local-consistency metrics for each point set.
    pointcept_metrics = compute_local_consistency(pointcept_data, k=k)
    merged_all_metrics = compute_local_consistency(merged_data, k=k)
    merged_3dgs_metrics = compute_local_consistency(merged_3dgs_data, k=k)

    # Report: one section per metric, all with the same layout.
    report_sections = (
        ("Color Variance (lower is better):", 'color_variance'),
        ("\nNormal Cosine Similarity (higher is better):", 'normal_cosine_similarity'),
        ("\nLabel Consistency (higher is better):", 'label_consistency'),
    )
    print(f"\nPattern Consistency Validation for {scene_name} (All Data):")
    for heading, metric in report_sections:
        baseline = pointcept_metrics[metric]
        merged_all = merged_all_metrics[metric]
        merged_3dgs = merged_3dgs_metrics[metric]
        print(heading)
        print(f"  Pointcept: {baseline:.4f}")
        print(f"  Merged (All): {merged_all:.4f}")
        print(f"  Merged (3DGS part): {merged_3dgs:.4f}")
        print(f"  Difference (Merged All - Pointcept): {merged_all - baseline:.4f}")
        print(f"  Difference (Merged 3DGS - Pointcept): {merged_3dgs - baseline:.4f}")

# Usage example
pointcept_dir = "/home/knuvi/Desktop/song/Pointcept/data/scannet/train/scene0010_00"
merged_dir = "/home/knuvi/Desktop/song/Pointcept/data/pdistance00005_scale05_modi-attr/train/scene0010_00"
pointcept_count = 162573  # number of Pointcept points, taken from the merge log
validate_pattern_consistency(
    pointcept_dir,
    merged_dir,
    pointcept_count,
    scene_name="scene0010_00",
    k=10,
)