# 11. 최종 비교: 산업폐기물 vs MatterGen (수화 반응 기반)

## 목적
**수화 시뮬레이션 결과**를 기반으로 기존 산업폐기물 Top 5와  
MatterGen이 생성한 구조를 정량적으로 비교합니다.

---

### 비교 메트릭 (수화 반응 기반)

1. **에너지 변화 (dE)**: 수화 반응의 열역학적 유리성
2. **Ca 용출**: Ca 이동 거리/용출률 (C-S-H 형성 지표)
3. **Si 배위수**: 실리케이트 네트워크 안정성
4. **CO2 저감**: 환경적 잠재력

## 1. 환경 설정

In [None]:
import sys
from pathlib import Path
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Project paths: the project root is taken to be two directory levels above
# the current working directory.
notebook_dir = Path.cwd()
PROJECT_ROOT = notebook_dir.parent.parent
RESULTS_DIR = PROJECT_ROOT.joinpath('data', 'results')  # JSON/CSV inputs and outputs
FIGURES_DIR = PROJECT_ROOT.joinpath('figures')  # destination of the saved plots

print(f"Project Root: {PROJECT_ROOT}")

## 2. 데이터 로딩

In [None]:
# Industrial-waste hydration data (full screening results)
screening_path = RESULTS_DIR / 'pipeline_screening_results.json'
screening_data = json.loads(screening_path.read_text(encoding='utf-8'))

# List of the Top 5 candidates
top5_path = RESULTS_DIR / 'top_candidates.json'
top5_data = json.loads(top5_path.read_text(encoding='utf-8'))

TOP5_NAMES = top5_data['top_candidates']

# Echo each selected candidate together with its chemical formula
print("Industrial Waste Top 5:")
print("-" * 40)
for candidate in TOP5_NAMES:
    candidate_formula = screening_data['results'][candidate]['formula']
    print(f"  {candidate}: {candidate_formula}")

In [None]:
# MatterGen hydration data
mattergen_path = RESULTS_DIR / 'mattergen_hydration.json'
mattergen_data = json.loads(mattergen_path.read_text(encoding='utf-8'))

# Show the formula before/after hydration and the energy change per structure
print("\nMatterGen Hydration Results:")
print("-" * 40)
for result in mattergen_data['results']:
    before = result['original_formula']
    after = result['hydrated_formula']
    print(f"  {result['name']}: {before} -> {after}")
    print(f"    dE = {result['energy_change']:.2f} eV")

## 3. 수화 메트릭 추출 및 정규화

In [None]:
# Flatten both data sources into one record per candidate so that they can be
# compared side by side in a single DataFrame.

# Industrial-waste Top 5
waste_records = []
for candidate in TOP5_NAMES:
    entry = screening_data['results'][candidate]
    analysis_block = entry.get('analysis', {})
    leaching = analysis_block.get('ca_leaching', {})
    si_stats = analysis_block.get('si_coordination', {})
    optimization = entry.get('optimization', {})

    # The stored score is either a plain value or a dict carrying a 'total' entry
    raw_score = entry.get('score', 0)
    if isinstance(raw_score, dict):
        raw_score = raw_score.get('total', 0)

    # Energy change = final minus initial optimization energy (each defaults to 0)
    energy_delta = optimization.get('final_energy', 0) - optimization.get('initial_energy', 0)

    waste_records.append({
        'name': candidate,
        'source': 'Industrial Waste',
        'formula': entry['formula'],
        'energy_change': energy_delta,
        'energy_per_atom': optimization.get('energy_per_atom', None),
        'ca_leaching_rate': leaching.get('rate_per_ps', 0),
        'ca_final_leached': leaching.get('final_leached', 0),
        'si_coord_mean': si_stats.get('mean', 4.0),
        'co2_reduction': entry.get('co2_reduction', 75),
        'original_score': raw_score,
    })

# MatterGen structures
mattergen_records = []
for result in mattergen_data['results']:
    indicators = result.get('csh_indicators', {})

    mattergen_records.append({
        'name': result['name'].replace('_opt', ''),
        'source': 'MatterGen',
        'formula': result['original_formula'],
        'energy_change': result['energy_change'],
        # Rough estimate: divides by a fixed 30 (presumably an assumed atom
        # count per structure -- TODO confirm)
        'energy_per_atom': result['final_energy'] / 30,
        'ca_displacement': indicators.get('ca_displacement', 0),
        'si_coord_mean': indicators.get('si_coordination_mean', 0),
        # Fixed assumption: AI-generated structure = no clinker
        'co2_reduction': 90,
        'original_score': None,
    })

comparison_data = waste_records + mattergen_records

df = pd.DataFrame(comparison_data)
summary_columns = ['name', 'source', 'formula', 'energy_change', 'co2_reduction']
print("Combined hydration data:")
print(df[summary_columns].to_string())

In [None]:
# 수화 반응 점수 계산

def _is_missing(value):
    """Return True for None or NaN (NaN is the only value unequal to itself)."""
    return value is None or value != value


def _bounded_score(raw):
    """Clamp a raw sub-score to the 0-100 range; a missing value scores 0."""
    if _is_missing(raw):
        return 0
    return max(0, min(100, raw))


def calculate_hydration_score(row):
    """
    Compute the composite hydration-reaction score for one candidate.

    Sub-scores (each limited to 0-100):
    - Energy change (weight 40%): more negative is better.
    - Ca activity (weight 30%): higher is better. Uses 'ca_leaching_rate' for
      'Industrial Waste' rows and 'ca_displacement' for every other source.
    - Si coordination (weight 30%): closer to 4 is better.

    A metric that is missing (None or NaN) contributes a sub-score of 0.
    Previously a NaN Ca value produced 100 because ``min(100, nan)`` returns
    100, and a negative Ca value produced a negative sub-score.

    Args:
        row: Mapping-like record (e.g. a DataFrame row or a dict) supporting
            ``row[key]`` and ``row.get(key, default)``. Must contain
            'energy_change' and 'source'.

    Returns:
        Tuple ``(total_score, energy_score, ca_score, si_score)``.
    """
    # 1. Energy change score (40%)
    # -200 or lower -> 100 points, 0 -> 50 points, +10 or higher -> 0 points
    energy = row['energy_change']
    if _is_missing(energy):
        energy_score = 0
    elif energy < 0:
        energy_score = _bounded_score(50 + abs(energy) * 0.25)
    else:
        energy_score = _bounded_score(50 - energy * 5)

    # 2. Ca activity score (30%)
    if row['source'] == 'Industrial Waste':
        ca_value = row.get('ca_leaching_rate', 0)
        ca_scale = 100  # a rate of 1.0/ps maps to 100 points
    else:
        ca_value = row.get('ca_displacement', 0)
        ca_scale = 10  # a displacement of 10 A maps to 100 points
    ca_score = 0 if _is_missing(ca_value) else _bounded_score(ca_value * ca_scale)

    # 3. Si coordination score (30%)
    # Loses 25 points per unit of deviation from 4 (regular tetrahedral coordination)
    si_coord = row.get('si_coord_mean', 4)
    si_score = 0 if _is_missing(si_coord) else _bounded_score(100 - abs(si_coord - 4) * 25)

    # Weighted average of the three sub-scores
    total_score = energy_score * 0.4 + ca_score * 0.3 + si_score * 0.3

    return total_score, energy_score, ca_score, si_score

# Score every row; each result is a tuple (total, energy, ca, si)
scores = df.apply(calculate_hydration_score, axis=1)

# Spread the tuple positions over their own columns
score_columns = ['hydration_score', 'energy_score', 'ca_score', 'si_score']
for position, column in enumerate(score_columns):
    df[column] = [row_scores[position] for row_scores in scores]

# CO2-weighted final score: 70% hydration score, 30% CO2 reduction
hydration_part = df['hydration_score'] * 0.7
co2_part = df['co2_reduction'] * 0.3
df['final_score'] = hydration_part + co2_part

# Grade assignment
def assign_grade(score):
    """Return the letter grade for a final score.

    Cut-offs: 85 and above is 'A', 70 and above is 'B', 55 and above is 'C';
    everything else is 'D'.
    """
    cutoffs = ((85, 'A'), (70, 'B'), (55, 'C'))
    for minimum, letter in cutoffs:
        if score >= minimum:
            return letter
    return 'D'

# Attach a letter grade to every candidate based on its final score
df['grade'] = df['final_score'].map(assign_grade)

# Print the per-metric scores next to the totals
score_report_columns = [
    'name', 'source', 'energy_score', 'ca_score', 'si_score',
    'hydration_score', 'final_score', 'grade',
]
print("\nHydration-based scores:")
print(df[score_report_columns].to_string())

## 4. 최종 순위

In [None]:
# Final ranking: highest final score first, ranks numbered from 1
df_ranked = df.sort_values('final_score', ascending=False)
df_ranked = df_ranked.reset_index(drop=True)
df_ranked['rank'] = range(1, len(df_ranked) + 1)

rule = "=" * 70
print("\n" + rule)
print("FINAL RANKING: Industrial Waste vs MatterGen (Hydration-Based)")
print(rule)
print()

# Two lines per candidate: identity first, then the score breakdown
for _, candidate in df_ranked.iterrows():
    if candidate['source'] == 'Industrial Waste':
        source_tag = "[IW]"
    else:
        source_tag = "[MG]"
    print(f"  #{candidate['rank']:2d}  {source_tag} {candidate['name']:<20} ({candidate['formula']})")
    print(f"       Hydration: {candidate['hydration_score']:.1f}  CO2: {candidate['co2_reduction']}%  Final: {candidate['final_score']:.1f}  Grade: {candidate['grade']}")
    print()

## 5. 시각화

In [None]:
# Comparison bar chart: one horizontal bar per candidate, in rank order
fig, ax = plt.subplots(figsize=(12, 6))

# Blue for industrial-waste candidates, magenta for every other source
colors = ['#2E86AB' if s == 'Industrial Waste' else '#A23B72' for s in df_ranked['source']]

bars = ax.barh(range(len(df_ranked)), df_ranked['final_score'], color=colors, alpha=0.8)

# Labels: one tick per candidate, shown as "#rank name"
ax.set_yticks(range(len(df_ranked)))
ax.set_yticklabels([f"#{r['rank']} {r['name']}" for _, r in df_ranked.iterrows()])
ax.set_xlabel('Final Score (Hydration-Based)', fontsize=12)
ax.set_title('Final Comparison: Industrial Waste vs MatterGen\n(Based on Hydration Simulation)', fontsize=14, fontweight='bold')

# Score annotation just past the end of each bar
for bar, (_, row) in zip(bars, df_ranked.iterrows()):
    ax.text(bar.get_width() + 0.5, bar.get_y() + bar.get_height()/2,
            f'{row["final_score"]:.1f} ({row["grade"]})',
            va='center', fontsize=10)

# Legend
legend_patches = [
    mpatches.Patch(color='#2E86AB', label='Industrial Waste (Top 5)'),
    mpatches.Patch(color='#A23B72', label='MatterGen (AI-Generated)')
]
ax.legend(handles=legend_patches, loc='lower right')

ax.set_xlim(0, 110)  # room beyond 100 for the score annotations
ax.invert_yaxis()  # bars are placed in rank order, so this puts rank #1 at the top
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()

# savefig does not create missing folders, so make sure the target exists
FIGURES_DIR.mkdir(parents=True, exist_ok=True)
plt.savefig(FIGURES_DIR / 'final_comparison_hydration.png', dpi=150, bbox_inches='tight')
plt.show()

print("Figure saved: final_comparison_hydration.png")

In [None]:
# Per-metric comparison: a 2x2 grid with one bar chart per metric
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Colours by source; tick labels are candidate names cut to 12 characters
colors = ['#2E86AB' if s == 'Industrial Waste' else '#A23B72' for s in df_ranked['source']]
names = [name[:12] for name in df_ranked['name']]
positions = range(len(df_ranked))

# (axes, DataFrame column, y-axis label, panel title)
panels = [
    (axes[0, 0], 'energy_score', 'Score', 'Energy Change Score'),
    (axes[0, 1], 'ca_score', 'Score', 'Ca Activity Score (Leaching/Displacement)'),
    (axes[1, 0], 'si_score', 'Score', 'Si Coordination Score'),
    (axes[1, 1], 'co2_reduction', 'CO2 Reduction (%)', 'CO2 Reduction Potential'),
]

# All four panels share the same layout; only the data and labels differ
for panel_ax, column, ylabel, title in panels:
    panel_ax.bar(positions, df_ranked[column], color=colors, alpha=0.8)
    panel_ax.set_xticks(positions)
    panel_ax.set_xticklabels(names, rotation=45, ha='right')
    panel_ax.set_ylabel(ylabel)
    panel_ax.set_title(title)
    panel_ax.grid(axis='y', alpha=0.3)

plt.tight_layout()

# savefig does not create missing folders, so make sure the target exists
FIGURES_DIR.mkdir(parents=True, exist_ok=True)
plt.savefig(FIGURES_DIR / 'comparison_metrics_detail.png', dpi=150)
plt.show()

In [None]:
# Per-source averages: hydration score, final score and CO2 reduction
fig, axes = plt.subplots(1, 3, figsize=(12, 4))

source_groups = df.groupby('source')

# (DataFrame column, y-axis label, panel title, bar-label format)
panels = [
    ('hydration_score', 'Average Score', 'Hydration Score', '{:.1f}'),
    ('final_score', 'Average Score', 'Final Score (incl. CO2)', '{:.1f}'),
    ('co2_reduction', 'CO2 Reduction (%)', 'CO2 Reduction', '{:.0f}%'),
]

for panel_ax, (column, ylabel, title, label_format) in zip(axes, panels):
    means = source_groups[column].mean()
    # Colour each bar by its source name rather than by position, so the
    # colours stay correct even if a source is absent or the order changes
    bar_colors = ['#2E86AB' if s == 'Industrial Waste' else '#A23B72' for s in means.index]
    mean_bars = panel_ax.bar(means.index, means.values, color=bar_colors)
    panel_ax.set_ylabel(ylabel)
    panel_ax.set_title(title)
    # Print the mean value just above each bar
    for bar, val in zip(mean_bars, means.values):
        panel_ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
                      label_format.format(val), ha='center', fontsize=11)

plt.tight_layout()

# savefig does not create missing folders, so make sure the target exists
FIGURES_DIR.mkdir(parents=True, exist_ok=True)
plt.savefig(FIGURES_DIR / 'source_comparison_hydration.png', dpi=150)
plt.show()

## 6. 결과 저장

In [None]:
# Save as CSV: pick the ranking columns and give them presentation-style headers
export_columns = {
    'rank': 'Rank',
    'name': 'Name',
    'source': 'Source',
    'formula': 'Formula',
    'hydration_score': 'Hydration_Score',
    'co2_reduction': 'CO2_Reduction',
    'final_score': 'Final_Score',
    'grade': 'Grade',
}
df_export = df_ranked[list(export_columns)].rename(columns=export_columns)

csv_path = RESULTS_DIR / 'final_ranking_hydration.csv'
df_export.to_csv(csv_path, index=False)
print(f"CSV saved: {csv_path}")

In [None]:
# Save as JSON: methodology notes, summary statistics and the full ranking
waste_rows = df[df['source'] == 'Industrial Waste']
mattergen_rows = df[df['source'] == 'MatterGen']
best = df_ranked.iloc[0]  # first row of the ranked table
ranking_columns = ['rank', 'name', 'source', 'formula',
                   'hydration_score', 'co2_reduction',
                   'final_score', 'grade']

methodology = {
    'description': 'Hydration simulation-based comparison',
    'metrics': {
        'energy_score': 'Based on energy change during hydration (40%)',
        'ca_score': 'Based on Ca leaching/displacement (30%)',
        'si_score': 'Based on Si coordination number (30%)',
        'final_score': 'hydration_score * 0.7 + co2_reduction * 0.3'
    }
}

summary = {
    'total_candidates': len(df),
    'industrial_waste_count': len(waste_rows),
    'mattergen_count': len(mattergen_rows),
    'top_candidate': best['name'],
    'top_score': float(best['final_score']),
    'average_by_source': {
        'Industrial Waste': float(waste_rows['final_score'].mean()),
        'MatterGen': float(mattergen_rows['final_score'].mean())
    }
}

final_comparison = {
    'methodology': methodology,
    'summary': summary,
    'rankings': df_ranked[ranking_columns].to_dict('records')
}

json_path = RESULTS_DIR / 'final_comparison_hydration.json'
with open(json_path, 'w') as f:
    json.dump(final_comparison, f, indent=2)
print(f"JSON saved: {json_path}")

---

## 결론

### 비교 결과 해석

| 소스 | 장점 | 단점 |
|------|------|------|
| **산업폐기물** | 검증된 수화 반응, 산업적 활용 가능 | CO2 저감 한계 (75-85%) |
| **MatterGen** | 높은 CO2 저감 (90%, 클링커 미사용을 전제로 한 가정값), 새로운 조성 | 실험적 검증 필요 |

### 권장 사항

1. **단기**: 산업폐기물 Top 후보 활용 (검증됨)
2. **중기**: MatterGen 구조 합성 및 실험 검증
3. **장기**: AI 기반 최적화로 새로운 바인더 개발

### 다음 단계

- **12_Final_Figures.ipynb**: 논문용 그림 생성
- **13_Mechanical_Properties.ipynb**: 기계적 특성 평가