# 03. Visualization (시각화)

## 목표
- 7개 척도 상관행렬
- 점수 분포 히스토그램

## 참조 파일
- `reports/step2_preprocess.json` - 계산된 척도 목록 및 점수 파일 경로

In [None]:
# Install the packages this notebook imports into the active kernel's
# environment (-q keeps pip's output quiet).
%pip install pandas numpy matplotlib seaborn -q

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os

# Working directory: when the kernel starts inside the 'notebooks' folder,
# move up one level so relative paths such as 'reports/...' resolve from the
# project root.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('..')
print(f'작업 폴더: {os.getcwd()}')

# Korean-capable font for plot text. 'Malgun Gothic' ships with Windows only;
# forcing it elsewhere makes matplotlib warn and fall back to a font that may
# lack Hangul glyphs. Pick the first candidate that is actually installed,
# keeping 'Malgun Gothic' as the first choice so Windows behaviour is the same.
# NOTE(review): the non-Windows candidate names are best-effort; confirm them
# against the fonts available on the target machines.
from matplotlib import font_manager

_korean_font_candidates = (
    'Malgun Gothic',
    'AppleGothic',
    'NanumGothic',
    'Noto Sans CJK KR',
    'Noto Sans KR',
)
_installed_font_names = {font.name for font in font_manager.fontManager.ttflist}
_korean_font = next(
    (name for name in _korean_font_candidates if name in _installed_font_names),
    None,
)
if _korean_font is not None:
    plt.rcParams['font.family'] = _korean_font
else:
    # Leave matplotlib's default font in place rather than naming a font
    # that is known to be missing.
    print('⚠️ 한글 폰트를 찾지 못했습니다. 그래프의 한글이 깨질 수 있습니다.')

# Draw minus signs with the ASCII hyphen instead of the Unicode minus, which
# the selected font may not provide.
plt.rcParams['axes.unicode_minus'] = False

In [None]:
# Read the Step 2 preprocessing report.
with open('reports/step2_preprocess.json', encoding='utf-8') as f:
    step2 = json.load(f)

# Both values live under results -> scores in the report: the list of
# calculated scales and the path of the scores output file.
scales, scores_file = (
    step2['results']['scores'][key]
    for key in ('calculated_scales', 'output_file')
)

print(f"척도: {scales}")
print(f"파일: {scores_file}")

In [None]:
# Load the score table from the path recorded in the Step 2 report and
# print its dimensions (rows x columns).
scores = pd.read_csv(scores_file)
print(f"데이터: {scores.shape[0]:,}명 × {scores.shape[1]}열")

---
## Part 1: 상관행렬

In [None]:
# Correlation matrix of the scale scores. DataFrame.corr() excludes missing
# values pair by pair, so different cells may rest on different sample sizes.
corr_matrix = scores[scales].corr()

# Annotated heatmap with a diverging palette centred on zero.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='RdBu_r',
            center=0, square=True, linewidths=0.5, ax=ax)
# Take the scale count from the loaded list instead of hard-coding 7, so the
# title cannot disagree with the matrix that is actually plotted.
ax.set_title(f'{len(scales)}개 성격 척도 상관행렬', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('reports/correlation_matrix.png', dpi=150)
print("✅ 저장: reports/correlation_matrix.png")
plt.show()

In [None]:
# Pairwise N check
def pairwise_n(df, cols):
    """Count, for every pair of columns, the rows where both are non-missing.

    Args:
        df: DataFrame holding the columns named in ``cols``.
        cols: Column labels to compare; they become both the index and the
            columns of the result, in the given order.

    Returns:
        A square DataFrame of int64 counts. Cell ``(a, b)`` is the number of
        rows in which neither ``a`` nor ``b`` is missing; the diagonal holds
        each column's own non-missing count.
    """
    # 1 where a value is present, 0 where it is missing. The product of this
    # indicator matrix with itself sums, per column pair, the rows where both
    # indicators are 1. This is the same count as dropna() on the pair, but
    # in one pass and with a genuine integer dtype. The previous version
    # seeded an empty frame with dtype=int, which NaN placeholders cannot
    # honour, so the counts typically came back as floats.
    present = df[list(cols)].notna().astype('int64')
    return present.T.dot(present)

# Print a header, then evaluate the pairwise-N table for the loaded scales.
# The call is a bare expression on the last line, so its value is not printed
# by this code itself.
# NOTE(review): presumably shown as the notebook cell's output — confirm.
print("=== Pairwise N (상관 계산에 사용된 표본 수) ===")
pairwise_n(scores, scales)

---
## Part 2: 분포 히스토그램

In [None]:
# Big Five score distributions (axis label states a 1-6 range)
big_five = ['NEO_O', 'NEO_C', 'NEO_E', 'NEO_A', 'NEO_N']

# One histogram panel per trait, side by side in a single row.
fig, axes = plt.subplots(1, 5, figsize=(15, 3))
for panel, col in zip(axes, big_five):
    # Missing scores are dropped before binning.
    panel.hist(scores[col].dropna(), bins=30, edgecolor='black', alpha=0.7)
    panel.set_xlabel('점수 (1-6)')
    panel.set_title(col)

fig.suptitle('Big Five 점수 분포', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('reports/big_five_distributions.png', dpi=150)
print("✅ 저장: reports/big_five_distributions.png")
plt.show()

In [None]:
# Ideology and Honesty-Humility distributions (plotted on a z-score axis)
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

# (score column, panel title, bar colour) for each panel, left to right.
panel_specs = [
    ('Ideology', 'Ideology', 'coral'),
    ('Honesty_Humility', 'Honesty-Humility', 'teal'),
]
for panel, (column, title, colour) in zip(axes, panel_specs):
    panel.hist(scores[column].dropna(), bins=50, edgecolor='black', alpha=0.7, color=colour)
    # Dashed reference line at zero; the legend labels it as the mean.
    panel.axvline(0, color='red', linestyle='--', label='평균=0')
    panel.set_title(title)
    panel.set_xlabel('z-score')
    panel.legend()

plt.tight_layout()
plt.savefig('reports/ideology_hh_distributions.png', dpi=150)
print("✅ 저장: reports/ideology_hh_distributions.png")
plt.show()

---
## 요약

In [None]:
# Closing banner listing the image files written by the cells above.
banner = "=" * 50
generated_files = (
    "reports/correlation_matrix.png",
    "reports/big_five_distributions.png",
    "reports/ideology_hh_distributions.png",
)

print(banner)
print("=== 시각화 완료 ===")
print(banner)
print("생성된 파일:")
for path in generated_files:
    print(f"  - {path}")
print(banner)