# 03. Visualization (시각화)

7개 척도 상관행렬 + 분포 히스토그램

In [None]:
%pip install pandas numpy matplotlib seaborn -q

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# The cells below use paths relative to the current directory (data/..., reports/...),
# so step up one level when the kernel was started inside a folder named "notebooks".
if os.path.split(os.getcwd())[1] == 'notebooks':
    os.chdir(os.pardir)
print(f'작업 폴더: {os.getcwd()}')

In [None]:
# Load data
# Column labels of the seven scales analysed in the rest of the notebook.
scales = [
    'NEO_O', 'NEO_C', 'NEO_E', 'NEO_A', 'NEO_N',
    'Ideology', 'Honesty_Humility',
]
scores = pd.read_csv('data/processed/sapa_scores.csv')

# Row count and number of scales, followed by summary statistics rounded to 2 decimals.
print(f"데이터: {len(scores):,}명, {len(scales)}개 척도")
print(scores.loc[:, scales].describe().round(2))

## 1. 상관행렬

In [None]:
# Correlation matrix of the scales. DataFrame.corr() excludes missing values
# pair by pair, so each coefficient may rest on a different number of rows.
corr = scores[scales].corr()

# savefig() does not create missing directories; make sure the output folder
# exists so the cell also works on a fresh checkout.
os.makedirs('reports', exist_ok=True)

fig, ax = plt.subplots(figsize=(10, 8))
# Draw on the explicit Axes instead of relying on the implicit "current axes".
sns.heatmap(corr, annot=True, fmt='.2f', cmap='RdBu_r',
            center=0, square=True, linewidths=0.5, vmin=-1, vmax=1, ax=ax)
ax.set_title('7개 성격 척도 상관행렬')
fig.tight_layout()
fig.savefig('reports/correlation_matrix.png', dpi=150)
print('저장: reports/correlation_matrix.png')
plt.show()

In [None]:
# Check pairwise N
def pairwise_n(df, cols):
    """Count, for every pair of columns, the rows where both values are present.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns named in ``cols``.
    cols : list-like of column labels
        Columns to compare against each other.

    Returns
    -------
    pandas.DataFrame
        Square integer table whose rows and columns are labelled by ``cols``.
        Entry ``(c1, c2)`` is the number of rows in which neither ``c1`` nor
        ``c2`` is missing; the diagonal holds each column's own non-missing
        count.
    """
    # 1 where a value is present, 0 where it is missing.
    present = df[list(cols)].notna().astype(int)
    # Entry (c1, c2) of present.T @ present sums the rows where both indicators
    # are 1. This replaces one dropna() per pair and returns an integer dtype
    # instead of an object-dtype frame filled cell by cell.
    return present.T.dot(present)

# The bare call on the last line makes the notebook render the N table as the cell output.
print("=== Pairwise N ===")
pairwise_n(df=scores, cols=scales)

In [None]:
# Key correlations
# One print call with a newline separator emits the header and the four
# selected coefficients (three decimals each) on separate lines.
print(
    "=== 주요 상관관계 ===",
    f"Ideology ↔ NEO_O: {corr.loc['Ideology', 'NEO_O']:.3f}",
    f"Honesty_Humility ↔ NEO_A: {corr.loc['Honesty_Humility', 'NEO_A']:.3f}",
    f"NEO_N ↔ NEO_E: {corr.loc['NEO_N', 'NEO_E']:.3f}",
    f"NEO_N ↔ NEO_C: {corr.loc['NEO_N', 'NEO_C']:.3f}",
    sep="\n",
)

## 2. Big Five 분포

In [None]:
# Scale columns, their display names and one fill colour per panel (parallel lists).
big5 = ['NEO_O', 'NEO_C', 'NEO_E', 'NEO_A', 'NEO_N']
labels = ['Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness', 'Neuroticism']
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7']

# savefig() does not create missing directories; make sure the output folder
# exists so the cell also works on a fresh checkout.
os.makedirs('reports', exist_ok=True)

fig, axes = plt.subplots(1, 5, figsize=(15, 3))

# Iterate the panels and their settings together instead of indexing by position.
for panel, scale, label, color in zip(axes, big5, labels, colors):
    values = scores[scale].dropna()
    panel.hist(values, bins=30, color=color, edgecolor='black', alpha=0.7)
    # Dashed red line marks the mean of the non-missing scores.
    panel.axvline(values.mean(), color='red', linestyle='--')
    panel.set_title(label)
    panel.set_xlabel('Score (1-6)')

# Only the leftmost panel carries the y-axis label.
axes[0].set_ylabel('Frequency')
fig.tight_layout()
fig.savefig('reports/big_five_distributions.png', dpi=150)
print('저장: reports/big_five_distributions.png')
plt.show()

## 3. Ideology & Honesty-Humility 분포

In [None]:
# savefig() does not create missing directories; make sure the output folder
# exists so the cell also works on a fresh checkout.
os.makedirs('reports', exist_ok=True)

fig, axes = plt.subplots(1, 2, figsize=(10, 4))

# (column, fill colour, panel title) for each of the two histograms; one loop
# replaces the two copy-pasted panel blocks.
panel_specs = [
    ('Ideology', 'coral', 'Ideology (z-score)'),
    ('Honesty_Humility', 'teal', 'Honesty-Humility (z-score)'),
]

for panel, (column, fill, title) in zip(axes, panel_specs):
    values = scores[column].dropna()
    panel.hist(values, bins=50, color=fill, edgecolor='black', alpha=0.7)
    # Solid black reference line at zero, dashed red line at the observed mean.
    panel.axvline(0, color='black', linestyle='-')
    panel.axvline(values.mean(), color='red', linestyle='--')
    panel.set_title(title)
    panel.set_xlabel('Score')

# Only the left panel carries the y-axis label.
axes[0].set_ylabel('Frequency')

fig.tight_layout()
fig.savefig('reports/ideology_hh_distributions.png', dpi=150)
print('저장: reports/ideology_hh_distributions.png')
plt.show()

In [None]:
# Closing summary: list the image files written by the plotting cells above.
print(
    "\n=== 시각화 완료 ===",
    "생성된 파일:",
    "  - reports/correlation_matrix.png",
    "  - reports/big_five_distributions.png",
    "  - reports/ideology_hh_distributions.png",
    sep="\n",
)