In [None]:
# test_npca_env.ipynb

# %%
import sys
import os
import numpy as np
import matplotlib.pyplot as plt

# Add the drl_framework directory to the module search path so that the
# NPCAEnv module can be imported below.
sys.path.append('drl_framework')
from NPCAEnv import NPCAEnv, make_npca_env

print("NPCA Environment 테스트 시작")

# %%
# Basic environment construction test
print("1. 환경 생성 테스트")

# Constructor settings gathered in one mapping so they are easy to scan.
env_kwargs = dict(
    max_ppdu_duration=50,
    max_obss_duration=100,
    max_backoff=1024,
    max_cw_index=6,
    seed=42,
)
env = NPCAEnv(**env_kwargs)

# Show the spaces the environment exposes.
for label, space in (
    ("Action space", env.action_space),
    ("Observation space", env.observation_space),
):
    print(f"{label}: {space}")

# %%
# Reset test
print("2. Reset 테스트")


def _to_units(normalized, scale):
    """Convert a normalised observation entry back to an integer count.

    Rounds instead of truncating so that float32 representation error
    (e.g. 0.29 stored as float32, times 100, is 28.999999...) does not
    shave one unit off the result.
    Assumes the environment normalises by dividing by ``scale`` -- TODO confirm.
    """
    return int(round(float(normalized) * scale))


obs, info = env.reset()
# reset() returns a dict observation here and a dict has no `.shape`
# (the original line raised AttributeError), so report per-key shapes instead.
obs_shapes = {key: np.shape(value) for key, value in obs.items()}
print(f"Initial observation shape: {obs_shapes}")
print(f"Initial observation: {obs}")
print(f"Info: {info}")

# Interpret the observation (dict form): normalised value plus recovered count
print("\n관찰값 해석:")
print(f"Primary OBSS remain: {obs['primary_obss_remain'][0]:.3f} (실제: {_to_units(obs['primary_obss_remain'][0], env.max_obss_duration)})")
print(f"Primary intra busy: {obs['primary_intra_busy']}")
print(f"NPCA intra busy: {obs['npca_intra_busy']}")
print(f"NPCA OBSS remain: {obs['npca_obss_remain'][0]:.3f} (실제: {_to_units(obs['npca_obss_remain'][0], env.max_obss_duration)})")
print(f"Current backoff: {obs['current_backoff'][0]:.3f} (실제: {_to_units(obs['current_backoff'][0], env.max_backoff)})")
print(f"CW index: {obs['cw_index'][0]:.3f} (실제: {_to_units(obs['cw_index'][0], env.max_cw_index)})")
print(f"Success rate: {obs['recent_success_rate'][0]:.3f}")
print(f"PPDU duration: {obs['ppdu_duration'][0]:.3f} (실제: {_to_units(obs['ppdu_duration'][0], env.max_ppdu_duration)})")
print(f"Can finish before OBSS: {obs['can_finish_before_obss']}")

# %%
# Step test - try a short, fixed sequence of actions
print("3. Step 테스트")


def _to_units(normalized, scale):
    """Convert a normalised observation entry back to an integer count.

    Rounds instead of truncating so that float32 representation error
    does not shave one unit off the recovered value.
    Assumes the environment normalises by dividing by ``scale`` -- TODO confirm.
    """
    return int(round(float(normalized) * scale))


actions = [0, 1, 0, 1, 1]  # 0 = wait on PRIMARY, 1 = use NPCA
rewards = []
observations = []

for i, action in enumerate(actions):
    print(f"\n--- Step {i+1}: Action {action} ({'PRIMARY 대기' if action == 0 else 'NPCA 사용'}) ---")

    obs, reward, done, truncated, info = env.step(action)
    rewards.append(reward)
    # Shallow copy of the observation dict, kept as a per-step snapshot.
    observations.append(obs.copy())

    print(f"Reward: {reward}")
    print(f"Done: {done}, Truncated: {truncated}")
    print(f"PPDU duration: {_to_units(obs['ppdu_duration'][0], env.max_ppdu_duration)}")
    print(f"Primary OBSS remain: {_to_units(obs['primary_obss_remain'][0], env.max_obss_duration)}")
    print(f"Can finish before OBSS: {bool(obs['can_finish_before_obss'])}")

    # Stop stepping once the episode has ended.
    if done or truncated:
        print("Episode ended")
        break

print(f"\n총 보상: {sum(rewards)}")

# %%
# Random-agent test (a longer episode)
print("4. 랜덤 에이전트 테스트")

# Seed both NumPy's global RNG (action choice) and the environment.
np.random.seed(42)
env.reset(seed=42)

episode_rewards = []
episode_actions = []
episode_ppdu_durations = []
total_reward = 0
steps = 0

for step in range(50):  # run at most 50 steps
    # Pick an action uniformly at random
    action = np.random.choice([0, 1])

    obs, reward, done, truncated, info = env.step(action)

    episode_rewards.append(reward)
    episode_actions.append(action)
    # Round rather than truncate: the normalised value is float32, so
    # int() alone could land one unit low (e.g. 28.999999 -> 28).
    # Assumes ppdu_duration is normalised by max_ppdu_duration -- TODO confirm.
    episode_ppdu_durations.append(
        int(round(float(obs['ppdu_duration'][0]) * env.max_ppdu_duration))
    )
    total_reward += reward
    steps += 1

    if done or truncated:
        break

print(f"에피소드 완료: {steps} 스텝")
print(f"총 보상: {total_reward}")
print(f"평균 보상: {total_reward/steps:.2f}")

# Action breakdown
action_counts = {0: episode_actions.count(0), 1: episode_actions.count(1)}
print(f"행동 분포: PRIMARY 대기 {action_counts[0]}회, NPCA 사용 {action_counts[1]}회")

# %%
# Visualisation of the random-agent episode
print("5. 결과 시각화")

fig, axes = plt.subplots(2, 2, figsize=(12, 8))
(ax_rewards, ax_actions), (ax_ppdu, ax_by_action) = axes

# Reward per step
ax_rewards.plot(episode_rewards)
ax_rewards.set(title='Episode Rewards', xlabel='Step', ylabel='Reward')
ax_rewards.grid(True)

# Action per step
ax_actions.plot(episode_actions, 'o-', alpha=0.7)
ax_actions.set(title='Actions (0=PRIMARY, 1=NPCA)', xlabel='Step', ylabel='Action')
ax_actions.set_ylim(-0.1, 1.1)
ax_actions.grid(True)

# Distribution of PPDU durations
ax_ppdu.hist(episode_ppdu_durations, bins=10, alpha=0.7)
ax_ppdu.set(title='PPDU Duration Distribution', xlabel='PPDU Duration', ylabel='Frequency')
ax_ppdu.grid(True)

# Mean reward per action; an action that was never taken is shown as 0
action_rewards = {
    chosen: [r for a, r in zip(episode_actions, episode_rewards) if a == chosen]
    for chosen in (0, 1)
}
avg_rewards = [
    np.mean(samples) if samples else 0
    for samples in (action_rewards[0], action_rewards[1])
]

ax_by_action.bar(['PRIMARY', 'NPCA'], avg_rewards, alpha=0.7)
ax_by_action.set(title='Average Reward by Action', ylabel='Average Reward')
ax_by_action.grid(True)

plt.tight_layout()
plt.show()

# %%
# Effect of environment parameters
print("6. 환경 파라미터 영향 테스트")

# Sweep over several OBSS generation rates
obss_rates = [0.05, 0.1, 0.2, 0.3]
results = {}

for rate in obss_rates:
    print(f"\nOBSS generation rate: {rate}")

    # The rate is not passed to the environment: a fresh env is built and the
    # rate is emulated by overwriting the remaining OBSS time before each step.
    env_test = NPCAEnv(max_obss_duration=100, seed=42)

    # Short test run
    env_test.reset()
    test_rewards = []

    for _ in range(20):
        # Inject an artificial OBSS duration (exponential, mean 10/rate).
        # Clamp it to [1, max_obss_duration]: low rates otherwise produce
        # values above the configured maximum, which would push the state
        # outside the range the environment was built for.
        sampled_obss = int(np.random.exponential(1/rate) * 10)
        env_test.primary_obss_remain = min(env_test.max_obss_duration, max(1, sampled_obss))

        action = np.random.choice([0, 1])
        obs, reward, done, truncated, info = env_test.step(action)
        test_rewards.append(reward)

        if done or truncated:
            break

    results[rate] = {
        'avg_reward': np.mean(test_rewards),
        'total_reward': np.sum(test_rewards),
        'steps': len(test_rewards)
    }

    print(f"  평균 보상: {results[rate]['avg_reward']:.2f}")
    print(f"  총 보상: {results[rate]['total_reward']:.2f}")

# %%
print("7. 환경 정상성 검증")

# Observation-space range check on freshly reset observations
print("상태 공간 검증:")
for _ in range(10):
    obs, _ = env.reset()
    # Validate each entry of the dict observation
    for key, value in obs.items():
        if isinstance(value, np.ndarray):
            assert np.all(value >= 0.0) and np.all(value <= 1.0), f"{key}가 [0,1] 범위를 벗어남: {value}"
        elif key in ['primary_intra_busy', 'npca_intra_busy', 'can_finish_before_obss']:
            assert value in [0, 1], f"{key}가 0 또는 1이 아님: {value}"

print("✓ 모든 관찰값이 올바른 범위 내에 있음")

# Action-space check
print("\n행동 공간 검증:")
valid_actions = [0, 1]
for action in valid_actions:
    try:
        obs, reward, done, truncated, info = env.step(action)
        print(f"✓ Action {action} 정상 동작")
    except Exception as e:
        # Deliberately broad: this cell reports any failure instead of aborting.
        print(f"✗ Action {action} 오류: {e}")
    else:
        # Do not keep stepping an episode that has already ended.
        if done or truncated:
            env.reset()

# Reward range check
print("\n보상 범위 검증:")
env.reset()
rewards_sample = []
for _ in range(100):
    action = np.random.choice([0, 1])
    obs, reward, done, truncated, info = env.step(action)
    rewards_sample.append(reward)
    # Start a new episode once this one ends, so every sampled reward comes
    # from a valid step rather than from stepping a finished episode.
    if done or truncated:
        env.reset()

print(f"보상 범위: {min(rewards_sample):.2f} ~ {max(rewards_sample):.2f}")
print(f"평균 보상: {np.mean(rewards_sample):.2f}")

print("\n🎉 NPCAEnv 기본 동작 테스트 완료!")

# %%

NPCA Environment 테스트 시작
1. 환경 생성 테스트
Action space: Discrete(2)
Observation space: Dict('can_finish_before_obss': Discrete(2), 'current_backoff': Box(0.0, 1.0, (1,), float32), 'cw_index': Box(0.0, 1.0, (1,), float32), 'npca_intra_busy': Discrete(2), 'npca_obss_remain': Box(0.0, 1.0, (1,), float32), 'ppdu_duration': Box(0.0, 1.0, (1,), float32), 'primary_intra_busy': Discrete(2), 'primary_obss_remain': Box(0.0, 1.0, (1,), float32), 'recent_success_rate': Box(0.0, 1.0, (1,), float32))
2. Reset 테스트


AttributeError: 'dict' object has no attribute 'shape'