# `q_net`

`-` 데이터를 모아보자. 

In [193]:
current_states = collections.deque(maxlen=50) 
actions = collections.deque(maxlen=50) 
next_states = collections.deque(maxlen=50) 
rewards = collections.deque(maxlen=50) 
terminations = collections.deque(maxlen=50) 

current_state, _ = env.reset()
for t in range(500): 
    action = env.action_space.sample()
    next_state, reward, terminated, _, _ = env.step(action)

    current_states.append(current_state) 
    actions.append(action)
    next_states.append(next_state)
    rewards.append(reward)
    terminations.append(terminated) 
    
    current_state = next_state 
    if terminated: break 

`-` 이전코드에서 아래에 대응하는 부분을 잘 구현하면 된다. 

```Python
## 1. q[x,y,a] 
agent.q = np.zeros([4,4,4])  

## 2. q_estimate  
x,y = agent.current_state
xx,yy = agent.next_state
a = agent.action 
q_estimated = agent.q[x,y,a] 

## 3. q_realistic = reward + 0.99 * q_future
if agent.terminated:
    q_realistic = agent.reward
else:
    q_future = q[xx,yy,:].max()
    q_realistic = agent.reward + 0.99 * q_future

## 4. q_estimate 와 q_realistic 를 비슷하게 만들어주는 역할을 하는 코드 
diff = q_realistic - q_estimated 
agent.q[x,y,a] = q_estimated + 0.05 * diff
``` 

`1`. q_net를 설정

In [201]:
q_net = torch.nn.Sequential(
    torch.nn.Linear(8,128), # 8개의 상태공간 
    torch.nn.ReLU(),
    torch.nn.Linear(128,64), 
    torch.nn.ReLU(),
    torch.nn.Linear(64,32), 
    torch.nn.ReLU(),
    torch.nn.Linear(32,4) # 4개의 action값들 
)

In [202]:
torch.tensor(current_state)

tensor([ 0.3533,  0.0950, -0.0055, -0.0028, -1.5255,  0.0227,  1.0000,  0.0000])

In [192]:
q_net(torch.tensor(current_state))

tensor([-0.1506,  0.0799, -0.0546,  0.1738], grad_fn=<AddBackward0>)

`1`. q_net를 설정 (배치버전) 

In [199]:
torch.tensor(current_states)

  torch.tensor(current_states)


tensor([[ 0.0945,  1.2319,  0.3431, -0.5353, -0.4019, -0.3059,  0.0000,  0.0000],
        [ 0.0977,  1.2194,  0.3319, -0.5592, -0.4147, -0.2562,  0.0000,  0.0000],
        [ 0.1013,  1.2076,  0.3634, -0.5258, -0.4279, -0.2634,  0.0000,  0.0000],
        [ 0.1048,  1.1952,  0.3725, -0.5556, -0.4432, -0.3064,  0.0000,  0.0000],
        [ 0.1086,  1.1834,  0.3910, -0.5309, -0.4593, -0.3222,  0.0000,  0.0000],
        [ 0.1124,  1.1709,  0.3910, -0.5576, -0.4754, -0.3222,  0.0000,  0.0000],
        [ 0.1161,  1.1579,  0.3909, -0.5843, -0.4915, -0.3222,  0.0000,  0.0000],
        [ 0.1200,  1.1442,  0.3982, -0.6136, -0.5094, -0.3575,  0.0000,  0.0000],
        [ 0.1241,  1.1307,  0.4350, -0.6065, -0.5269, -0.3503,  0.0000,  0.0000],
        [ 0.1284,  1.1165,  0.4440, -0.6372, -0.5467, -0.3968,  0.0000,  0.0000],
        [ 0.1326,  1.1018,  0.4369, -0.6606, -0.5647, -0.3598,  0.0000,  0.0000],
        [ 0.1369,  1.0864,  0.4466, -0.6908, -0.5851, -0.4066,  0.0000,  0.0000],
        [ 0.1412

In [200]:
torch.tensor(np.array(current_states)).shap

tensor([[ 0.0945,  1.2319,  0.3431, -0.5353, -0.4019, -0.3059,  0.0000,  0.0000],
        [ 0.0977,  1.2194,  0.3319, -0.5592, -0.4147, -0.2562,  0.0000,  0.0000],
        [ 0.1013,  1.2076,  0.3634, -0.5258, -0.4279, -0.2634,  0.0000,  0.0000],
        [ 0.1048,  1.1952,  0.3725, -0.5556, -0.4432, -0.3064,  0.0000,  0.0000],
        [ 0.1086,  1.1834,  0.3910, -0.5309, -0.4593, -0.3222,  0.0000,  0.0000],
        [ 0.1124,  1.1709,  0.3910, -0.5576, -0.4754, -0.3222,  0.0000,  0.0000],
        [ 0.1161,  1.1579,  0.3909, -0.5843, -0.4915, -0.3222,  0.0000,  0.0000],
        [ 0.1200,  1.1442,  0.3982, -0.6136, -0.5094, -0.3575,  0.0000,  0.0000],
        [ 0.1241,  1.1307,  0.4350, -0.6065, -0.5269, -0.3503,  0.0000,  0.0000],
        [ 0.1284,  1.1165,  0.4440, -0.6372, -0.5467, -0.3968,  0.0000,  0.0000],
        [ 0.1326,  1.1018,  0.4369, -0.6606, -0.5647, -0.3598,  0.0000,  0.0000],
        [ 0.1369,  1.0864,  0.4466, -0.6908, -0.5851, -0.4066,  0.0000,  0.0000],
        [ 0.1412