# DQN with modifications (Acrobot-v1)

In [None]:
import gym
import random
import torch
import numpy as np 
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

## Environment - 'Acrobot-v1'

In [None]:
# Create the Acrobot-v1 environment and seed it with 0.
env = gym.make('Acrobot-v1')
env.seed(0)

# Length of the observation vector and number of discrete actions; both are
# handed to every Agent constructed further down.
state_shape, action_shape = env.observation_space.shape[0], env.action_space.n

for label, value in (('State shape: ', state_shape), ('Action shape: ', action_shape)):
    print(label, value)

## Overall DQN Function

In [None]:
def dqn(n_episodes=10000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    """Deep Q-Learning training loop.

    Trains the module-level ``agent`` on the module-level ``env``; both must be
    bound before this function is called.

    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon

    Returns
    =======
        list: ``[scores, episodes]``. ``scores`` is a NumPy array with one entry
        per 10 episodes, each the mean score of those 10 episodes. ``episodes``
        is the number of the last episode that was run minus 100, i.e. the
        figure printed in the "Environment solved" message when the stopping
        criterion is met.
    """
    scores = []                                # mean score of every block of 10 episodes
    scores_window_printing = deque(maxlen=10)  # last 10 scores, averaged into ``scores``
    scores_window = deque(maxlen=100)          # last 100 scores, compared with the -80 stopping threshold
    eps = eps_start                            # initialize epsilon
    i_episode = 0                              # keeps the return value defined when n_episodes < 1
    for i_episode in range(1, n_episodes+1):
        state = env.reset()
        score = 0
        for _step in range(max_t):
            action = agent.act(state, eps)
            next_state, reward, done, _ = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)            # save most recent score
        scores_window_printing.append(score)   # save most recent score
        eps = max(eps_end, eps_decay*eps)      # decrease epsilon

        # The window does not change again this episode, so average it once.
        avg_score = np.mean(scores_window)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, avg_score), end="")
        if i_episode % 10 == 0:
            scores.append(np.mean(scores_window_printing))
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, avg_score))
        if avg_score >= -80.0:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-100, avg_score))
            break
    return [np.array(scores), i_episode-100]


# Case 1- (+Q + E + T)

### Neural Network 
Input Layer - 6 nodes (State Shape) \
Hidden Layer 1 - 64 nodes \
Hidden Layer 2 - 64 nodes \
Output Layer - 3 nodes (Action Space) \
Optimizer - zero_grad()

### Network Update Frequency (YES)
Frequency of network switch - Every 10 episodes

###  Experience Replay (YES)
Total Replay Buffer Size - 10,000
Mini Batch Size - 64

### Loss Clipping (YES)
Gradient is clipped to 1 & -1


In [None]:
from config1 import Agent

# Agent dimensions are read from the environment
# (Acrobot-v1: state_size=6, action_size=3).
no_siblings1 = 15                        # number of independent training runs
sibling_scores1 = []                     # score curve returned by dqn() for each run
sibling_lives1 = np.zeros(no_siblings1)  # second value returned by dqn() for each run

for i in range(no_siblings1):
    # dqn() trains whatever is bound to the global name `agent`, so a fresh
    # Agent is bound here before each run.
    agent = Agent(state_size=state_shape, action_size=action_shape, seed=0)
    temp_scores, sibling_lives1[i] = dqn()
    sibling_scores1.append(temp_scores)
    


In [None]:
# Plot the learning curve of every Case 1 run on one set of axes.
fig = plt.figure()
ax = fig.add_subplot(111)

# The curves live in sibling_scores1; no_siblings1 is only the integer run
# count, so indexing it (as the cell did before) raises a TypeError.
for run_scores in sibling_scores1:
    # dqn() stores one averaged score per 10 episodes, so entry k belongs to
    # episode 10 * (k + 1).
    episodes = 10 * np.arange(1, len(run_scores) + 1)
    ax.plot(episodes, run_scores)

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

# Case 2- (+Q + E - T)

### Neural Network 
Input Layer - 6 nodes (State Shape)
Hidden Layer 1 - 64 nodes
Hidden Layer 2 - 64 nodes
Output Layer - 3 nodes (Action Space)
Optimizer - zero_grad()

### Network Update Frequency (YES)
Frequency of network switch - Every 10 episodes

###  Experience Replay (YES)
Total Replay Buffer Size - 10,000
Mini Batch Size - 64

### Loss Clipping (NO)
No Gradient clipping present

In [None]:
from config2 import Agent

# Agent dimensions are read from the environment
# (Acrobot-v1: state_size=6, action_size=3).
no_siblings2 = 15                        # number of independent training runs
sibling_scores2 = []                     # score curve returned by dqn() for each run
# Sized from this case's own run count (it previously used Case 1's
# no_siblings1, which only worked because both are 15).
sibling_lives2 = np.zeros(no_siblings2)

for i in range(no_siblings2):
    # dqn() trains whatever is bound to the global name `agent`.
    agent = Agent(state_size=state_shape, action_size=action_shape, seed=0)
    temp_scores, sibling_lives2[i] = dqn()
    sibling_scores2.append(temp_scores)
    


In [None]:
# Plot the learning curve of every Case 2 run on one set of axes.
fig = plt.figure()
ax = fig.add_subplot(111)

# The curves live in sibling_scores2; no_siblings2 is only the integer run
# count, so indexing it (as the cell did before) raises a TypeError.
for run_scores in sibling_scores2:
    # dqn() stores one averaged score per 10 episodes, so entry k belongs to
    # episode 10 * (k + 1).
    episodes = 10 * np.arange(1, len(run_scores) + 1)
    ax.plot(episodes, run_scores)

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

# Case 3- (+Q -E +T)

### Neural Network 
Input Layer - 6 nodes (State Shape)
Hidden Layer 1 - 64 nodes
Hidden Layer 2 - 64 nodes
Output Layer - 3 nodes (Action Space)
Optimizer - zero_grad()

### Network Update Frequency (YES)
Frequency of network switch - Every 5 episodes

###  Experience Replay (NO)
No Experience Replay / Experience Replay of Size 1

### Loss Clipping (YES)
Gradient is clipped to 1 & -1

In [None]:
from config3 import Agent

# Agent dimensions are read from the environment
# (Acrobot-v1: state_size=6, action_size=3).
no_siblings3 = 15                        # number of independent training runs
sibling_scores3 = []                     # score curve returned by dqn() for each run
sibling_lives3 = np.zeros(no_siblings3)  # second value returned by dqn() for each run

for i in range(no_siblings3):
    # dqn() trains whatever is bound to the global name `agent`.
    agent = Agent(state_size=state_shape, action_size=action_shape, seed=0)
    temp_scores, sibling_lives3[i] = dqn()
    # Store in this case's list; appending to sibling_scores1 left
    # sibling_scores3 empty and mixed Case 3 curves into Case 1's results.
    sibling_scores3.append(temp_scores)
   

  return torch._C._cuda_getDeviceCount() > 0


Episode 56	Average Score: -489.84

In [None]:
# Plot the learning curve of every Case 3 run on one set of axes.
fig = plt.figure()
ax = fig.add_subplot(111)

# The curves live in sibling_scores3; no_siblings3 is only the integer run
# count, so indexing it (as the cell did before) raises a TypeError.
for run_scores in sibling_scores3:
    # dqn() stores one averaged score per 10 episodes, so entry k belongs to
    # episode 10 * (k + 1).
    episodes = 10 * np.arange(1, len(run_scores) + 1)
    ax.plot(episodes, run_scores)

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

# Case 4- (+Q -E -T)

### Neural Network 
Input Layer - 6 nodes (State Shape)
Hidden Layer 1 - 64 nodes
Hidden Layer 2 - 64 nodes
Output Layer - 3 nodes (Action Space)
Optimizer - zero_grad()

### Network Update Frequency (YES)
Frequency of network switch - Every 5 episodes

###  Experience Replay (NO)
No Experience Replay / Experience Replay of Size 1

### Loss Clipping (NO)
No Gradient clipping present

In [None]:
from config4 import Agent

# Agent dimensions are read from the environment
# (Acrobot-v1: state_size=6, action_size=3).
no_siblings4 = 15                        # number of independent training runs
sibling_scores4 = []                     # score curve returned by dqn() for each run
sibling_lives4 = np.zeros(no_siblings4)  # second value returned by dqn() for each run

for i in range(no_siblings4):
    # dqn() trains whatever is bound to the global name `agent`, so a fresh
    # Agent is bound here before each run.
    agent = Agent(state_size=state_shape, action_size=action_shape, seed=0)
    temp_scores, sibling_lives4[i] = dqn()
    sibling_scores4.append(temp_scores)
   

In [None]:
# Plot the learning curve of every Case 4 run on one set of axes.
fig = plt.figure()
ax = fig.add_subplot(111)

# The curves live in sibling_scores4; no_siblings4 is only the integer run
# count, so indexing it (as the cell did before) raises a TypeError.
for run_scores in sibling_scores4:
    # dqn() stores one averaged score per 10 episodes, so entry k belongs to
    # episode 10 * (k + 1).
    episodes = 10 * np.arange(1, len(run_scores) + 1)
    ax.plot(episodes, run_scores)

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

# Case 5- (-Q +E +T)

### Neural Network 
Input Layer - 6 nodes (State Shape)
Hidden Layer 1 - 64 nodes
Hidden Layer 2 - 64 nodes
Output Layer - 3 nodes (Action Space)
Optimizer - zero_grad()

### Network Update Frequency (NO)
Frequency of network switch - Every episode

###  Experience Replay (YES)
Total Replay Buffer Size - 10,000
Mini Batch Size - 64

### Loss Clipping (YES)
Gradient is clipped to 1 & -1

In [None]:
from config5 import Agent

# Agent dimensions are read from the environment
# (Acrobot-v1: state_size=6, action_size=3).
no_siblings5 = 15                        # number of independent training runs
sibling_scores5 = []                     # score curve returned by dqn() for each run
sibling_lives5 = np.zeros(no_siblings5)  # second value returned by dqn() for each run

for i in range(no_siblings5):
    # dqn() trains whatever is bound to the global name `agent`, so a fresh
    # Agent is bound here before each run.
    agent = Agent(state_size=state_shape, action_size=action_shape, seed=0)
    temp_scores, sibling_lives5[i] = dqn()
    sibling_scores5.append(temp_scores)
   

In [None]:
# Plot the learning curve of every Case 5 run on one set of axes.
fig = plt.figure()
ax = fig.add_subplot(111)

# The curves live in sibling_scores5; no_siblings5 is only the integer run
# count, so indexing it (as the cell did before) raises a TypeError.
for run_scores in sibling_scores5:
    # dqn() stores one averaged score per 10 episodes, so entry k belongs to
    # episode 10 * (k + 1).
    episodes = 10 * np.arange(1, len(run_scores) + 1)
    ax.plot(episodes, run_scores)

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

# Case 6: (-Q +E -T)

### Neural Network 
Input Layer - 6 nodes (State Shape)
Hidden Layer 1 - 64 nodes
Hidden Layer 2 - 64 nodes
Output Layer - 3 nodes (Action Space)
Optimizer - zero_grad()

### Network Update Frequency (NO)
Frequency of network switch - Every episode

###  Experience Replay (YES)
Total Replay Buffer Size - 10,000
Mini Batch Size - 64

### Loss Clipping (NO)
No Gradient clipping present

In [None]:
from config6 import Agent

# Agent dimensions are read from the environment
# (Acrobot-v1: state_size=6, action_size=3).
no_siblings6 = 15                        # number of independent training runs
sibling_scores6 = []                     # score curve returned by dqn() for each run
sibling_lives6 = np.zeros(no_siblings6)  # second value returned by dqn() for each run

for i in range(no_siblings6):
    # dqn() trains whatever is bound to the global name `agent`, so a fresh
    # Agent is bound here before each run.
    agent = Agent(state_size=state_shape, action_size=action_shape, seed=0)
    temp_scores, sibling_lives6[i] = dqn()
    sibling_scores6.append(temp_scores)
   

In [None]:
# Plot the learning curve of every Case 6 run on one set of axes.
fig = plt.figure()
ax = fig.add_subplot(111)

# The curves live in sibling_scores6; no_siblings6 is only the integer run
# count, so indexing it (as the cell did before) raises a TypeError.
for run_scores in sibling_scores6:
    # dqn() stores one averaged score per 10 episodes, so entry k belongs to
    # episode 10 * (k + 1).
    episodes = 10 * np.arange(1, len(run_scores) + 1)
    ax.plot(episodes, run_scores)

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

# Case 7: (-Q -E +T)

### Neural Network 
Input Layer - 6 nodes (State Shape)
Hidden Layer 1 - 64 nodes
Hidden Layer 2 - 64 nodes
Output Layer - 3 nodes (Action Space)
Optimizer - zero_grad()

### Network Update Frequency (NO)
Frequency of network switch - Every episode

###  Experience Replay (NO)
No Experience Replay / Experience Replay of Size 1

### Loss Clipping (YES)
Gradient is clipped to 1 & -1

In [None]:
from config7 import Agent

# Agent dimensions are read from the environment
# (Acrobot-v1: state_size=6, action_size=3).
no_siblings7 = 15                        # number of independent training runs
sibling_scores7 = []                     # score curve returned by dqn() for each run
# Sized from this case's own run count (it previously used Case 1's
# no_siblings1, which only worked because both are 15).
sibling_lives7 = np.zeros(no_siblings7)

for i in range(no_siblings7):
    # dqn() trains whatever is bound to the global name `agent`.
    agent = Agent(state_size=state_shape, action_size=action_shape, seed=0)
    temp_scores, sibling_lives7[i] = dqn()
    sibling_scores7.append(temp_scores)
   

In [None]:
# Plot the learning curve of every Case 7 run on one set of axes.
fig = plt.figure()
ax = fig.add_subplot(111)

# The curves live in sibling_scores7; no_siblings7 is only the integer run
# count, so indexing it (as the cell did before) raises a TypeError.
for run_scores in sibling_scores7:
    # dqn() stores one averaged score per 10 episodes, so entry k belongs to
    # episode 10 * (k + 1).
    episodes = 10 * np.arange(1, len(run_scores) + 1)
    ax.plot(episodes, run_scores)

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

# Case 8: (-Q -E -T)

### Neural Network 
Input Layer - 6 nodes (State Shape)
Hidden Layer 1 - 64 nodes
Hidden Layer 2 - 64 nodes
Output Layer - 3 nodes (Action Space)
Optimizer - zero_grad()

### Network Update Frequency (NO)
Frequency of network switch - Every episode

###  Experience Replay (NO)
No Experience Replay / Experience Replay of Size 1

### Loss Clipping (NO)
No Gradient clipping present

In [None]:
from config8 import Agent

# Agent dimensions are read from the environment
# (Acrobot-v1: state_size=6, action_size=3).
no_siblings8 = 15                        # number of independent training runs
sibling_scores8 = []                     # score curve returned by dqn() for each run
sibling_lives8 = np.zeros(no_siblings8)  # second value returned by dqn() for each run

for i in range(no_siblings8):
    # dqn() trains whatever is bound to the global name `agent`, so a fresh
    # Agent is bound here before each run.
    agent = Agent(state_size=state_shape, action_size=action_shape, seed=0)
    temp_scores, sibling_lives8[i] = dqn()
    sibling_scores8.append(temp_scores)
   

In [None]:
# Plot the learning curve of every Case 8 run on one set of axes.
fig = plt.figure()
ax = fig.add_subplot(111)

# The curves live in sibling_scores8; no_siblings8 is only the integer run
# count, so indexing it (as the cell did before) raises a TypeError.
for run_scores in sibling_scores8:
    # dqn() stores one averaged score per 10 episodes, so entry k belongs to
    # episode 10 * (k + 1).
    episodes = 10 * np.arange(1, len(run_scores) + 1)
    ax.plot(episodes, run_scores)

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()