<a href="https://colab.research.google.com/github/andrewjason21/repo/blob/main/Copy_of_Deep_Q_Learning_for_Lunar_Landing_Partial_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Q-Learning for Lunar Landing

## Part 0 - Installing the required packages and importing the libraries

### Installing Gymnasium

In [1]:
!pip install gymnasium
!pip install "gymnasium[atari, accept-rom-license]"
!apt-get install -y swig
!pip install gymnasium[box2d]

Collecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1
Collecting shimmy<1.0,>=0.1.0 (from shimmy[atari]<1.0,>=0.1.0; extra == "atari"->gymnasium[accept-rom-license,atari])
  Downloading Shimmy-0.2.1-py3-none-any.whl.metadata (2.3 kB)
Collecting autorom~=0.4.2 (from autorom[accept-rom-license]~=0.4.2; extra == "accept-rom-license"->gymnasium[accept-rom-license,atari])
  Downloading AutoROM-0.4.2-py3-none-any.whl.metadata (2.8 kB)
Collecting AutoROM.accep

### Importing the libraries

In [None]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.autograd as autograd
from torch.autograd import Variable
from collections import deque, namedtuple

## Part 1 - Building the AI

### Creating the architecture of the Neural Network

In [None]:
class Network(nn.Module):
  """Fully connected Q-network: state vector in, one value per action out.

  Architecture: state_size -> 64 -> 64 -> action_size, with ReLU after each
  of the two hidden layers and no activation on the output layer.
  """

  def __init__(self, state_size, action_size, seed=42):
    """Build the three linear layers.

    Args:
      state_size: number of input features per state.
      action_size: number of outputs (one per action).
      seed: value passed to torch.manual_seed before the layers are created,
        so two networks built with the same seed start with equal weights.
    """
    super(Network, self).__init__()
    # torch.manual_seed returns the global torch.Generator; it is kept on the
    # instance exactly as the original code did.
    self.seed = torch.manual_seed(seed)
    self.fc1 = nn.Linear(state_size, 64)
    self.fc2 = nn.Linear(64, 64)
    self.fc3 = nn.Linear(64, action_size)

  def forward(self, state):
    """Return the output of the final linear layer for `state`.

    Args:
      state: float tensor whose last dimension equals `state_size`.

    Returns:
      Tensor whose last dimension equals `action_size`.
    """
    x = F.relu(self.fc1(state))
    x = F.relu(self.fc2(x))
    return self.fc3(x)

## Part 2 - Training the AI

### Setting up the environment

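> The cell below creates the `LunarLander-v2` environment and prints the shape of its observation space and the number of available actions.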



In [None]:
import gymnasium as gym

# Create the environment and read the dimensions used to size the network.
env = gym.make('LunarLander-v2')
state_shape = env.observation_space.shape
state_size = state_shape[0]  # length of the observation vector
number_actions = env.action_space.n

print('state_shape: ', state_shape)
print('state_size: ', state_size)
print('number_actions: ', number_actions)


state_shape:  (8,)
state_size:  8
number_actions:  4


### Initializing the hyperparameters

In [None]:
# Learning rate passed to the Adam optimizer of the local Q-network.
learning_rate = 0.0005
# NOTE(review): presumably the number of experiences drawn per learning step —
# no code visible in this notebook reads it yet.
minibatch_size = 100
# NOTE(review): presumably the discount applied to future rewards — not yet
# read by any visible code.
discount_factor = 0.99
# Capacity handed to ReplayMemory when the agent is constructed.
replay_buffer_size = 100_000
# NOTE(review): presumably the soft-update rate for the target network — not
# yet read by any visible code.
interpolation_parameter = 0.001

  and should_run_async(code)


### Implementing Experience Replay

In [None]:
class ReplayMemory(object):
  """Bounded first-in-first-out store of experiences with random sampling.

  Each stored event is indexed positionally by `sample` as
  (state, action, reward, next_state, done).
  """

  def __init__(self, capacity):
    """Create an empty memory holding at most `capacity` events."""
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.capacity = capacity
    self.memory = []

  def push(self, event):
    """Append `event`, dropping the oldest entry once capacity is exceeded."""
    self.memory.append(event)
    if len(self.memory) > self.capacity:
      del self.memory[0]

  @staticmethod
  def _column(experiences, index):
    """Stack field `index` of every non-None experience into a 2-D array."""
    return np.vstack([e[index] for e in experiences if e is not None])

  def sample(self, batch_size):
    """Draw `batch_size` distinct events at random and return them as tensors.

    Returns:
      Tuple (states, next_states, actions, rewards, dones) of tensors placed
      on `self.device`. `actions` is int64; the others are float32.

    Raises:
      ValueError: from random.sample when `batch_size` exceeds the number of
        stored events.
    """
    experiences = random.sample(self.memory, k=batch_size)
    states = torch.from_numpy(self._column(experiences, 0)).float().to(self.device)
    actions = torch.from_numpy(self._column(experiences, 1)).long().to(self.device)
    rewards = torch.from_numpy(self._column(experiences, 2)).float().to(self.device)
    next_states = torch.from_numpy(self._column(experiences, 3)).float().to(self.device)
    # Cast on the NumPy array: torch tensors have no .astype, and from_numpy
    # cannot be relied on to accept boolean input directly.
    dones = torch.from_numpy(self._column(experiences, 4).astype(np.uint8)).float().to(self.device)
    return states, next_states, actions, rewards, dones

### Implementing the DQN class

In [None]:
class Agent():
  """Holds the networks, optimizer and replay memory of the DQN agent."""

  def __init__(self, state_size, action_size):
    """Create the local and target Q-networks, the optimizer and the memory.

    Args:
      state_size: number of input features per state.
      action_size: number of available actions.

    Reads the module-level `learning_rate` and `replay_buffer_size`.
    """
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.state_size = state_size
    self.action_size = action_size
    self.local_qnetwork = Network(state_size, action_size).to(self.device)
    self.target_qnetwork = Network(state_size, action_size).to(self.device)
    # Only the local network's parameters are handed to the optimizer.
    self.optimizer = optim.Adam(self.local_qnetwork.parameters(), lr=learning_rate)
    self.memory = ReplayMemory(replay_buffer_size)
    # NOTE(review): an instance attribute named `step` would shadow a method
    # of the same name if one is added to this class later — confirm naming.
    self.step = 0

### Initializing the DQN agent

### Training the DQN agent

## Part 3 - Visualizing the results

In [2]:
import glob
import io
import base64
import imageio
from IPython.display import HTML, display
from gym.wrappers.monitoring.video_recorder import VideoRecorder

def show_video_of_model(agent, env_name):
    """Run one episode with `agent` and save the rendered frames to video.mp4.

    Args:
        agent: object whose `act(state)` returns a value exposing `.item()`.
        env_name: environment id passed to `gym.make`.

    The episode ends when the environment reports either termination or
    truncation; the environment is closed even if a step raises.
    """
    env = gym.make(env_name, render_mode='rgb_array')
    frames = []
    try:
        state, _ = env.reset()
        done = False
        while not done:
            frames.append(env.render())
            action = agent.act(state)
            # step() returns (obs, reward, terminated, truncated, info); both
            # flags end the episode, so neither may be discarded.
            state, _, terminated, truncated, _ = env.step(action.item())
            done = terminated or truncated
    finally:
        env.close()
    imageio.mimsave('video.mp4', frames, fps=30)

# Requires `agent` to exist in the kernel. No cell shown before this call
# creates it, so on a fresh kernel this line raises NameError.
show_video_of_model(agent, 'LunarLander-v2')

def show_video():
    """Embed the first .mp4 in the working directory as an inline HTML video.

    Prints "Could not find video" when no .mp4 file is present.
    """
    mp4list = glob.glob('*.mp4')
    if len(mp4list) > 0:
        mp4 = mp4list[0]
        # Read-only binary mode inside a context manager: the file is only
        # read, and the handle is closed as soon as the bytes are loaded.
        with io.open(mp4, 'rb') as video_file:
            video = video_file.read()
        encoded = base64.b64encode(video)
        display(HTML(data='''<video alt="test" autoplay
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    else:
        print("Could not find video")

# Display the first .mp4 found in the working directory, if there is one.
show_video()

NameError: name 'agent' is not defined