In [1]:
# These are some commands to help you troubleshoot your setup.
# There are no action items that you need to bring up to the teaching staff.
# Remember, the job is for you to complete; we only provide you a potential
# path, but you don't have to take it. We're also not going to help troubleshoot
# installation issues. For that, feel free to use online resources depending
# on the type of issue (or the application) that you are seeing.

# Good luck!

In [6]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings ; warnings.filterwarnings('ignore')
import os
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['OMP_NUM_THREADS'] = '1'



import torch
import argparse
import gfootball.env as football_env
import gym
import ray
from ray.rllib.agents import ppo
from ray import tune
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.tune.registry import register_env

import tempfile
import gym
from gfootball import env as fe
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import numpy as np
from gym import wrappers

import ray
from ray import tune
from IPython.display import HTML

from rldm.utils import gif_tools as gt
from rldm.utils import football_tools as ft

In [8]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings ; warnings.filterwarnings('ignore')
import os
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['OMP_NUM_THREADS'] = '1'

import torch
import pybullet_envs
import argparse
import gfootball.env as football_env
import gym
import ray
from ray.rllib.agents import ppo
from ray import tune
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.tune.registry import register_env

import tempfile
import gym
from gfootball import env as fe
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import numpy as np
from gym import wrappers

import ray
from ray import tune
from IPython.display import HTML

from rldm.utils import gif_tools as gt
from rldm.utils import football_tools as ft

In [10]:
# We expect the following packages:

# gfootball                     2.10.1
# ray                           1.6.0
# tensorflow                    2.6.0
# tensorflow-estimator          2.6.0
# tensorboard                   2.6.0
# tensorboard-data-server       0.6.1
# tensorboard-plugin-wit        1.8.0
# tensorboardX                  2.4
# torch                         1.9.0+cu111
# torchaudio                    0.9.0
# torchvision                   0.10.0+cu111
# rldm                          1.0          /mnt

# If you don't have the same packages, then maybe you don't have GPUs? Or you didn't use the Docker image provided?

# No need to reach out if you don't have the same packages. This is only for your information.

In [12]:
# Print the installed version of each package family the project depends on
# (one case-insensitive `grep` over `pip list` per family).
# NOTE(review): `!pip` resolves through the shell's PATH; if that is not the
# kernel's environment, the versions shown may differ from what is imported — confirm.
!pip list | grep -i gfootball
!pip list | grep -i ray
!pip list | grep -i tensorflow
!pip list | grep -i tensorboard
!pip list | grep -i torch
!pip list | grep -i rldm

gfootball                     2.10.2
ray                           1.6.0
tensorflow                    2.8.0
tensorflow-io-gcs-filesystem  0.24.0
tensorboard                   2.8.0
tensorboard-data-server       0.6.1
tensorboard-plugin-wit        1.8.1
tensorboardX                  2.5
torch                         1.9.0+cu111
torchaudio                    0.9.0
torchvision                   0.10.0+cu111
rldm                          1.0                 /mnt


In [16]:
# The command below will fail if you don't have a GPU.
# There is no action item for you if you don't have a GPU and the command below fails.

# If you do have a GPU and it is not showing in the output of the command below,
# then something is wrong. Did you follow all the steps carefully? Are you passing the right
# GPU command to docker when starting? These are some initial ideas for you to troubleshoot.

In [18]:
# Show the GPUs visible in this environment; fails when no GPU is available.
!nvidia-smi

Mon Apr 10 13:32:38 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 531.14       CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1070         On | 00000000:01:00.0  On |                  N/A |
|  0%   54C    P8               17W / 151W|    556MiB /  8192MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                         

In [20]:
# The number of vCPUs on the system! Having lots of vCPUs can be
# very helpful, especially for this project. If you have multiple systems,
# you may want to consider the system with the highest number of vCPUs.

In [22]:
# Count the lines containing 'processor' in /proc/cpuinfo (Linux only).
!grep -c 'processor' /proc/cpuinfo

8


In [26]:
# For you to become aware of the memory available on your system.
# If you are considering using complex models, such as models with
# Recurrent cells, such as LSTMs, GRUs, or AttentionNets, then make
# sure you have sufficient memory for your model. "Sufficient" here
# depends on the actual model you end up using (number of params, etc.)

In [27]:
# Report total, used and free RAM and swap in human-readable units.
!free -h

              total        used        free      shared  buff/cache   available
Mem:           15Gi       2.0Gi       180Mi       3.0Mi        13Gi        13Gi
Swap:         4.0Gi       2.0Mi       4.0Gi


In [28]:
# The command below helps you become aware of the available
# free space on your system. This can become important if you
# are saving lots of checkpoints as you train. So make sure you
# have enough for whatever you're planning to do

In [29]:
# Report size, used and available space per mounted filesystem in human-readable units.
!df -h

Filesystem      Size  Used Avail Use% Mounted on
overlay        1007G   58G  899G   7% /
tmpfs            64M     0   64M   0% /dev
shm              64M     0   64M   0% /dev/shm
drvfs           466G  465G  237M 100% /mnt
/dev/sdd       1007G   58G  899G   7% /etc/hosts
none            466G  465G  237M 100% /usr/bin/nvidia-smi
none            7.8G     0  7.8G   0% /usr/lib/x86_64-linux-gnu/libcuda.so.1
none            7.8G     0  7.8G   0% /usr/lib/x86_64-linux-gnu/libdxcore.so
none            7.8G     0  7.8G   0% /dev/dxg
tmpfs           7.8G     0  7.8G   0% /proc/acpi
tmpfs           7.8G     0  7.8G   0% /sys/firmware


In [30]:
# The following command helps you verify
# whether PyTorch is seeing your GPU or not.
# It should say True if you do have a GPU and you
# are using the provided Docker images, and if
# you followed the instructions carefully

# This is info for you only, to help you on your path
# Feel free to try something else on your own!

In [31]:
# True when PyTorch can use a CUDA device; relies on `torch` imported in an earlier cell.
torch.cuda.is_available()

True