-
Notifications
You must be signed in to change notification settings - Fork 0
/
setup_cluster.txt
53 lines (45 loc) · 1.79 KB
/
setup_cluster.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# log in to Euler
ssh {YOUR_ETH_USERNAME}@euler.ethz.ch
# install Miniconda (only once)
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
chmod +x Miniconda3-latest-Linux-x86_64.sh
./Miniconda3-latest-Linux-x86_64.sh
!!! IMPORTANT !!! When the installer asks for the installation location, enter the following directory: /cluster/scratch/{YOUR_ETH_USERNAME}/miniconda3
rm -rf Miniconda3-latest-Linux-x86_64.sh
Close your current terminal, open a new terminal and log in to Euler again
# set up git credentials on Euler, download the repository, download the dataset (only once)
ssh-keygen
more /cluster/home/{YOUR_ETH_USERNAME}/.ssh/id_rsa.pub  # copy the output and add it to your GitHub account under Settings > SSH and GPG keys
git clone git@github.com:ardarslan/3d-human-motion-synthesis.git
mv 3d-human-motion-synthesis dlproject
cd $SCRATCH
env2lmod
module load gcc/6.3.0 cuda/10.1.243 cudnn/7.6.4 python_gpu/3.8.5 eth_proxy
pip3 install gshell
gshell init
gshell download --with-id 138U1dIxOERIEzYI6WB0jpEonumn7JUQ2 --recursive
mv datasets dlproject_datasets
cd dlproject_datasets/amass
cat *.tar.bz2 | tar -ixjv
rm -rf *.tar.bz2
# create conda environment (only once)
cd ~/dlproject  # the repository was cloned into your home directory, not into $SCRATCH
conda env create -f environment.yml
# update conda environment (if you change environment.yml)
conda env update -f environment.yml
# IMPORTANT: to avoid TensorBoard timeout problems
Do not install the PyTorch Profiler TensorBoard plugin (torch-tb-profiler) if VS Code prompts you to install it.
# if you installed the PyTorch Profiler TensorBoard plugin (torch-tb-profiler) by mistake:
pip3 uninstall torch-tb-profiler
# daily use
ssh {YOUR_ETH_USERNAME}@euler.ethz.ch
env2lmod
module load gcc/6.3.0 cuda/10.1.243 cudnn/7.6.4 python_gpu/3.8.5 eth_proxy
cd dlproject
conda activate dlproject
cd src/
bsub -n 4 -W 4:00 -o logs -R "rusage[mem=4096, ngpus_excl_p=1]" python run.py
# commands for monitoring and managing submitted jobs ({PID} is the job ID shown in the JOBID column of bjobs)
bjobs
bpeek {PID}
bkill {PID}