Skip to content

Commit

Permalink
Merge pull request #271 from allenai/PythonProfiling2-UnwindingChanges
Browse files Browse the repository at this point in the history
Bring back performance
  • Loading branch information
dirkgr committed Sep 14, 2023
2 parents 41b0663 + 3a6e469 commit 6cc09fe
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 6 deletions.
2 changes: 1 addition & 1 deletion configs/v1-mix-medium.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ wandb:
model:
d_model: 4096
n_heads: 16
n_layers: 29
n_layers: 30
mlp_ratio: 8
alibi: true
alibi_bias_max: 8.0
Expand Down
3 changes: 2 additions & 1 deletion olmo/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from dataclasses import dataclass, field
from itertools import islice
from pathlib import Path
from pstats import SortKey
from typing import Any, Deque, Dict, List, Optional, TextIO, Tuple

import numpy as np
Expand Down Expand Up @@ -980,7 +981,7 @@ def fit(self):
profiler.enable()
elif self.global_step == 8:
profiler.disable()
profiler.print_stats()
profiler.print_stats(sort=SortKey.CUMULATIVE)
profiler = None
else:
log.info("Training loop complete")
Expand Down
8 changes: 4 additions & 4 deletions scripts/run_with_environment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@

set -euo pipefail

# Redirect stdout and stderr so that we get a prefix with the node name
export NODENAME=$(hostname -s)
exec > >(trap "" INT TERM; sed -u "s/^/$NODENAME out: /")
exec 2> >(trap "" INT TERM; sed -u "s/^/$NODENAME err: /" >&2)

export MASTER_ADDR=$(scontrol show hostnames | head -n 1)
export MASTER_PORT=39591
export WORLD_SIZE=$SLURM_NTASKS
Expand All @@ -18,6 +14,10 @@ export LOCAL_WORLD_SIZE=$SLURM_NTASKS_PER_NODE
export LOCAL_RANK=$SLURM_LOCALID
export NODE_RANK=$((($RANK - $LOCAL_RANK) / $LOCAL_WORLD_SIZE))

# Redirect stdout and stderr so that we get a prefix with the node name and local rank
exec > >(trap "" INT TERM; sed -u "s/^/$NODENAME:$LOCAL_RANK out: /")
exec 2> >(trap "" INT TERM; sed -u "s/^/$NODENAME:$LOCAL_RANK err: /" >&2)

if [ $SLURM_LOCALID -eq 0 ] ; then
rm -rf /dev/shm/* || true
rocm-smi || true # rocm-smi returns exit code 2 even when it succeeds
Expand Down

0 comments on commit 6cc09fe

Please sign in to comment.