diff --git a/examples/megatron-qwen3/training/run.sh b/examples/megatron-qwen3/training/run.sh index 462c439..a29a0e6 100644 --- a/examples/megatron-qwen3/training/run.sh +++ b/examples/megatron-qwen3/training/run.sh @@ -63,4 +63,4 @@ PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True NPROC_PER_NODE=$BT_NUM_GPUS NNO --use_precision_aware_optimizer true \ --use_hf 1 \ --wandb_project qwen3_moe_megatron \ - --wandb_exp_name all_training_b10f \ \ No newline at end of file + --wandb_exp_name all_training_b10f