{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":667313388,"defaultBranch":"main","name":"Megatron-DeepSpeed","ownerLogin":"YizhouZ","currentUserCanPush":false,"isFork":true,"isEmpty":false,"createdAt":"2023-07-17T08:19:10.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/86939711?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1711515556.0","currentOid":""},"activityList":{"items":[{"before":"f285bb0c11294ffc3c5b5650571e3158207f188e","after":"6b7ef016ad1ea9a7687ab2706318fa1956c073f3","ref":"refs/heads/yizhou/fupdate_flash","pushedAt":"2024-04-02T15:33:43.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"use hasattr","shortMessageHtmlLink":"use hasattr"}},{"before":"a8ede80c32dfacac23cc441bab8308cb1427bb30","after":"f285bb0c11294ffc3c5b5650571e3158207f188e","ref":"refs/heads/yizhou/fupdate_flash","pushedAt":"2024-04-02T14:45:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"add use-flash-attn-builder to make flash_attn usage clear and compatible","shortMessageHtmlLink":"add use-flash-attn-builder to make flash_attn usage clear and compatible"}},{"before":null,"after":"a8ede80c32dfacac23cc441bab8308cb1427bb30","ref":"refs/heads/yizhou/fupdate_flash","pushedAt":"2024-03-27T04:59:16.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"remove unnecessary codes for latest flash-attn opbuilder","shortMessageHtmlLink":"remove unnecessary codes for latest flash-attn 
opbuilder"}},{"before":"796866fa74f23850b977d4023a7ed4f0031844ae","after":"ebe80252f492613fe60489224669a5a8f370dbd3","ref":"refs/heads/main","pushedAt":"2024-03-27T02:33:55.000Z","pushType":"push","commitsCount":46,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"MOE: Support disable top2 2nd expert sampling (#362)\n\nDeepSpeed's MoE top2 gating performs sampling to select 2nd expert.\r\nSupport configuration for disabling of sampling (i.e. using argmax).\r\nNew argument: --disable-moe-top2-2nd-expert-sampling.\r\n\r\nSigned-off-by: Moshe Island \r\nCo-authored-by: Moshe Island ","shortMessageHtmlLink":"MOE: Support disable top2 2nd expert sampling (microsoft#362)"}},{"before":"e8720bd3e04853314066b0570198c07b72d10ecd","after":"22a1fc1c4c2b2c427762b56dce236c34c7b5bd13","ref":"refs/heads/yizhou/flash_attn_builder","pushedAt":"2023-10-26T05:47:16.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"add global tag","shortMessageHtmlLink":"add global tag"}},{"before":null,"after":"e8720bd3e04853314066b0570198c07b72d10ecd","ref":"refs/heads/yizhou/flash_attn_builder","pushedAt":"2023-10-26T05:40:25.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"avoid building flash_attn op when unset use_flash_attn","shortMessageHtmlLink":"avoid building flash_attn op when unset use_flash_attn"}},{"before":"4822c87ee6adfa4e480614cbe3f1d8ae00bd3db7","after":"796866fa74f23850b977d4023a7ed4f0031844ae","ref":"refs/heads/main","pushedAt":"2023-10-24T06:36:35.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"YizhouZ","name":"Yizhou 
Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"fix the config file unclose (#268)\n\nWe have a runtime checker, and it report: \r\n```\r\nResourceWarning: unclosed file <_io.TextIOWrapper name='deepspeed_config_13B.json' mode='r' encoding='utf-8'>\r\n open(args.deepspeed_config, 'r', encoding='utf-8'))\r\n```\r\nBecause when json.load(open()), it never close.","shortMessageHtmlLink":"fix the config file unclose (microsoft#268)"}},{"before":"8e286da1ede9d7e0ec435febd4d26f413f9c6bc3","after":"d75dbc1183d417828953e0deb7d1f98cee3970d7","ref":"refs/heads/yizhou/fix_flash_attn_assert","pushedAt":"2023-10-08T09:04:29.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"fix typo","shortMessageHtmlLink":"fix typo"}},{"before":null,"after":"8e286da1ede9d7e0ec435febd4d26f413f9c6bc3","ref":"refs/heads/yizhou/fix_flash_attn_assert","pushedAt":"2023-10-08T09:03:37.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"fix assert error while using xpu flash-attn","shortMessageHtmlLink":"fix assert error while using xpu flash-attn"}},{"before":"69e3c6a0d6d5af7d38f3311a7e11e40ba8280b34","after":"4822c87ee6adfa4e480614cbe3f1d8ae00bd3db7","ref":"refs/heads/main","pushedAt":"2023-10-08T07:49:58.000Z","pushType":"push","commitsCount":41,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"disable elastic checkpointing in ds config templates (#261)","shortMessageHtmlLink":"disable elastic checkpointing in ds config templates 
(microsoft#261)"}},{"before":null,"after":"5f8923e8e430aa82df309e6295c37463c6418c4b","ref":"refs/heads/yizhou/generalize_api","pushedAt":"2023-07-20T05:32:59.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"generalize api","shortMessageHtmlLink":"generalize api"}},{"before":"8c0fd4791000ec418570b0832e21c543f35256d8","after":"69e3c6a0d6d5af7d38f3311a7e11e40ba8280b34","ref":"refs/heads/main","pushedAt":"2023-07-20T05:02:31.134Z","pushType":"push","commitsCount":4,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"support llama pretraining (#166)\n\n* add external user record\r\n\r\n* support llama pretraining\r\n\r\n* move example script to examples_deepspeed/\r\n\r\n---------\r\n\r\nCo-authored-by: Conglong \r\nCo-authored-by: LydiaXiaohongLi ","shortMessageHtmlLink":"support llama pretraining (microsoft#166)"}},{"before":"8c0fd4791000ec418570b0832e21c543f35256d8","after":"69e3c6a0d6d5af7d38f3311a7e11e40ba8280b34","ref":"refs/heads/main","pushedAt":"2023-07-20T05:02:31.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"support llama pretraining (#166)\n\n* add external user record\r\n\r\n* support llama pretraining\r\n\r\n* move example script to examples_deepspeed/\r\n\r\n---------\r\n\r\nCo-authored-by: Conglong \r\nCo-authored-by: LydiaXiaohongLi ","shortMessageHtmlLink":"support llama pretraining 
(microsoft#166)"}},{"before":"c1125740b1c5172780a01e7b45801b453141adc9","after":null,"ref":"refs/heads/yizhou/flash_attn","pushedAt":"2023-07-19T05:16:55.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"}},{"before":"3e7f3d4cfae30585005ba9bee487ad4f182da4da","after":"c1125740b1c5172780a01e7b45801b453141adc9","ref":"refs/heads/yizhou/flash_attn","pushedAt":"2023-07-19T02:04:20.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"update","shortMessageHtmlLink":"update"}},{"before":"c41f21f6a108652dbda08bc45122258d0bc054b7","after":"3e7f3d4cfae30585005ba9bee487ad4f182da4da","ref":"refs/heads/yizhou/flash_attn","pushedAt":"2023-07-19T02:01:10.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"update","shortMessageHtmlLink":"update"}},{"before":"8c0fd4791000ec418570b0832e21c543f35256d8","after":"3e1ef9275ca7c174c923a8cc48e38e4d1c51e6d0","ref":"refs/heads/yizhou/fix_import","pushedAt":"2023-07-18T15:09:42.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"fix error comment in megatron/training.py","shortMessageHtmlLink":"fix error comment in megatron/training.py"}},{"before":null,"after":"8c0fd4791000ec418570b0832e21c543f35256d8","ref":"refs/heads/yizhou/fix_import","pushedAt":"2023-07-18T15:07:49.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"YizhouZ","name":"Yizhou 
Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"support other devices besides cuda (#162)\n\n* support xpu\r\n\r\n* update transformer.py","shortMessageHtmlLink":"support other devices besides cuda (microsoft#162)"}},{"before":null,"after":"c41f21f6a108652dbda08bc45122258d0bc054b7","ref":"refs/heads/yizhou/flash_attn","pushedAt":"2023-07-18T07:35:32.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"support flash_attn","shortMessageHtmlLink":"support flash_attn"}},{"before":"5de6159ca132799645fe7189d9dccf38a9852e84","after":null,"ref":"refs/heads/yizhou/support_xpu","pushedAt":"2023-07-18T07:18:37.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"}},{"before":"7f7cea32bc7b13ec8b3981b1a4616ed5d5dc48a3","after":"8c0fd4791000ec418570b0832e21c543f35256d8","ref":"refs/heads/main","pushedAt":"2023-07-18T07:17:15.739Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"support other devices besides cuda (#162)\n\n* support xpu\r\n\r\n* update transformer.py","shortMessageHtmlLink":"support other devices besides cuda (microsoft#162)"}},{"before":"7f7cea32bc7b13ec8b3981b1a4616ed5d5dc48a3","after":"8c0fd4791000ec418570b0832e21c543f35256d8","ref":"refs/heads/main","pushedAt":"2023-07-18T07:17:15.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"support other devices besides cuda 
(#162)\n\n* support xpu\r\n\r\n* update transformer.py","shortMessageHtmlLink":"support other devices besides cuda (microsoft#162)"}},{"before":"336de35b3e2721e1cb0802d93af99c37ef8f833f","after":"5de6159ca132799645fe7189d9dccf38a9852e84","ref":"refs/heads/yizhou/support_xpu","pushedAt":"2023-07-18T01:46:56.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"update transformer.py","shortMessageHtmlLink":"update transformer.py"}},{"before":"7f7cea32bc7b13ec8b3981b1a4616ed5d5dc48a3","after":"336de35b3e2721e1cb0802d93af99c37ef8f833f","ref":"refs/heads/yizhou/support_xpu","pushedAt":"2023-07-17T09:17:23.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"support xpu","shortMessageHtmlLink":"support xpu"}},{"before":null,"after":"336de35b3e2721e1cb0802d93af99c37ef8f833f","ref":"refs/heads/my_mg/yizhou/support_xpu","pushedAt":"2023-07-17T09:15:47.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"support xpu","shortMessageHtmlLink":"support xpu"}},{"before":null,"after":"7f7cea32bc7b13ec8b3981b1a4616ed5d5dc48a3","ref":"refs/heads/yizhou/support_xpu","pushedAt":"2023-07-17T09:13:52.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"YizhouZ","name":"Yizhou Wang","path":"/YizhouZ","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/86939711?s=80&v=4"},"commit":{"message":"flash attention and rope","shortMessageHtmlLink":"flash attention and 
rope"}}],"hasNextPage":false,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEJbUjlwA","startCursor":null,"endCursor":null}},"title":"Activity · YizhouZ/Megatron-DeepSpeed"}