{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":729282565,"defaultBranch":"main","name":"llama-mistral","ownerLogin":"dzhulgakov","currentUserCanPush":false,"isFork":true,"isEmpty":false,"createdAt":"2023-12-08T19:57:05.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/17890620?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1702065427.6134112","currentOid":""},"activityList":{"items":[{"before":"00ab193ec4a8f7702f952d3438951765063eb00f","after":"cecee4473fa2195cab45f02d27401b3d1aa597a5","ref":"refs/heads/main","pushedAt":"2023-12-09T03:45:49.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"dzhulgakov","name":"Dmytro Dzhulgakov","path":"/dzhulgakov","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/17890620?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"ed2acd58e8070e39ca41736b6b5230ced4cd7e80","after":"00ab193ec4a8f7702f952d3438951765063eb00f","ref":"refs/heads/main","pushedAt":"2023-12-09T02:28:43.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"dzhulgakov","name":"Dmytro Dzhulgakov","path":"/dzhulgakov","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/17890620?s=80&v=4"},"commit":{"message":"Swap topk and softmax in MoE\n\nPer @bjoernp's results this gives better metrics: https://huggingface.co/DiscoResearch/mixtral-7b-8expert/discussions/1\n\nOriginal implementation is ambiguous and Megablocks repo does\nsoftmax+topk. \nBut topk+softmax seems better on metrics","shortMessageHtmlLink":"Swap topk and softmax in MoE"}},{"before":"009f58aafa083a3d47013eeee9f729b34ce3e53e","after":"ed2acd58e8070e39ca41736b6b5230ced4cd7e80","ref":"refs/heads/main","pushedAt":"2023-12-08T23:04:47.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"dzhulgakov","name":"Dmytro Dzhulgakov","path":"/dzhulgakov","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/17890620?s=80&v=4"},"commit":{"message":"Make number of GPUs configurable","shortMessageHtmlLink":"Make number of GPUs configurable"}},{"before":"5d094109802f5837ed8c6299e3118507922132cc","after":"009f58aafa083a3d47013eeee9f729b34ce3e53e","ref":"refs/heads/main","pushedAt":"2023-12-08T22:21:27.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"dzhulgakov","name":"Dmytro Dzhulgakov","path":"/dzhulgakov","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/17890620?s=80&v=4"},"commit":{"message":"Update RoPE Theta to 1e6 per Mistral team comment\n\nSee: https://discord.com/channels/1144547040454508606/1144547040928481394/1182795741341691934\n\nHard to tell from a spot check whether results are better or not. But\nstill coherent.","shortMessageHtmlLink":"Update RoPE Theta to 1e6 per Mistral team comment"}},{"before":"ef351e9cd9496c579bf9f2bb036ef11bdc5ca3d2","after":"5d094109802f5837ed8c6299e3118507922132cc","ref":"refs/heads/main","pushedAt":"2023-12-08T20:05:07.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"dzhulgakov","name":"Dmytro Dzhulgakov","path":"/dzhulgakov","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/17890620?s=80&v=4"},"commit":{"message":"Very hacky Mixtral 8x7B implementation","shortMessageHtmlLink":"Very hacky Mixtral 8x7B implementation"}}],"hasNextPage":false,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAADxeFzxQA","startCursor":null,"endCursor":null}},"title":"Activity · dzhulgakov/llama-mistral"}