using ChatGPTTokenizer;
// Build the tokenizer from the contents of "merges.txt"; the using declaration
// disposes it when the enclosing scope ends.
var mergeTable = File.ReadAllText("merges.txt");
using var bpe = new BpeTokenizer(mergeTable);

// Sample input: the raw string literal's content is the single line between the delimiters.
var sample = """
print("Hello world!")
""";

// Encode once, then print the number of tokens followed by their ids joined with commas.
var encoded = bpe.Encode(sample);
var ids = encoded.Select(token => token.Id);
Console.WriteLine($"count: {encoded.Length}"); // count: 6
Console.WriteLine(string.Join(',', ids)); // 4798,7203,15496,995,2474,8
-
Notifications
You must be signed in to change notification settings - Fork 0
ibukisaar/ChatGPTTokenizer
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Folders and files
Name | Name | Last commit message | Last commit date | |
---|---|---|---|---|
Repository files navigation
About
OpenAI GPT2TokenizerFast 的 C# 实现
Topics
Resources
Stars
Watchers
Forks
Releases
No releases published
Packages 0
No packages published