In [1]:
import torch

In [2]:
from transformers import GemmaTokenizer, AutoModelForCausalLM

In [3]:
gpu = torch.device('cuda:0')

In [4]:
model_id = "google/codegemma-2b"
tokenizer = GemmaTokenizer.from_pretrained(model_id)

In [5]:
print(tokenizer.tokenize('Winterknubbelbaum'))
print(tokenizer.tokenize("Schäfer"))
print(tokenizer.tokenize("Rennrad"))
print(tokenizer.tokenize("😊"))
print(tokenizer.tokenize("🫂"))

['Winter', 'kn', 'ub', 'bel', 'baum']
['Sch', 'ä', 'fer']
['R', 'enn', 'rad']
['😊']
['🫂']


In [6]:
model = AutoModelForCausalLM.from_pretrained(model_id, device_map=gpu, torch_dtype=torch.float16)

Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [36]:
prompt = '''def hello_world.'''

In [37]:
inputs = tokenizer(prompt, return_tensors="pt").to(gpu)

In [38]:
tokenizer.tokenize('Winterknubbelbaum')

['Winter', 'kn', 'ub', 'bel', 'baum']

In [45]:
prompt_len = inputs["input_ids"].shape[-1]
res = model(**inputs).logits[0][-1]
print(len(res))

res = torch.softmax(res, 0)
print(res)

res = torch.argmax(res)

print(res)

ninput = tokenizer.convert_ids_to_tokens([res])
print(ninput)

print("beam--------------------------------")
outputs = model.generate(**inputs, num_beams=8, max_new_tokens=256)
print(tokenizer.decode(outputs[0]))

print("top-p-------------------------")
outputs = model.generate(**inputs, do_sample=True, top_p=0.9, max_new_tokens=256)
print(tokenizer.decode(outputs[0]))


print("top-k--------------------------")
outputs = model.generate(**inputs, do_sample=True, top_k=3, max_new_tokens=256, temperature=2.0)
print(tokenizer.decode(outputs[0]))

#outputs = model.generate(**inputs, max_new_tokens=4000)
#print(tokenizer.decode(outputs[0]))

256000
tensor([2.8011e-11, 7.9222e-07, 8.9770e-07,  ..., 2.2032e-10, 4.9651e-10,
        3.1741e-11], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor(2158, device='cuda:0')
['py']
beam--------------------------------
<bos>def hello_world.py
<|fim_prefix|><|fim_suffix|><|fim_middle|>def hello_world():
    print("hello world")

hello_world()<|file_separator|><eos>
top-p-------------------------
<bos>def hello_world.py
<|fim_prefix|><|fim_suffix|><|fim_middle|>def hello_world():
    print("hello world")

def sum():
    print(1+2)<|file_separator|><eos>
top-k--------------------------
<bos>def hello_world.c

#include "hello_world"


int main()
{
    printf("Hello world\n");
    return 0;
<|file_separator|>hello_world/src/main.h
<|fim_prefix|>
void hello<|fim_suffix|><|fim_middle|>_world();<|file_separator|>def hello_world.c
#include <stdio.h>

void hello_wold() {
  printf("Hello world");
  
}<|file_separator|><eos>


In [1]:
import git

In [2]:
!git clone --bare https://github.com/pallets/flask.git

Cloning into bare repository 'flask.git'...
remote: Enumerating objects: 24783, done.[K
remote: Counting objects: 100% (131/131), done.[K
remote: Compressing objects: 100% (94/94), done.[K
remote: Total 24783 (delta 50), reused 100 (delta 33), pack-reused 24652[K
Receiving objects: 100% (24783/24783), 10.19 MiB | 15.83 MiB/s, done.
Resolving deltas: 100% (16595/16595), done.


In [3]:
repo = git.Repo('./flask.git')

In [4]:
head = repo.head

In [5]:
head.commit.hexsha

'11c15ddfeb6edcb0978d3407ed972ae441013177'

In [6]:
head.path

'HEAD'

In [7]:
repo.git

<git.cmd.Git at 0x7f87846fabc0>

In [8]:
repo.git_dir

'/workspace/flask.git'