In [1]:
%load_ext dotenv
%dotenv
%pip install transformers
!huggingface-cli login --token $HUGGING_FACE_TOKEN


Note: you may need to restart the kernel to use updated packages.


In [3]:
# imports

from transformers import AutoTokenizer, T5ForConditionalGeneration

In [12]:
# load model

# model_name = "grammarly/coedit-large" # 770B
model_name = "grammarly/coedit-xl" # 3B
# model_name = "grammarly/coedit-xll" # 11B

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")

# print("Tokenizer file path:", tokenizer.name_or_path)
# print("Model file path:", model.pretrained_init_configuration["cache_dir"])



In [6]:
model = T5ForConditionalGeneration.from_pretrained(model_name)

# for param_tensor in model.state_dict():
#     print(param_tensor, "\t", model.state_dict()[param_tensor].size())

total_params = sum(p.numel() for p in model.parameters())
print(f'Total Parameters: {total_params}')

total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Trainable Parameters: {total_trainable_params}')

total_memory_GB = total_params * 4 / (1024 ** 3)
print(f'Estimated model memory: {total_memory_GB:.2f} GB')

Total Parameters: 2849642496
Trainable Parameters: 2849642496
Estimated model memory: 10.62 GB


In [9]:
# available devicesimport torch

import torch

if torch.cuda.is_available():
    print("Number of GPUs available:", torch.cuda.device_count())

    for i in range(torch.cuda.device_count()):
        device = torch.device(f"cuda:{i}")
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")
        print(f"Memory Usage:")
        print(f"Allocated: {torch.cuda.memory_allocated(device)/1024**3:.2f} GB")
        print(f"Reserved:   {torch.cuda.memory_reserved(device)/1024**3:.2f} GB")
else:
    print("No GPUs available, running on CPU.")

# torch.cuda.empty_cache()
print(f"{torch.cuda.memory_allocated(device)}")

Number of GPUs available: 1
Device 0: NVIDIA GeForce RTX 3080
Memory Usage:
Allocated: 0.00 GB
Reserved:   0.00 GB
0


In [14]:
from unittest.util import _MAX_LENGTH
import torch
from transformers import AutoTokenizer, T5ForConditionalGeneration
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset

# torch.cuda.empty_cache()
print(f"Allocated: {torch.cuda.memory_allocated(device)/1024**3:.2f} GB")
print(f"Reserved:   {torch.cuda.memory_reserved(device)/1024**3:.2f} GB")

pipe = pipeline("text2text-generation", model=model_name, tokenizer=model_name, device_map="auto", max_length=128)
result = pipe("Fix grammatical errors in this sentence: When I grow up, I start to understand what he said is quite right")
print(f"Result: {result}")

print(f"Allocated: {torch.cuda.memory_allocated(device)/1024**3:.2f} GB")
print(f"Reserved: {torch.cuda.memory_reserved(device)/1024**3:.2f} GB")

Allocated: 7.71 GB
Reserved:   7.73 GB




Allocated: 7.71 GB
Reserved: 7.73 GB
Result: [{'generated_text': 'As I grow up, I start to understand what he said is quite right.'}]


In [13]:
input_text = "Fix grammatical errors in this sentence: When I grow up, I start to understand what he said is quite right."
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
outputs = model.generate(input_ids, max_length=256)
edited_text = tokenizer.decode(outputs[0], skip_special_tokens=True, max_length=128)
print(edited_text)

As I grow up, I start to understand what he said is quite right.
