# Hugging Face Cached Models

In [1]:
from transformers import pipeline
import torch

# Device selection for the pipelines below.
# Prefer GPU 1, but only when that index actually exists: on a single-GPU
# machine index 1 is an invalid device ordinal, so fall back to GPU 0 there,
# and to the CPU (-1) when CUDA is not available at all.
if not torch.cuda.is_available():
    device_id = -1
elif torch.cuda.device_count() > 1:
    device_id = 1
else:
    device_id = 0

In [None]:
# GPT-2 XL, 1.5B parameters: build the text-generation pipeline.
# NOTE(review): unlike the other model cells, no `device=` is passed here —
# confirm whether running on the default device is intended.
pipe_gpt2xl = pipeline(task="text-generation", model="gpt2-xl")

# Smoke test: request one continuation of a short prompt and show it.
output_gpt2xl = pipe_gpt2xl(
    "In a world where technology rules,",
    num_return_sequences=1,
    truncation=True,
    max_length=50,
)
print(output_gpt2xl)


In [2]:
# GPT-Neo, 2.7B parameters: build the text-generation pipeline on the
# device chosen in `device_id`.
pipe_gptneo = pipeline(
    task="text-generation",
    model="EleutherAI/gpt-neo-2.7B",
    device=device_id,
)

# Smoke test: request one continuation of a short prompt and show it.
output_gptneo = pipe_gptneo(
    "Once upon a time, deep in the forest,",
    num_return_sequences=1,
    truncation=True,
    max_length=50,
)
print(output_gptneo)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Once upon a time, deep in the forest, it was never known whether or not the old woman was still living. To the children—all but the youngest—she was no more than a vague apparition, a vague sound of crumpling'}]


In [None]:
# GPT-J, 6B parameters: build the text-generation pipeline on the device
# chosen in `device_id`. Kept in its own cell, separate from the example call.
pipe_gptj = pipeline(
    task="text-generation",
    model="EleutherAI/gpt-j-6B",
    device=device_id,
)

In [None]:
# Smoke test for GPT-J: request one continuation of a short prompt and show it.
# Requires `pipe_gptj` from the cell above.
output_gptj = pipe_gptj(
    "The mysteries of the universe began to unravel when,",
    num_return_sequences=1,
    truncation=True,
    max_length=50,
)
print(output_gptj)

In [None]:
# BLOOM, 7B parameters (checkpoint `bigscience/bloom-7b1`): build the
# text-generation pipeline on the device chosen in `device_id`.
pipe_bloom7b = pipeline(
    task="text-generation",
    model="bigscience/bloom-7b1",
    device=device_id,
)

In [None]:
# Smoke test for BLOOM: request one continuation of a short prompt and show it.
# Requires `pipe_bloom7b` from the cell above.
output_bloom7b = pipe_bloom7b(
    "At the dawn of the AI age,",
    num_return_sequences=1,
    truncation=True,
    max_length=50,
)
print(output_bloom7b)

# Utilize a GPU

In [None]:
import time
from transformers import pipeline, set_seed
import torch
set_seed(42)
# Text for testing
input_text = "In a world where technology rules,"


def _timed_generation(generator, prompt, seed=42):
    """Run one text generation and take timer readings around it.

    Args:
        generator: A text-generation pipeline; called with the prompt plus
            ``max_length=50, truncation=True, num_return_sequences=1``.
        prompt: Text to continue.
        seed: Value passed to ``set_seed`` immediately before the run, so that
            every timed run starts from the same seed instead of from
            whatever RNG state the previous run left behind.

    Returns:
        ``(output, start, end)`` where ``start`` and ``end`` are
        ``time.perf_counter()`` readings taken right before and after the call.
    """
    set_seed(seed)
    # perf_counter is the clock intended for measuring intervals; unlike
    # time.time() it is not affected by system clock adjustments.
    start = time.perf_counter()
    output = generator(prompt, max_length=50, truncation=True, num_return_sequences=1)
    end = time.perf_counter()
    return output, start, end


# GPU Timing (prefers GPU 1).
# Only use index 1 when it exists; a single-GPU machine falls back to GPU 0,
# and a machine without CUDA falls back to the CPU (-1).
if not torch.cuda.is_available():
    device_id = -1
elif torch.cuda.device_count() > 1:
    device_id = 1
else:
    device_id = 0
pipe_gpt2xl_gpu = pipeline('text-generation', model='gpt2-xl', device=device_id)

output_gpu, start_time_gpu, end_time_gpu = _timed_generation(pipe_gpt2xl_gpu, input_text)

print("GPU output:", output_gpu)
print(f"GPU processing time: {end_time_gpu - start_time_gpu:.2f} seconds")


# CPU Timing (forcing CPU usage)
pipe_gpt2xl_cpu = pipeline('text-generation', model='gpt2-xl', device=-1)

output_cpu, start_time_cpu, end_time_cpu = _timed_generation(pipe_gpt2xl_cpu, input_text)

print("CPU output:", output_cpu)
print(f"CPU processing time: {end_time_cpu - start_time_cpu:.2f} seconds")


In [3]:
# Shell escape: print the NVIDIA driver's per-GPU status table (utilisation and memory in use).
# NOTE: the huggingface/tokenizers fork warning in this cell's output appears because the
# process is forked after tokenizer parallelism has already been used; the warning itself
# suggests setting TOKENIZERS_PARALLELISM explicitly to avoid it.
!nvidia-smi # GPU status and available memory

Tue Oct 29 11:08:33 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.161.07             Driver Version: 535.161.07   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA RTX A6000               On  | 00000000:2E:00.0 Off |                  Off |
| 30%   31C    P8              27W / 300W |  17062MiB / 49140MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A6000               On  | 00000000:41:0

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [None]:
# !kill -9 237895 # Example only (the PID will differ): if too many processes are running on a GPU, find the PID of your own process and kill it to free its GPU memory.