# Setup

In [1]:
!pip install transformers datasets
!pip install torch

Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/7.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━[0m [32m4.2/7.2 MB[0m [31m128.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m7.2/7.2 MB[0m [31m150.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m90.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.13.1-py3-none-any.whl (486 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.2/486.2 kB[0m [31m48.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m31.0 

# GPU Info

In [2]:
# Report the PyTorch build and the accelerator it can use, choose `torch_device`
# (cuda, mps or cpu) and run a tiny tensor operation on it as a smoke test.
print("GPU Information for PyTorch")
# The label is printed before the (slow) torch import; the version is appended after it.
print("   Version of torch: ",end="")
import torch
import os

# AMD ROCm requires environmental override to prevent segfault
sim = ""
if "rocm" in torch.__version__:
    os.environ["HSA_OVERRIDE_GFX_VERSION"] = "10.3.0"
    sim = "(ROCm)"

print(f" {torch.__version__}")
print()
print("GPU Details")

# Query each backend once. `torch.backends.mps` is looked up defensively so the
# cell does not raise AttributeError on torch builds that lack the attribute.
has_cuda = torch.cuda.is_available()
mps_backend = getattr(torch.backends, "mps", None)
has_mps = mps_backend is not None and mps_backend.is_available()

if has_cuda:
    # Get number of GPUs available
    gpus = torch.cuda.device_count()

    # Get index of currently selected device
    deviceno = torch.cuda.current_device()
    name = torch.cuda.get_device_name(deviceno)

    # Global free and total memory (in bytes) of the selected device, converted to GiB
    (mem_free, gpu_mem) = torch.cuda.mem_get_info(deviceno)
    mem_free = mem_free / 1024**3
    gpu_mem = gpu_mem / 1024**3

    print(f"   Device #{deviceno}: {name}")
    print(f"   Type: cuda {sim}")
    print(f"   GPUs: {gpus}")
    print()
    print("Memory")
    print(f"   Global Free Memory: {mem_free} GB")
    print(f"   GPU Memory: {gpu_mem} GB")
    # Report allocator statistics for the same device that is described above
    # (previously hard-coded to device 0).
    print('   Allocated:', round(torch.cuda.memory_allocated(deviceno)/1024**3,1), 'GB')
    print('   Cached:   ', round(torch.cuda.memory_reserved(deviceno)/1024**3,1), 'GB')
    torch_device = torch.device("cuda")
elif has_mps:
    # PyTorch has access to MPS (Metal Performance Shader, Apple's GPU architecture)
    print("   Device: Apple Silicon Found")
    print(f"   MPS (Metal Performance Shader) built: {mps_backend.is_built()}")
    print(f"   MPS available: {has_mps}")
    torch_device = torch.device("mps")
else:
    # No GPUs Available
    print("   ** No GPU support found **")
    print("   Device: CPU")
    torch_device = torch.device("cpu")

# Run a simple PyTorch test
print()
print(f"PyTorch Test with {torch_device} - Random 4x4 Array\n")
random_array = torch.randint(low=0, high=10000, size=(4, 4), device=torch_device)
print(random_array)


GPU Information for PyTorch
   Version of torch:  2.0.1+cu118

GPU Details
   Device #0: Tesla T4
   Type: cuda 
   GPUs: 1

Memory
   Global Free Memory: 14.64727783203125 GB
   GPU Memory: 14.74786376953125 GB
   Allocated: 0.0 GB
   Cached:    0.0 GB

PyTorch Test with cuda - Random 4x4 Array

tensor([[1725, 8599, 4817, 6968],
        [ 124, 7518, 7481,  425],
        [9349, 1799, 8356, 7269],
        [5477,  713, 8610, 7819]], device='cuda:0')


# Sentiment

In [3]:
# import
import os
# Lower TensorFlow's C++ log verbosity. The variable is set before transformers
# is imported so it is already in the environment if TensorFlow gets loaded.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import transformers
# Only show error-level messages from the transformers library
transformers.logging.set_verbosity_error()
# NOTE(review): `sys` is not referenced by any cell visible here
import sys
import textwrap

In [4]:
# Load the sentiment-analysis pipeline
model_id = "cardiffnlp/twitter-roberta-base-sentiment-latest"
print("Loading transformer...")
print(f" * {model_id}")
classifier = transformers.pipeline("sentiment-analysis", model=model_id)

# Classify each sample phrase on its own and print the raw pipeline result
user_input = [
    "I love to go to the beach",
    "I hit my head on the nail",
    "I am angry with her",
    "The sun was yellow",
]
for phrase in user_input:
    sentiment_score = classifier(phrase)
    print(f"Phrase: {phrase}\nSentiment score: {sentiment_score}\n")


Loading transformer...
 * cardiffnlp/twitter-roberta-base-sentiment-latest


Downloading (…)lve/main/config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Phrase: I love to go to the beach
Sentiment score: [{'label': 'positive', 'score': 0.9762992262840271}]

Phrase: I hit my head on the nail
Sentiment score: [{'label': 'negative', 'score': 0.6980270147323608}]

Phrase: I am angry with her
Sentiment score: [{'label': 'negative', 'score': 0.9016545414924622}]

Phrase: The sun was yellow
Sentiment score: [{'label': 'neutral', 'score': 0.6344175934791565}]



# GPT-2 Text Generation

In [5]:
def generate(phrase, max_length=200, num_return_sequences=1):
    """Continue ``phrase`` with the gpt2-large text-generation pipeline.

    Args:
        phrase: Prompt text handed to the pipeline.
        max_length: ``max_length`` value forwarded to the pipeline call
            (default 200, the value this notebook has always used).
        num_return_sequences: Number of continuations requested from the
            pipeline (default 1). For several short alternatives use e.g.
            ``max_length=30, num_return_sequences=5``.

    Returns:
        The pipeline's output unchanged; the caller in this notebook reads
        ``output[0]['generated_text']``.
    """
    # Use the first CUDA GPU when there is one and fall back to the CPU
    # otherwise, instead of failing on machines without CUDA.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    generator = transformers.pipeline('text-generation',
                                      model='gpt2-large',
                                      device=device)
    # Fixed seed so repeated runs produce the same sample
    transformers.set_seed(42)
    return generator(phrase,
                     max_length=max_length,
                     num_return_sequences=num_return_sequences)

# main: generate one continuation of the prompt and print it wrapped to 60 columns
prompt = "As a artificial intellignet agent, my goal is"
wrapper = textwrap.TextWrapper(width=60)
output = generate(prompt)[0]['generated_text']
output = wrapper.fill(output)
print(f"Generated: \n{output}")


Downloading (…)lve/main/config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/3.25G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Generated: 
As a artificial intellignet agent, my goal is for you to
discover the secrets behind our systems."  "You're speaking
of the human mind? You're a clever boy… I expect this one is
what is most interesting… how you perceive the
possibilities… I see. Your mind's already opened." She had
her eyes closed, and I could smell her perfume as she had
said it, but I couldn't tell if it was in the air or from
her hair… she must have used a cologne before going to
sleep.  "That's fine, it's not a big deal if you're
interested. Your mind won't be completely blank, I'll just
show you what I've programmed into this software. We're on
the surface, right? It's not necessary to use it anymore;
the software can be used for anything. Even sleep."  She
smiled brightly, and I felt a little like laughing. Before I
could, she raised another hand and


# Text Summarization

In [6]:
# grab a test text file
import requests

# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops here on an HTTP error so that an error page is
# never saved as pottery.txt.
response = requests.get("https://raw.githubusercontent.com/jasonacox/ProtosAI/master/pottery.txt",
                        timeout=30)
response.raise_for_status()
with open("pottery.txt", 'w', encoding='utf-8') as file:
    file.write(response.text)

In [7]:
# title
print("Summarizer")

# load models
print("\nLoading transformer...")
model_id = "sshleifer/distilbart-cnn-12-6"
print(f" * {model_id}")
# Use the first CUDA GPU when there is one and fall back to the CPU otherwise,
# instead of failing on machines without CUDA.
summarizer = transformers.pipeline("summarization",
                                   model=model_id,
                                   device="cuda:0" if torch.cuda.is_available() else "cpu")

# read text file
def read_text_file(file_path):
    """Announce the file being read and return its full contents as a string.

    The file is opened in text mode and decoded as UTF-8.
    """
    print(f"\nReading {file_path}...")
    with open(file_path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents

# print output
def _print_text_stats(text):
    """Print the number of lines, words and characters in ``text``."""
    num_lines = len(text.split('\n'))
    num_words = len(text.split())
    num_characters = len(text)

    print(f"Number of lines: {num_lines}")
    print(f"Number of words: {num_words}")
    print(f"Number of characters: {num_characters}")


def print_summary(text):
    """Summarize ``text`` and print the summary with size statistics.

    Prints line/word/character counts for the input, runs the module-level
    ``summarizer`` pipeline on it, prints the summary wrapped to 80 columns,
    then prints the same counts for the wrapped summary.

    Args:
        text: The text to summarize.

    Returns:
        None; everything is written to standard output.
    """
    _print_text_stats(text)

    print("\nSummarizing...")
    summary = summarizer(text)
    wrapper = textwrap.TextWrapper(width=80)
    wrapped_summary = wrapper.fill(summary[0]['summary_text'])
    print(f"Text: {wrapped_summary}")
    # The counts below describe the wrapped summary, so the line count reflects
    # the 80-column wrapping.
    _print_text_stats(wrapped_summary)


# main: read pottery.txt from the working directory, then print its
# statistics and summary
file_path = "pottery.txt"
text = read_text_file(file_path)
print_summary(text)

Summarizer

Loading transformer...
 * sshleifer/distilbart-cnn-12-6


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]


Reading pottery.txt...
Number of lines: 14
Number of words: 566
Number of characters: 3416

Summarizing...
Text:  The key to becoming a great artist, writer, musician, etc., is to keep
creating! Keep drawing, keep writing, keep playing! Quality emerges from the
quantity of practice and continuous learning that makes them more perfect . The
prize of perfection comes by delivering and learning, says Jason Cox .
Number of lines: 4
Number of words: 49
Number of characters: 299


# Transcribe

In [8]:
# grab the test audio file
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops here on an HTTP error so that an error page is
# never saved as test.wav.
response = requests.get("https://github.com/jasonacox/ProtosAI/raw/master/test.wav",
                        timeout=30)
response.raise_for_status()
with open("test.wav", 'wb') as file:
    file.write(response.content)

In [9]:
# import
from transformers import pipeline

def transcribe(filename):
    """Transcribe the audio file ``filename`` and print the text.

    Builds a pipeline for the facebook/wav2vec2-base-960h model, runs it on
    the file in 10-second chunks and prints the resulting text wrapped to 80
    columns. Nothing is returned.
    """
    print("\nLoading model...")
    speech_to_text = pipeline(model="facebook/wav2vec2-base-960h")

    print(f"\nTranscribing {filename}...")
    # stride_length_s is a tuple of the left and right stride length.
    result = speech_to_text(filename, chunk_length_s=10, stride_length_s=(4, 2))

    formatter = textwrap.TextWrapper(width=80)
    print(formatter.fill(result['text']))

# main: transcribe test.wav from the working directory
file_path = "test.wav"
transcribe(file_path)


Loading model...


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/378M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/163 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/291 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

Downloading (…)rocessor_config.json:   0%|          | 0.00/159 [00:00<?, ?B/s]


Transcribing test.wav...
HELLO THIS IS A TEST
