### Observations about the structure of messages

- `system`: how the model should behave, e.g. "You are a helpful assistant" or "You are a Python mentor".
- `user`: the user's question or prompt.
- `assistant`: the model's response.

In [None]:
# Install/upgrade the Hugging Face libraries used by the cells below.
# If this gives an "ERROR" about pip dependency conflicts, ignore it! It doesn't affect anything.

%pip install -q -U transformers datasets diffusers
# soundfile is needed by the audio cell, which writes the generated speech to a .wav file.
%pip install soundfile

In [None]:
# Imports

import torch
# changing imports to work locally
# from google.colab import userdata
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import pipeline
from diffusers import DiffusionPipeline
from datasets import load_dataset
import soundfile as sf
from IPython.display import Audio
import os

In [None]:
# Load environment variables from a file called .env.
# override=True means values from .env replace variables already set in the process.
# Print only the key prefix to help with debugging - never the full secret:
# cell outputs are saved inside the notebook and are easily shared or committed.

load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')
hugging_face_api_key = os.getenv('HUGGING_FACE_API_KEY')

if hugging_face_api_key:
    print(f"HuggingFace API Key exists and begins {hugging_face_api_key[:8]}")
else:
    print("HuggingFace API Key not set")
    

In [None]:
# Sentiment Analysis
# No model is named here, so the library picks the checkpoint for this task.

classifier = pipeline(task="sentiment-analysis")
result = classifier(
    "I'm super excited to be on the way to LLM mastery!"
)
print(result)

In [None]:
# Named Entity Recognition
# aggregation_strategy="simple" merges the per-token predictions into whole entities
# (e.g. "Barack" + "Obama" -> one PER entity). It is the supported replacement for the
# deprecated grouped_entities=True flag, which maps to this same strategy.

ner = pipeline("ner", aggregation_strategy="simple")
result = ner("Barack Obama was the 44th president of the United States.")
print(result)

In [None]:
# Question Answering with Context
# The pipeline is given both the question and the passage that contains the answer.

question_answerer = pipeline(task="question-answering")
result = question_answerer(
    question="Who was the 44th president of the United States?",
    context="Barack Obama was the 44th president of the United States.",
)
print(result)

In [None]:
# Text Summarization
# Summarise a short passage; max_length / min_length bound the summary length and
# do_sample=False turns sampling off.

summarizer = pipeline(task="summarization")
text = """The Hugging Face transformers library is an incredibly versatile and powerful tool for natural language processing (NLP).
It allows users to perform a wide range of tasks such as text classification, named entity recognition, and question answering, among others.
It's an extremely popular library that's widely used by the open-source data science community.
It lowers the barrier to entry into the field by providing Data Scientists with a productive, convenient way to work with transformer models.
"""
summary = summarizer(
    text,
    max_length=50,
    min_length=25,
    do_sample=False,
)
# The first entry of the result carries the summary under "summary_text".
print(summary[0]["summary_text"])

In [None]:
# Translation
# English -> French; the task name alone selects the language pair.

translator = pipeline(task="translation_en_to_fr")
result = translator(
    "The Data Scientists were truly amazed by the power and simplicity of the HuggingFace pipeline API."
)
print(result[0]["translation_text"])

In [None]:
# Another translation (English -> Spanish), this time naming the model explicitly.
# All translation models are listed here: https://huggingface.co/models?pipeline_tag=translation&sort=trending

translator = pipeline(
    task="translation_en_to_es",
    model="Helsinki-NLP/opus-mt-en-es",
)
result = translator(
    "The Data Scientists were truly amazed by the power and simplicity of the HuggingFace pipeline API."
)
print(result[0]["translation_text"])

In [None]:
# Classification
# Zero-shot: the candidate labels are supplied at call time.

classifier = pipeline(task="zero-shot-classification")
result = classifier(
    "Hugging Face's Transformers library is amazing!",
    candidate_labels=["technology", "sports", "politics"],
)
print(result)

In [None]:
# Text Generation
# The model continues the prompt; the first result's "generated_text" is printed.

generator = pipeline(task="text-generation")
result = generator(
    "If there's one thing I want you to remember about using HuggingFace pipelines, it's"
)
print(result[0]["generated_text"])

In [None]:
# Image Generation - requires an NVIDIA GPU and a CUDA-enabled PyTorch install
#
# The weights are loaded in half precision (fp16 variant), which is meant for GPU
# inference. from_pretrained() leaves the pipeline on the CPU, so it must be moved
# to the GPU explicitly with .to("cuda") before it is called.

image_gen = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16"
    ).to("cuda")

text = "A class of Data Scientists learning about AI, in the surreal style of Salvador Dali"
# The pipeline output exposes the generated images as a list; take the first one.
image = image_gen(prompt=text).images[0]
# Leaving the image as the last expression makes the notebook render it.
image

In [None]:
# Audio Generation - requires an NVIDIA GPU and a CUDA-enabled PyTorch install

# Text-to-speech pipeline placed on the GPU.
synthesiser = pipeline(
    task="text-to-speech",
    model="microsoft/speecht5_tts",
    device="cuda",
)

# Take the x-vector stored at row 7306 of the validation split as the speaker embedding.
# unsqueeze(0) adds a leading dimension to the tensor.
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

speech = synthesiser(
    "Hi to an artificial intelligence engineer, on the way to mastery!",
    forward_params={"speaker_embeddings": speaker_embedding},
)

# Write the waveform to disk at the sampling rate the pipeline reports, then play it inline.
sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
Audio("speech.wav")