In [1]:
#check if we have transformers set up for this notebook
!pip show transformers

Name: transformers
Version: 4.34.0
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: /home/cdsw/.local/lib/python3.9/site-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
Required-by: peft, trl


In [2]:
# Check GPU Availability
!nvidia-smi

Fri Nov 24 13:28:20 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.54.03              Driver Version: 535.54.03    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       On  | 00000000:00:1E.0 Off |                    0 |
| N/A   20C    P8              12W /  70W |      2MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [1]:
#set up Torch and the tokenzier for our LLM
from transformers import AutoTokenizer, AutoModel, set_seed, AutoModelForCausalLM
import torch

torch.set_default_tensor_type(torch.cuda.FloatTensor)
tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-1b7")

#Download the Bloom model 
model_lm = AutoModelForCausalLM.from_pretrained("bigscience/bloom-1b7")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# We can now check the name of our model as follows 
model_lm.__class__

transformers.models.bloom.modeling_bloom.BloomForCausalLM

# Trying some Multi shot Prompts on Bloom Model 

In [7]:
!pip show accelerate

Name: accelerate
Version: 0.23.0
Summary: Accelerate
Home-page: https://github.com/huggingface/accelerate
Author: The HuggingFace team
Author-email: sylvain@huggingface.co
License: Apache
Location: /home/cdsw/.local/lib/python3.9/site-packages
Requires: huggingface-hub, numpy, packaging, psutil, pyyaml, torch
Required-by: peft, trl


In [8]:
#!pip install -q accelerate  bitsandbytes

In [9]:
from datasets import load_dataset
from evaluate import load as load_metric

dataset = load_dataset("glue", "sst2", split="validation").to_pandas()
metric = load_metric("glue", "sst2")

Found cached dataset glue (/home/cdsw/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


In [10]:
# let us have quick look at the data
!python -c "from datasets import load_dataset; print(load_dataset('squad', split='train')[0])"

Downloading builder script: 100%|██████████| 5.27k/5.27k [00:00<00:00, 27.1MB/s]
Downloading metadata: 100%|████████████████| 2.36k/2.36k [00:00<00:00, 15.2MB/s]
Downloading readme: 100%|██████████████████| 7.67k/7.67k [00:00<00:00, 29.5MB/s]
Downloading and preparing dataset squad/plain_text to /home/cdsw/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453...
Downloading data files:   0%|                             | 0/2 [00:00<?, ?it/s]
Downloading data:   0%|                             | 0.00/8.12M [00:00<?, ?B/s][A
Downloading data:  89%|█████████████████▋  | 7.20M/8.12M [00:00<00:00, 72.0MB/s][A
Downloading data: 14.5MB [00:00, 72.8MB/s]                                      [A
Downloading data: 21.8MB [00:00, 72.3MB/s][A
Downloading data: 30.3MB [00:00, 72.9MB/s][A
Downloading data files:  50%|██████████▌          | 1/2 [00:00<00:00,  1.43it/s]
Downloading data: 4.85MB [00:00, 71.3MB/s]                         

In [12]:
# Another way to see the data
dataset.head()

Unnamed: 0,sentence,label,idx
0,it 's a charming and often affecting journey .,1,0
1,unflinchingly bleak and desperate,0,1
2,allows us to hope that nolan is poised to emba...,1,2
3,"the acting , costumes , music , cinematography...",1,3
4,"it 's slow -- very , very slow .",0,4


In [13]:
#!pip install -q  xformers
!pip show xformers

[0m

In [14]:

"""
The transformers library simplifies accessing LLMs. 
There are 3 steps :
    1. Obtain a handle to the pretrained model
    2. Obtain a handle to the tokenizer ( which converts our word / sentence inputs into vectors
    3. generate a pipeline that can then be used to query the Model
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"

model_name = "bigscience/bloom-1b7"
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

In [19]:
"""
    Here we are using a simplified Instruction approach to "teach" the language model to classify a review as  postive or negative
    The first three Instructions explain how this works to the model and then the subsequently we ask the model to use the above instructions
    to perform the task
"""

def prompt_1(review: str) -> str:
 return f'''Review: The movie was horrible
Sentiment: Negative

Review: The movie was the best movie I have watched all year!!!
Sentiment: Positive

Review: The film was a disaster
Sentiment: Negative

Review: {review}
Sentiment:'''


def generate_sentiment(review: str) -> str:
 generated_text = generator(prompt_1(review), max_new_tokens=1)[0]['generated_text']
 return generated_text.split()[-1]

In [21]:

"""
    How well does our model prediction work ? 
    We apply it against a dataset to check the accuracy of model prediction.
    The metric simply computes the prediction with the ground truth to come up with an accuracy metric.
"""

# Our sentiment classifier is applied to the dataset
dataset["prediction"] = dataset["sentence"].apply(generate_sentiment)

# To compute accuracy we need to make the predictions into a numerical value : Positives are 1 and negatives are 0
dataset["prediction_int"] = dataset["prediction"].str.lower().map({"negative": 0, "positive": 1}).fillna(-1)

# Lets us check how well we did against our labelled data.
# we will ignore the warning based on the following : https://github.com/huggingface/transformers/issues/22387
accuracy = metric.compute(predictions=dataset["prediction_int"], references=dataset["label"])["accuracy"]
print(accuracy)



0.6628440366972477


In [23]:
# Some additional Prompt Examples 
def prompt_2(review: str) -> str:
  return f'''Review: This has been the worst trade deal in the history of trade deals, maybe ever
  Sentiment: Negative

  Review: Amazing introduction assignment on how to use large language models
  Sentiment: Positive

  Review: This code is full of bugs, it's impossible to run it
  Sentiment: Negative

  Review: {review}
  Sentiment:'''




In [24]:
# Some more additional Prompt Examples 
def prompt_3(review: str) -> str:
  return f'''The sentiment of the sentence "I hate this world." is negative

  The sentiment of the sentence "I love you all!" is positive

  The sentiment of the sentence "It will never work." is negative

  The sentiment of the sentence {review} is'''

In [25]:
"""
We will see if any change in accuracy is taking place based on our changes in prompts. This is usually an 
experimentative approach to see how your model performs to different inputs. 
Looks like Prompt 2 is performing better. Accuracy is 0.73
"""
def generate_sentiment(review: str) -> str:
 generated_text = generator(prompt_2(review), max_new_tokens=1)[0]['generated_text']
 return generated_text.split()[-1]

dataset["prediction"] = dataset["sentence"].apply(generate_sentiment)
dataset["prediction_int"] = dataset["prediction"].str.lower().map({"negative": 0, "positive": 1}).fillna(-1)

accuracy = metric.compute(predictions=dataset["prediction_int"], references=dataset["label"])["accuracy"]
print(accuracy)



0.7350917431192661


In [26]:
"""
We will see if any change in accuracy is taking place based on our changes in prompts. This is usually an 
experimentative approach to see how your model performs to different inputs. 
Looks like Prompt 3 is worse off and we see now an accuracy of about 0.31
"""

def generate_sentiment(review: str) -> str:
 generated_text = generator(prompt_3(review), max_new_tokens=1)[0]['generated_text']
 return generated_text.split()[-1]

dataset["prediction"] = dataset["sentence"].apply(generate_sentiment)
dataset["prediction_int"] = dataset["prediction"].str.lower().map({"negative": 0, "positive": 1}).fillna(-1)

accuracy = metric.compute(predictions=dataset["prediction_int"], references=dataset["label"])["accuracy"]
print(accuracy)



0.3176605504587156


In [28]:
# lets us check our dataset predictions visually
dataset.head()

Unnamed: 0,sentence,label,idx,prediction,prediction_int
0,it 's a charming and often affecting journey .,1,0,positive,1.0
1,unflinchingly bleak and desperate,0,1,is,-1.0
2,allows us to hope that nolan is poised to emba...,1,2,a,-1.0
3,"the acting , costumes , music , cinematography...",1,3,a,-1.0
4,"it 's slow -- very , very slow .",0,4,negative,0.0


In [29]:
# free up the memory
import gc
del model_name
del tokenizer
del generator 
del dataset


gc.collect()
torch.cuda.empty_cache()

In [30]:
# Let us monitor memory
import torch

# Retrieve GPU memory statistics
memory_stats = torch.cuda.memory_stats()
# Retrieve maximum GPU memory allocated by PyTorch
max_memory_allocated = torch.cuda.max_memory_allocated()
# Calculate available GPU memory
total_memory = torch.cuda.get_device_properties(0).total_memory
available_memory = total_memory - memory_stats["allocated_bytes.all.current"]

# Print the result
print(f"total_memory: {total_memory / 1024**3:.2f} GB")
print(f"Peak GPU memory allocated by PyTorch: {max_memory_allocated / 1024**3:.2f} GB")
print(f"Available GPU memory: {available_memory / 1024**3:.2f} GB")


## Make sure you are able to Total Memory of 14GB before moving to the next assisgnment, else restart the session

total_memory: 14.58 GB
Peak GPU memory allocated by PyTorch: 8.61 GB
Available GPU memory: 6.07 GB
