# LLM Inference  - Text Classification using Zero Shot Prompting

## Setup Environment

In [1]:
import os
from datasets import load_dataset,DatasetDict
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

HF_TOKEN = os.getenv("HF_TOKEN")
WANDB_API_KEY = os.getenv("WANDB_API_KEY")

from vllm import LLM, SamplingParams
import random

## Instantiate a LLM 

In [2]:
llm = LLM(
        model="meta-llama/Meta-Llama-3-70B-Instruct",
        tensor_parallel_size=4,
        trust_remote_code=True,
        enforce_eager=True,
        gpu_memory_utilization=0.99,
        enable_prefix_caching=True
)

2024-05-01 14:17:46,807	INFO worker.py:1749 -- Started a local Ray instance.


INFO 05-01 14:17:48 llm_engine.py:98] Initializing an LLM engine (v0.4.1) with config: model='meta-llama/Meta-Llama-3-70B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3-70B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=auto, tensor_parallel_size=4, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


INFO 05-01 14:17:56 utils.py:608] Found nccl from library /home/u.ap164907/.config/vllm/nccl/cu12/libnccl.so.2.18.1
[36m(RayWorkerWrapper pid=2245447)[0m INFO 05-01 14:17:56 utils.py:608] Found nccl from library /home/u.ap164907/.config/vllm/nccl/cu12/libnccl.so.2.18.1
INFO 05-01 14:18:00 selector.py:28] Using FlashAttention backend.
[36m(RayWorkerWrapper pid=2245447)[0m INFO 05-01 14:18:00 selector.py:28] Using FlashAttention backend.
INFO 05-01 14:18:01 pynccl_utils.py:43] vLLM is using nccl==2.18.1
[36m(RayWorkerWrapper pid=2245447)[0m INFO 05-01 14:18:01 pynccl_utils.py:43] vLLM is using nccl==2.18.1
[36m(RayWorkerWrapper pid=2245656)[0m INFO 05-01 14:17:56 utils.py:608] Found nccl from library /home/u.ap164907/.config/vllm/nccl/cu12/libnccl.so.2.18.1[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplicatio

## Common function for all dataset

In [3]:
def zero_shot_classification(dataset_name, prefix):
    # Load the dataset
    dataset = load_dataset(dataset_name)

    # Iterate over the dataset splits (train, test, validation)
    modified_dataset_dict = {}
    for split in ["train", "test", "validation"]:
        # Get the texts and labels from the current split
        texts = dataset[split]["text"]
        labels = dataset[split]["label"]

        # Generate the prompts for each Text
        generating_prompts = [prefix + "Text: " + text + "\nResponse: " for text in texts]

        # Set the sampling parameters
        sampling_params = SamplingParams(temperature=0, max_tokens=1)

        # Generate the sentiment labels for each text
        outputs = llm.generate(generating_prompts, sampling_params)
        predicted_label = []
        for output in outputs:
            try:
                predicted_label.append(int(output.outputs[0].text))
            except ValueError:
                predicted_label.append(-1)

        # Add the predicted labels to the dataset
        modified_dataset = dataset[split].add_column("predicted_label", predicted_label)
        modified_dataset_dict[split] = modified_dataset

    # Create a DatasetDict with the modified datasets
    return DatasetDict(modified_dataset_dict)

## Twitter Dataset

In [3]:
twitter_dataset = load_dataset("MAdAiLab/twitter_disaster")
twitter_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 8700
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1088
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1088
    })
})

### Prefix 1 - Elaborative prefix

In [7]:
prefix = """
You are an expert in sentiment analysis, with a deep understanding of natural language and human emotions.
Your task is to analyze the sentiment of the given text and classify it as either positive or negative.
When analyzing the sentiment, consider the overall tone, word choice, and emotional connotations within the text.
Positive sentiment typically conveys happiness, joy, approval, or praise, while negative sentiment expresses sadness, anger, criticism, or disappointment.
Provide your analysis in a concise and definitive manner, outputting either the number '1' if positive or '0' if negative based on your assessment of the sentiment expressed in the text.
Do not provide any additional commentary or explanation beyond the sentiment classification itself.
"""

In [17]:
# Get the training set
train_set = twitter_dataset['train']

# Randomly select 5 samples from the training set
random_subset = train_set.select(random.sample(range(len(train_set)), 5))

sampling_params = SamplingParams(temperature=0, max_tokens=1)

generating_prompts = [prefix + "Text: " + example['text'] + "\nResponse: " for example in random_subset]

outputs = llm.generate(generating_prompts, sampling_params)
print(f"Prefix: {prefix}")
# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    print(f"Text: {prompt.split('Text: ')[-1].strip()} {generated_text.strip()}")
    print(f"Actual label: {random_subset[i-1]['label']}")
    print("-" * 50 + "\n")

Processed prompts: 100%|██████████| 5/5 [00:00<00:00, 30.69it/s]

Prefix: 
You are an expert in sentiment analysis, with a deep understanding of natural language and human emotions.
Your task is to analyze the sentiment of the given text and classify it as either positive or negative.
When analyzing the sentiment, consider the overall tone, word choice, and emotional connotations within the text.
Positive sentiment typically conveys happiness, joy, approval, or praise, while negative sentiment expresses sadness, anger, criticism, or disappointment.
Provide your analysis in a concise and definitive manner, outputting either the number '1' if positive or '0' if negative based on your assessment of the sentiment expressed in the text.
Do not provide any additional commentary or explanation beyond the sentiment classification itself.

Example 1:
Text: E1.1.2 Particulate=Break up of Solid Combust Fossil Fuel Voltaic Active Forest Fire Biological VOC=Petroleum CH4 Bacteria Decomposition
Response: 0
Actual label: 1
------------------------------------------




### Prefix 2 - Simple prefix

In [38]:
prefix = """
“You are a classification model. 
Based on the given tweet, you need to predict the most relevant category label from 'positive' or 'negative'.
One tweet has only one label. 
"""

In [46]:
# Get the training set
train_set = twitter_dataset['train']

# Randomly select 5 samples from the training set
random_subset = train_set.select(random.sample(range(len(train_set)), 10))

sampling_params = SamplingParams(temperature=0)

generating_prompts = [prefix + "Text: " + example['text'] + "\nResponse: " for example in random_subset]

outputs = llm.generate(generating_prompts, sampling_params)
print(f"Prefix: {prefix}")
# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    print(f"Text: {prompt.split('Text: ')[-1].strip()} {generated_text.strip()}")
    # print(f"Actual label: {random_subset[i-1]['label']}")
    print("-" * 50 + "\n")

Processed prompts: 100%|██████████| 10/10 [00:01<00:00,  8.26it/s]

Prefix: 
“You are a classification model. 
Based on the given tweet, you need to predict the most relevant category label from 'positive' or 'negative'.
One tweet has only one label. 

Example 1:
Text: INVESTMENT NEWS Keurig Green Mountain Inc. Third-Quarter Earnings: Shares Sinking After-Hours - Stocks in the NewÛ_ http://t.co/GtdNW1SpVi
Response: negative”
--------------------------------------------------

Example 2:
Text: New music from @ApolloBrown featuring M.O.P.? 'Detonate' taken off his album 'Grandeur' coming soon - http://t.co/m1xYkEcRzr
Response: positive”
--------------------------------------------------

Example 3:
Text: Hollywood movie about trapped miners released in Chile
Response: positive”
--------------------------------------------------

Example 4:
Text: RT : Why Sweden Isn't Venezuela: There have been a few days of rioting in Venezuela with the riots directed at grÛ_ http://t.co/GJfd85vuf2
Response: negative”
--------------------------------------------------





In [None]:
twitter_modified = zero_shot_classification(
    dataset_name="MAdAiLab/twitter_disaster",
    prefix=prefix
)

In [10]:
twitter_modified

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'predicted_label'],
        num_rows: 8700
    })
    test: Dataset({
        features: ['text', 'label', 'predicted_label'],
        num_rows: 1088
    })
    validation: Dataset({
        features: ['text', 'label', 'predicted_label'],
        num_rows: 1088
    })
})

In [11]:
twitter_modified.save_to_disk("./output/twitter_predicted")

Saving the dataset (0/1 shards):   0%|          | 0/8700 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1088 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1088 [00:00<?, ? examples/s]

## Patent Classification Dataset

In [27]:
patent_dataset = load_dataset("MAdAiLab/patent_classification")
patent_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 5000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 5000
    })
})

### Prefix 1 : Elaborative Prefix

In [28]:
prefix = """
You are an expert in patent classification, with a deep understanding of technical domains and patent categorization.
Your task is to analyze the given patent abstract text and classify it into one of the 9 categories:
'0': Human Necessities
'1': Performing Operations; Transporting
'2': Chemistry; Metallurgy
'3': Textiles; Paper
'4': Fixed Constructions
'5': Mechanical Engineering; Lightning; Heating; Weapons; Blasting
'6': Physics
'7': Electricity
'8': General tagging of new or cross-sectional technology
When analyzing the patent, consider the technical field, invention type, and application area described in the text.
Provide your classification in a concise and definitive manner, outputting the corresponding class label (0-8) based on your assessment of the patent's category.
Do not provide any additional commentary or explanation beyond the classification itself.
"""

In [29]:
# Get the training set
train_set = patent_dataset['train']

# Randomly select 5 samples from the training set
random_subset = train_set.select(random.sample(range(len(train_set)), 5))

sampling_params = SamplingParams(temperature=0, max_tokens=1)

generating_prompts = [prefix + "Text: " + example['text'] + "\nResponse: " for example in random_subset]

outputs = llm.generate(generating_prompts, sampling_params)
print(f"Prefix: {prefix}")
# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    print(f"Text: {prompt.split('Text: ')[-1].strip()} {generated_text.strip()}")
    # print(f"Actual label: {random_subset[i-1]['label']}")
    print("-" * 50 + "\n")

Processed prompts: 100%|██████████| 5/5 [00:00<00:00,  5.62it/s]

Prefix: 
You are an expert in patent classification, with a deep understanding of technical domains and patent categorization.
Your task is to analyze the given patent abstract text and classify it into one of the 9 categories:
'0': Human Necessities
'1': Performing Operations; Transporting
'2': Chemistry; Metallurgy
'3': Textiles; Paper
'4': Fixed Constructions
'5': Mechanical Engineering; Lightning; Heating; Weapons; Blasting
'6': Physics
'7': Electricity
'8': General tagging of new or cross-sectional technology
When analyzing the patent, consider the technical field, invention type, and application area described in the text.
Provide your classification in a concise and definitive manner, outputting the corresponding class label (0-8) based on your assessment of the patent's category.
Do not provide any additional commentary or explanation beyond the classification itself.

Example 1:
Text: apparatus for coupling a light beam from a light source to a light fibre with reduced heating




### Prefix 2 - Simple prefix

In [30]:
prefix = """
Analyze the given patent abstract text and classify it into one of the 9 categories:
'0': Human Necessities
'1': Performing Operations; Transporting
'2': Chemistry; Metallurgy
'3': Textiles; Paper
'4': Fixed Constructions
'5': Mechanical Engineering; Lightning; Heating; Weapons; Blasting
'6': Physics
'7': Electricity
'8': General tagging of new or cross-sectional technology
Provide your classification in a concise and definitive manner, outputting the corresponding class label (0-8). 
Do not provide any additional commentary or explanation beyond the classification itself.
"""

In [31]:
# Get the training set
train_set = patent_dataset['train']

# Randomly select 5 samples from the training set
random_subset = train_set.select(random.sample(range(len(train_set)), 5))

sampling_params = SamplingParams(temperature=0, max_tokens=1)

generating_prompts = [prefix + "Text: " + example['text'] + "\nResponse: " for example in random_subset]

outputs = llm.generate(generating_prompts, sampling_params)
print(f"Prefix: {prefix}")
# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    print(f"Text: {prompt.split('Text: ')[-1].strip()} {generated_text.strip()}")
    # print(f"Actual label: {random_subset[i-1]['label']}")
    print("-" * 50 + "\n")

Processed prompts: 100%|██████████| 5/5 [00:00<00:00,  6.37it/s]

Prefix: 
Analyze the given patent abstract text and classify it into one of the 9 categories:
'0': Human Necessities
'1': Performing Operations; Transporting
'2': Chemistry; Metallurgy
'3': Textiles; Paper
'4': Fixed Constructions
'5': Mechanical Engineering; Lightning; Heating; Weapons; Blasting
'6': Physics
'7': Electricity
'8': General tagging of new or cross-sectional technology
Provide your classification in a concise and definitive manner, outputting the corresponding class label (0-8). 
Do not provide any additional commentary or explanation beyond the classification itself.

Example 1:
Text: a vehicle communication system is operable to calculate and compare vehicle statistics based on information received from a vehicle sensor . this statistic information can be used by the vehicle communication system to execute a plurality of statistic based games . additionally , relevant statistic information may be transmitted to a remote network for the purpose of playing games against o




### Prefix 2 - Very Simplistic prefix

In [32]:
prefix = """
You are a classification model. Based on the given an abstract, you need to predict the most relevant category label from below
Human Necessities,
Operations; Transporting,
Chemistry; Metallurgy,
Textiles; Paper,
Fixed Constructions,
Mechanical Engineering; Lightning; Heating; Weapons; Blasting,
Physics,
Electricity,
General tagging of new or cross-sectional technology. 
One article has only one label.
"""

In [36]:
# Get the training set
train_set = patent_dataset['train']

# Randomly select 5 samples from the training set
random_subset = train_set.select(random.sample(range(len(train_set)), 10))

sampling_params = SamplingParams(temperature=0)

generating_prompts = [prefix + "\n ### Input abstract: " + example['text'] + "\n ### Output: " for example in random_subset]

outputs = llm.generate(generating_prompts, sampling_params)
# print(f"Prefix: {prefix}")
# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    print(f"Text: {prompt.split('Text: ')[-1].strip()} {generated_text.strip()}")
    # print(f"Actual label: {random_subset[i-1]['label']}")
    print("-" * 50 + "\n")

Processed prompts: 100%|██████████| 10/10 [00:01<00:00,  5.95it/s]

Example 1:
Text: You are a classification model. Based on the given an abstract, you need to predict the most relevant category label from below
Human Necessities,
Operations; Transporting,
Chemistry; Metallurgy,
Textiles; Paper,
Fixed Constructions,
Mechanical Engineering; Lightning; Heating; Weapons; Blasting,
Physics,
Electricity,
General tagging of new or cross-sectional technology. 
One article has only one label.

 ### Input abstract: a capacitor is disposed within a semiconductor device assembly atop a plastic layer pad , beneath which passes a pair of leads connected to a semiconductor device . the capacitor is connected to the pair of leads , such as by soldering , spot welding or conductive epoxy through cutouts in the pad . in one embodiment , the cutouts extend into the pad from inner and outer edges thereof . in another embodiment , the cutouts are holes through the pad . a plurality , such as four , capacitors are conveniently disposed atop a corresponding plurality of pa




In [39]:
patent_modified = zero_shot_classification(
    dataset_name="MAdAiLab/patent_classification",
    prefix=prefix
)

Processed prompts: 100%|██████████| 25000/25000 [23:26<00:00, 17.77it/s]
Processed prompts: 100%|██████████| 5000/5000 [04:41<00:00, 17.75it/s]
Processed prompts: 100%|██████████| 5000/5000 [04:42<00:00, 17.69it/s]


In [40]:
patent_modified.save_to_disk("./output/patent_predicted")

Saving the dataset (0/1 shards):   0%|          | 0/25000 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/5000 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/5000 [00:00<?, ? examples/s]

## Scotus dataset

In [4]:
scotus_dataset = load_dataset("MAdAiLab/lex_glue_scotus")
scotus_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 5000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1400
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1400
    })
})

In [7]:
prefix = """
You are an expert in legal issue area classification, with a deep understanding of the US Supreme Court's opinions and the subject matter of controversies.
Your task is to analyze the given court opinion and classify it into one of the 14 relevant issue areas.
When analyzing the opinion, consider the overall content, legal concepts, and subject matter within the text.
The 14 issue areas are: (0) Criminal Procedure, (1) Civil Rights, (2) First Amendment, (3) Due Process, (4) Privacy, (5) Attorneys, (6) Unions, (7) Economic Activity, (8) Judicial Power, (9) Federalism, (10) Interstate Relations, (11) Federal Taxation, and (12) Miscellaneous
Provide your analysis in a concise and definitive manner, outputting the number corresponding to the relevant issue area based on your assessment of the opinion's content.
"""

In [9]:
prompts = scotus_dataset['train'][:10]['text']
labels = scotus_dataset['train'][:10]['label']

sampling_params = SamplingParams(temperature=0, max_tokens=1)

generating_prompts = [prefix + "Text: " + prompt + "\nResponse: " for prompt in prompts]

outputs = llm.generate(generating_prompts, sampling_params,)

# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    # print(f"Prefix: {prefix.strip()}")
    # print(f"Text: {prompt.split('Text: ')[-1].strip()}")
    print(f"Actual label: {labels[i-1]}")
    print(f"Predicted label: {generated_text.strip()}")
    print("-" * 50 + "\n")

Processed prompts:  20%|██        | 2/10 [00:00<00:01,  6.67it/s]



Processed prompts:  30%|███       | 3/10 [00:00<00:01,  6.17it/s]



Processed prompts: 100%|██████████| 10/10 [00:06<00:00,  1.46it/s]

Example 1:
Actual label: 7
Predicted label: 8
--------------------------------------------------

Example 2:
Actual label: 7
Predicted label: 3
--------------------------------------------------

Example 3:
Actual label: 0
Predicted label: 3
--------------------------------------------------

Example 4:
Actual label: 1
Predicted label: 
--------------------------------------------------

Example 5:
Actual label: 7
Predicted label: 
--------------------------------------------------

Example 6:
Actual label: 7
Predicted label: 7
--------------------------------------------------

Example 7:
Actual label: 7
Predicted label: 
--------------------------------------------------

Example 8:
Actual label: 1
Predicted label: 
--------------------------------------------------

Example 9:
Actual label: 6
Predicted label: 3
--------------------------------------------------

Example 10:
Actual label: 0
Predicted label: 3
--------------------------------------------------




