# LLM Inference  - Text Classification using Few Shot Prompting

In [105]:
!pip install -U scikit-learn

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-learn
  Downloading scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m34.1 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, scikit-learn
Successfully installed scikit-learn-1.4.2 threadpoolctl-3.5.0


## Setup Environment

In [1]:
import os
from datasets import load_dataset,DatasetDict
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

HF_TOKEN = os.getenv("HF_TOKEN")
WANDB_API_KEY = os.getenv("WANDB_API_KEY")

from vllm import LLM, SamplingParams
import random

## Instantiate a LLM 

In [2]:
llm = LLM(
        model="meta-llama/Meta-Llama-3-70B-Instruct",
        tensor_parallel_size=4,
        trust_remote_code=True,
        enforce_eager=True,
        gpu_memory_utilization=0.99,
        enable_prefix_caching=True
)

2024-05-03 17:20:18,706	INFO worker.py:1749 -- Started a local Ray instance.


INFO 05-03 17:20:24 llm_engine.py:98] Initializing an LLM engine (v0.4.1) with config: model='meta-llama/Meta-Llama-3-70B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3-70B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=auto, tensor_parallel_size=4, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


INFO 05-03 17:20:32 utils.py:608] Found nccl from library /home/u.ap164907/.config/vllm/nccl/cu12/libnccl.so.2.18.1
[36m(RayWorkerWrapper pid=2306376)[0m INFO 05-03 17:20:32 utils.py:608] Found nccl from library /home/u.ap164907/.config/vllm/nccl/cu12/libnccl.so.2.18.1
INFO 05-03 17:20:34 selector.py:28] Using FlashAttention backend.
[36m(RayWorkerWrapper pid=2306376)[0m INFO 05-03 17:20:34 selector.py:28] Using FlashAttention backend.
INFO 05-03 17:20:36 pynccl_utils.py:43] vLLM is using nccl==2.18.1
[36m(RayWorkerWrapper pid=2306376)[0m INFO 05-03 17:20:36 pynccl_utils.py:43] vLLM is using nccl==2.18.1
[36m(RayWorkerWrapper pid=2306581)[0m INFO 05-03 17:20:32 utils.py:608] Found nccl from library /home/u.ap164907/.config/vllm/nccl/cu12/libnccl.so.2.18.1[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplicatio

## Common function for all dataset

In [3]:
def zero_shot_classification(dataset_name, prefix , split_name):
    # Load the dataset
    dataset = load_dataset(dataset_name)

    # Iterate over the dataset splits (train, test, validation)
    modified_dataset_dict = {}
    subset=[split_name]
    # for split in ["train", "test", "validation"]:
    for split in subset:
        # Get the texts and labels from the current split
        texts = dataset[split]["text"]
        labels = dataset[split]["label"]

        # Generate the prompts for each Text
        generating_prompts = [prefix + "Text: " + text + "\nResponse: " for text in texts]

        # Set the sampling parameters
        sampling_params = SamplingParams(temperature=0, max_tokens=1)

        # Generate the sentiment labels for each text
        outputs = llm.generate(generating_prompts, sampling_params)
        predicted_label = []
        for output in outputs:
            try:
                predicted_label.append(int(output.outputs[0].text))
            except ValueError:
                predicted_label.append(-1)

        # Add the predicted labels to the dataset
        modified_dataset = dataset[split].add_column("predicted_label", predicted_label)
        modified_dataset_dict[split] = modified_dataset

    # Create a DatasetDict with the modified datasets
    return DatasetDict(modified_dataset_dict)

## Twitter Dataset

In [3]:
twitter_dataset = load_dataset("MAdAiLab/twitter_disaster")
twitter_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 8700
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1088
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1088
    })
})

In [72]:
twitter_dataset['train'][:5]

{'text': ['@sabcnewsroom sabotage!I rule out structural failure',
  'Two giant cranes holding a bridge collapse into nearby homes http://t.co/UmANaaHwMI',
  '@yeetrpan I asked if they were hiring and they said not you I was devastated.',
  'Watch This Airport Get Swallowed Up By A Sandstorm In Under A Minute http://t.co/7IJlZ6BcSP',
  'Survived my first #tubestrike thanks to @Citymapper'],
 'label': [0, 1, 0, 1, 0]}

In [51]:
random.sample()

[1131, 1471, 2133, 2450, 633]

### Prefix 1 - Elaborative prefix

In [99]:
prefix = """
'text': @sabcnewsroom sabotage!I rule out structural failure',
  'Two giant cranes holding a bridge collapse into nearby homes http://t.co/UmANaaHwMI',
  '@yeetrpan I asked if they were hiring and they said not you I was devastated.',
  'Watch This Airport Get Swallowed Up By A Sandstorm In Under A Minute http://t.co/7IJlZ6BcSP',
  'Survived my first #tubestrike thanks to @Citymapper'],
 'label': [0, 1, 0, 1, 0]
Based on the previous text what is the answer to the this  
"""

In [101]:
# Get the training set
train_set = twitter_dataset['train']

# Randomly select 5 samples from the training set
random_subset = train_set.select(random.sample(range(len(train_set)), 5))

sampling_params = SamplingParams(temperature=0,max_tokens=1)

generating_prompts = [prefix + "\nText: " + example['text'] + "\nlabel: " for example in random_subset]

outputs = llm.generate(generating_prompts, sampling_params)
print(f"Prefix: {prefix}")
# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    print(f"Text: {prompt.split('Text: ')[-1].strip()} {generated_text.strip()}")
    print(f"Actual label: {random_subset[i-1]['label']}")
    print("-" * 50 + "\n")

Processed prompts: 100%|██████████| 5/5 [00:00<00:00, 22.13it/s]

Prefix: 
'text': @sabcnewsroom sabotage!I rule out structural failure',
  'Two giant cranes holding a bridge collapse into nearby homes http://t.co/UmANaaHwMI',
  '@yeetrpan I asked if they were hiring and they said not you I was devastated.',
  'Watch This Airport Get Swallowed Up By A Sandstorm In Under A Minute http://t.co/7IJlZ6BcSP',
  'Survived my first #tubestrike thanks to @Citymapper'],
 'label': [0, 1, 0, 1, 0]
Based on the previous text what is the answer to the this  

Example 1:
Text: @DwarfOnJetpack I guess I can say you and me might have one thing in common my biological father made me this way to be his weapon
label: 0
Actual label: 0
--------------------------------------------------

Example 2:
Text: Oil and Gas Exploration Takes Seismic Shift in Gabon to Somalia - Bloomberg http://t.co/bEKrPjnYHs #??????? #Somalia
label: 1
Actual label: 1
--------------------------------------------------

Example 3:
Text: I think that none of us know the impact we have on the lives 




### Prefix 2 - Simple prefix

In [38]:
prefix = """
“You are a classification model. 
Based on the given tweet, you need to predict the most relevant category label from 'positive' or 'negative'.
One tweet has only one label. 
"""

In [46]:
# Get the training set
train_set = twitter_dataset['train']

# Randomly select 5 samples from the training set
random_subset = train_set.select(random.sample(range(len(train_set)), 10))

sampling_params = SamplingParams(temperature=0)

generating_prompts = [prefix + "Text: " + example['text'] + "\nResponse: " for example in random_subset]

outputs = llm.generate(generating_prompts, sampling_params)
print(f"Prefix: {prefix}")
# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    print(f"Text: {prompt.split('Text: ')[-1].strip()} {generated_text.strip()}")
    # print(f"Actual label: {random_subset[i-1]['label']}")
    print("-" * 50 + "\n")

Processed prompts: 100%|██████████| 10/10 [00:01<00:00,  8.26it/s]

Prefix: 
“You are a classification model. 
Based on the given tweet, you need to predict the most relevant category label from 'positive' or 'negative'.
One tweet has only one label. 

Example 1:
Text: INVESTMENT NEWS Keurig Green Mountain Inc. Third-Quarter Earnings: Shares Sinking After-Hours - Stocks in the NewÛ_ http://t.co/GtdNW1SpVi
Response: negative”
--------------------------------------------------

Example 2:
Text: New music from @ApolloBrown featuring M.O.P.? 'Detonate' taken off his album 'Grandeur' coming soon - http://t.co/m1xYkEcRzr
Response: positive”
--------------------------------------------------

Example 3:
Text: Hollywood movie about trapped miners released in Chile
Response: positive”
--------------------------------------------------

Example 4:
Text: RT : Why Sweden Isn't Venezuela: There have been a few days of rioting in Venezuela with the riots directed at grÛ_ http://t.co/GJfd85vuf2
Response: negative”
--------------------------------------------------





### Convert dataset and add predicted label

In [102]:
twitter_modified = zero_shot_classification(
    dataset_name="MAdAiLab/twitter_disaster",
    prefix=prefix
)

Processed prompts: 100%|██████████| 8700/8700 [02:59<00:00, 48.50it/s]
Processed prompts: 100%|██████████| 1088/1088 [00:22<00:00, 49.14it/s]
Processed prompts: 100%|██████████| 1088/1088 [00:22<00:00, 48.95it/s]


In [103]:
twitter_modified

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'predicted_label'],
        num_rows: 8700
    })
    test: Dataset({
        features: ['text', 'label', 'predicted_label'],
        num_rows: 1088
    })
    validation: Dataset({
        features: ['text', 'label', 'predicted_label'],
        num_rows: 1088
    })
})

In [106]:
from sklearn.metrics import accuracy_score

# Load the test set
test_set = twitter_modified['test']

# Get the true labels
y_true = test_set['label']

# Get the predicted labels
y_pred = test_set['predicted_label']

# Calculate the accuracy
accuracy = accuracy_score(y_true, y_pred)

# Print the accuracy
print("Accuracy:", accuracy)

Accuracy: 0.6709558823529411


In [11]:
twitter_modified.save_to_disk("./output/twitter_predicted")

Saving the dataset (0/1 shards):   0%|          | 0/8700 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1088 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1088 [00:00<?, ? examples/s]

## Patent Classification Dataset

In [6]:
patent_dataset = load_dataset("MAdAiLab/patent_classification")
patent_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 5000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 5000
    })
})

In [109]:
patent_dataset['train'][:5]

{'text': ['an apparatus for simultaneously testing multiple integrated circuits includes a sensing circuit associated with each of the tested circuits . each sensing circuit includes a differential amplifier with its positive input connected to the input of the test circuit , and its inversion input connected to the test circuit output . the test circuit input and positive amplifier input are biased to a selected voltage , and the voltage drop across the test circuit is provided to the amplifier inversion input . whenever the test circuit is open , intermittently open or highly resistive , the voltage drop across the test circuit exceeds the threshold voltage of the differential amplifier , causing the amplifier to generate a high level logic output representing an open circuit condition . the outputs of the various sensing circuits together form a digital word representative of the condition of all of the test circuits . the outputs of the differential amplifiers also are provided to 

### Prefix 1 : Elaborative Prefix

In [7]:
prefix = """
'text': ['an apparatus for simultaneously testing multiple integrated circuits includes a sensing circuit associated with each of the tested circuits . each sensing circuit includes a differential amplifier with its positive input connected to the input of the test circuit , and its inversion input connected to the test circuit output . the test circuit input and positive amplifier input are biased to a selected voltage , and the voltage drop across the test circuit is provided to the amplifier inversion input . whenever the test circuit is open , intermittently open or highly resistive , the voltage drop across the test circuit exceeds the threshold voltage of the differential amplifier , causing the amplifier to generate a high level logic output representing an open circuit condition . the outputs of the various sensing circuits together form a digital word representative of the condition of all of the test circuits . the outputs of the differential amplifiers also are provided to independent triggering circuitry for enabling the storage of sensing circuit outputs upon an open condition indicated for at least one of the test circuits . the outputs of the sampling circuits are sampled in parallel at 100 nanosecond or longer selected intervals , so that extremely brief intermittent opens are detected .',
  'an electrosurgical instrument includes a housing , a shaft extending from the housing , and an end effector assembly attached at a distal end of the shaft . a handle assembly is coupled to the housing and includes a movable handle for manipulating the end effector assembly . an outer sleeve is disposed about the shaft and selectively translatable relative thereto . an energizable member is operably coupled to the outer sleeve . a deployment mechanism is provided including a lever rotatably coupled to the housing and positioned proximally of the movable handle and at least one link member coupled between the lever and the outer sleeve . the link member couple to the outer sleeve distally of the movable handle . rotation of the lever translates the outer sleeve distally to move the outer sleeve over the end effector assembly and simultaneously deploy the energizable member distally past the end effector assembly .',
  'a wireless transceiver device employing the code select code division multiple access method includes an encoder configured to execute error correction coding on data having a predetermined number of bits among data inputted from outside , and a code selector configured to select a code corresponding to the data subjected to correction coding . the wireless transceiver device performs wireless communication by use of the data inputted from the outside excluding the data having the predetermined number of bits and by use of the code .',
  'the invention provides for the use of protein kinase activators or boosters of nerve growth factor , brain - derived neurotrophic factor or other neurotrophic factors to treat stroke . specifically , the present invention provides methods of treating stroke comprising the steps of identifying a subject having suffered a stroke and administering to said subject an amount of a pharmaceutical composition comprising a protein kinase c activator or 4 - methylcatechol acetic acid and a pharmaceutically acceptable carrier effective to treat at least one symptom of stroke .',
  'the present invention relates to a device for mixing fluids . it is a hydraulic or pneumatic apparatus , depending on the fluid used for transportation . it is static and has the characteristics of both an extractor and a fluid mixer . extraction is effected by dragging the suction elements , by means of the circulation of a transporting fluid injected at low pressure . the injection inlets and suction inlets are interchangeable and lead to a single outlet . the injection tube formed by a helical spiral on the outside surrounded by the sheath increases the pressure in the transporting fluid and creates outward helical movement with centrifugal force in all the fluid that circulates on the outside .'],
 'label': [6, 0, 7, 0, 8]}
 
Your task is to analyze the given patent abstract text and classify it into one of the 9 categories:
'0': Human Necessities
'1': Performing Operations; Transporting
'2': Chemistry; Metallurgy
'3': Textiles; Paper
'4': Fixed Constructions
'5': Mechanical Engineering; Lightning; Heating; Weapons; Blasting
'6': Physics
'7': Electricity
'8': General tagging of new or cross-sectional technology

Based on the previous text what is the answer to the this  
"""

In [8]:
# Get the training set
train_set = patent_dataset['train']

# Randomly select 5 samples from the training set
random_subset = train_set.select(random.sample(range(len(train_set)), 5))

sampling_params = SamplingParams(temperature=0, max_tokens=1)

generating_prompts = [prefix + "\nText: " + example['text'] + "\nlabel: " for example in random_subset]

outputs = llm.generate(generating_prompts, sampling_params)
print(f"Prefix: {prefix}")
# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    print(f"Text: {prompt.split('Text: ')[-1].strip()} {generated_text.strip()}")
    print(f"Actual label: {random_subset[i-1]['label']}")
    print("-" * 50 + "\n")

Processed prompts: 100%|██████████| 5/5 [00:02<00:00,  1.90it/s]

Prefix: 
'text': ['an apparatus for simultaneously testing multiple integrated circuits includes a sensing circuit associated with each of the tested circuits . each sensing circuit includes a differential amplifier with its positive input connected to the input of the test circuit , and its inversion input connected to the test circuit output . the test circuit input and positive amplifier input are biased to a selected voltage , and the voltage drop across the test circuit is provided to the amplifier inversion input . whenever the test circuit is open , intermittently open or highly resistive , the voltage drop across the test circuit exceeds the threshold voltage of the differential amplifier , causing the amplifier to generate a high level logic output representing an open circuit condition . the outputs of the various sensing circuits together form a digital word representative of the condition of all of the test circuits . the outputs of the differential amplifiers also are prov




### Prefix 2 - Simple prefix

In [30]:
prefix = """
Analyze the given patent abstract text and classify it into one of the 9 categories:
'0': Human Necessities
'1': Performing Operations; Transporting
'2': Chemistry; Metallurgy
'3': Textiles; Paper
'4': Fixed Constructions
'5': Mechanical Engineering; Lightning; Heating; Weapons; Blasting
'6': Physics
'7': Electricity
'8': General tagging of new or cross-sectional technology
Provide your classification in a concise and definitive manner, outputting the corresponding class label (0-8). 
Do not provide any additional commentary or explanation beyond the classification itself.
"""

In [31]:
# Get the training set
train_set = patent_dataset['train']

# Randomly select 5 samples from the training set
random_subset = train_set.select(random.sample(range(len(train_set)), 5))

sampling_params = SamplingParams(temperature=0, max_tokens=1)

generating_prompts = [prefix + "Text: " + example['text'] + "\nResponse: " for example in random_subset]

outputs = llm.generate(generating_prompts, sampling_params)
print(f"Prefix: {prefix}")
# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    print(f"Text: {prompt.split('Text: ')[-1].strip()} {generated_text.strip()}")
    # print(f"Actual label: {random_subset[i-1]['label']}")
    print("-" * 50 + "\n")

Processed prompts: 100%|██████████| 5/5 [00:00<00:00,  6.37it/s]

Prefix: 
Analyze the given patent abstract text and classify it into one of the 9 categories:
'0': Human Necessities
'1': Performing Operations; Transporting
'2': Chemistry; Metallurgy
'3': Textiles; Paper
'4': Fixed Constructions
'5': Mechanical Engineering; Lightning; Heating; Weapons; Blasting
'6': Physics
'7': Electricity
'8': General tagging of new or cross-sectional technology
Provide your classification in a concise and definitive manner, outputting the corresponding class label (0-8). 
Do not provide any additional commentary or explanation beyond the classification itself.

Example 1:
Text: a vehicle communication system is operable to calculate and compare vehicle statistics based on information received from a vehicle sensor . this statistic information can be used by the vehicle communication system to execute a plurality of statistic based games . additionally , relevant statistic information may be transmitted to a remote network for the purpose of playing games against o




### Prefix 2 - Very Simplistic prefix

In [32]:
prefix = """
You are a classification model. Based on the given an abstract, you need to predict the most relevant category label from below
Human Necessities,
Operations; Transporting,
Chemistry; Metallurgy,
Textiles; Paper,
Fixed Constructions,
Mechanical Engineering; Lightning; Heating; Weapons; Blasting,
Physics,
Electricity,
General tagging of new or cross-sectional technology. 
One article has only one label.
"""

In [36]:
# Get the training set
train_set = patent_dataset['train']

# Randomly select 5 samples from the training set
random_subset = train_set.select(random.sample(range(len(train_set)), 10))

sampling_params = SamplingParams(temperature=0)

generating_prompts = [prefix + "\n ### Input abstract: " + example['text'] + "\n ### Output: " for example in random_subset]

outputs = llm.generate(generating_prompts, sampling_params)
# print(f"Prefix: {prefix}")
# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    print(f"Text: {prompt.split('Text: ')[-1].strip()} {generated_text.strip()}")
    # print(f"Actual label: {random_subset[i-1]['label']}")
    print("-" * 50 + "\n")

Processed prompts: 100%|██████████| 10/10 [00:01<00:00,  5.95it/s]

Example 1:
Text: You are a classification model. Based on the given an abstract, you need to predict the most relevant category label from below
Human Necessities,
Operations; Transporting,
Chemistry; Metallurgy,
Textiles; Paper,
Fixed Constructions,
Mechanical Engineering; Lightning; Heating; Weapons; Blasting,
Physics,
Electricity,
General tagging of new or cross-sectional technology. 
One article has only one label.

 ### Input abstract: a capacitor is disposed within a semiconductor device assembly atop a plastic layer pad , beneath which passes a pair of leads connected to a semiconductor device . the capacitor is connected to the pair of leads , such as by soldering , spot welding or conductive epoxy through cutouts in the pad . in one embodiment , the cutouts extend into the pad from inner and outer edges thereof . in another embodiment , the cutouts are holes through the pad . a plurality , such as four , capacitors are conveniently disposed atop a corresponding plurality of pa




### Convert dataset and add predicted label

In [9]:
patent_modified = zero_shot_classification(
    dataset_name="MAdAiLab/patent_classification",
    prefix=prefix,
    split_name="test"
)

Processed prompts: 100%|██████████| 5000/5000 [07:02<00:00, 11.84it/s]


In [10]:
patent_modified

DatasetDict({
    test: Dataset({
        features: ['text', 'label', 'predicted_label'],
        num_rows: 5000
    })
})

In [11]:
from sklearn.metrics import accuracy_score

# Load the test set
test_set = patent_modified['test']

# Get the true labels
y_true = test_set['label']

# Get the predicted labels
y_pred = test_set['predicted_label']

# Calculate the accuracy
accuracy = accuracy_score(y_true, y_pred)

# Print the accuracy
print("Accuracy:", accuracy)

Accuracy: 0.349


In [40]:
patent_modified.save_to_disk("./output/patent_predicted")

Saving the dataset (0/1 shards):   0%|          | 0/25000 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/5000 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/5000 [00:00<?, ? examples/s]

## Scotus dataset

In [4]:
scotus_dataset = load_dataset("MAdAiLab/lex_glue_scotus")
scotus_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 5000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1400
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1400
    })
})

In [7]:
prefix = """
You are an expert in legal issue area classification, with a deep understanding of the US Supreme Court's opinions and the subject matter of controversies.
Your task is to analyze the given court opinion and classify it into one of the 14 relevant issue areas.
When analyzing the opinion, consider the overall content, legal concepts, and subject matter within the text.
The 14 issue areas are: (0) Criminal Procedure, (1) Civil Rights, (2) First Amendment, (3) Due Process, (4) Privacy, (5) Attorneys, (6) Unions, (7) Economic Activity, (8) Judicial Power, (9) Federalism, (10) Interstate Relations, (11) Federal Taxation, and (12) Miscellaneous
Provide your analysis in a concise and definitive manner, outputting the number corresponding to the relevant issue area based on your assessment of the opinion's content.
"""

In [9]:
prompts = scotus_dataset['train'][:10]['text']
labels = scotus_dataset['train'][:10]['label']

sampling_params = SamplingParams(temperature=0, max_tokens=1)

generating_prompts = [prefix + "Text: " + prompt + "\nResponse: " for prompt in prompts]

outputs = llm.generate(generating_prompts, sampling_params,)

# Print the outputs
for i, output in enumerate(outputs, start=1):
    prompt = output.prompt
    generated_text = output.outputs[0].text

    print(f"Example {i}:")
    # print(f"Prefix: {prefix.strip()}")
    # print(f"Text: {prompt.split('Text: ')[-1].strip()}")
    print(f"Actual label: {labels[i-1]}")
    print(f"Predicted label: {generated_text.strip()}")
    print("-" * 50 + "\n")

Processed prompts:  20%|██        | 2/10 [00:00<00:01,  6.67it/s]



Processed prompts:  30%|███       | 3/10 [00:00<00:01,  6.17it/s]



Processed prompts: 100%|██████████| 10/10 [00:06<00:00,  1.46it/s]

Example 1:
Actual label: 7
Predicted label: 8
--------------------------------------------------

Example 2:
Actual label: 7
Predicted label: 3
--------------------------------------------------

Example 3:
Actual label: 0
Predicted label: 3
--------------------------------------------------

Example 4:
Actual label: 1
Predicted label: 
--------------------------------------------------

Example 5:
Actual label: 7
Predicted label: 
--------------------------------------------------

Example 6:
Actual label: 7
Predicted label: 7
--------------------------------------------------

Example 7:
Actual label: 7
Predicted label: 
--------------------------------------------------

Example 8:
Actual label: 1
Predicted label: 
--------------------------------------------------

Example 9:
Actual label: 6
Predicted label: 3
--------------------------------------------------

Example 10:
Actual label: 0
Predicted label: 3
--------------------------------------------------




