## Finetuned Bloom 1.7B

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

ds = load_dataset("MLap/SentiHin-2500", split="train")
label_map = {"negative": 0, "neutral": 1, "positive": 2}
true = [label_map[x["sentiment"]] for x in ds]

model_id = "MLap/bloom1.7-lora-sentiment-analysis-classification"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)

pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, device=0, truncation=True)
pred = [int(p["label"].split("_")[-1]) for p in pipe(ds["sentence"], batch_size=8)]

print("Accuracy:", accuracy_score(true, pred))
print("Precision:", precision_score(true, pred, average="macro"))
print("Recall:", recall_score(true, pred, average="macro"))
print("F1 Score:", f1_score(true, pred, average="macro"))


2025-07-22 02:51:21.452372: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753152681.641444      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753152681.697859      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


README.md: 0.00B [00:00, ?B/s]

train.csv: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/2500 [00:00<?, ? examples/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/21.8M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/769 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/715 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloom-1b7 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.30M [00:00<?, ?B/s]

Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Accuracy: 0.3392
Precision: 0.11306666666666666
Recall: 0.3333333333333333
F1 Score: 0.16885702907208286


  _warn_prf(average, modifier, msg_start, len(result))


## Base Bloom 1.7B


In [1]:
from datasets import load_dataset
from transformers import pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

ds = load_dataset("MLap/SentiHin-2500", split="train")
true = [x["sentiment"] for x in ds]

pipe = pipeline("zero-shot-classification", model="bigscience/bloom-1b7", device=0)
labels = ["positive", "negative", "neutral"]
pred = [pipe(x["sentence"], candidate_labels=labels)["labels"][0] for x in ds]

print("Accuracy:", accuracy_score(true, pred))
print("Precision:", precision_score(true, pred, average="macro", labels=labels))
print("Recall:", recall_score(true, pred, average="macro", labels=labels))
print("F1 Score:", f1_score(true, pred, average="macro", labels=labels))


2025-07-22 03:33:48.726719: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753155228.921918      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753155228.976430      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


README.md: 0.00B [00:00, ?B/s]

train.csv: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/2500 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/715 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloom-1b7 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

Device set to use cuda:0
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Accuracy: 0.4628
Precision: 0.5731183261972324
Recall: 0.459025437973755
F1 Score: 0.41305472349347755
