# Sentiment Analysis — Transformers Pipeline (DistilBERT SST-2)


In [None]:
!pip install OpenAI
!pip install langchain
!pip install langchain_community
!pip install Cohere
!pip install langchain-openai langchain-cohere python-dotenv

In [None]:
#!pip install transformers

In [None]:
import os
import warnings
warnings.filterwarnings('ignore')
from google.colab import userdata

In [None]:
# If needed, install:
# %pip install -q pandas transformers

In [None]:
import os
import pandas as pd
from transformers import pipeline



In [None]:
# Location of the input CSV (a raw GitHub URL); it is loaded with pd.read_csv.
CSV_PATH = "https://raw.githubusercontent.com/giridhar276/genai/refs/heads/main/datasets/Bank_Customer_conversations.csv"

In [None]:
# Build the sentiment classifier once; classify_one() reuses this object.
# To force GPU (CUDA) execution, additionally pass device=0 to pipeline().
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
sentiment_model = pipeline("sentiment-analysis", model=MODEL_NAME)

In [None]:
# Helper: map any raw model label onto lowercase 'positive' / 'negative'.
def normalize_label(label):
    """Collapse a classifier label to ``'positive'`` or ``'negative'``.

    The label is trimmed and lowercased first; a falsy label (``None`` or
    ``''``) is treated as an empty string. Any label whose cleaned form does
    not begin with ``'pos'`` is reported as ``'negative'``.
    """
    cleaned = (label if label else '').strip().lower()
    return 'positive' if cleaned.startswith('pos') else 'negative'


In [None]:
# Name of the CSV column that holds the text to classify.
TEXT_COL = "customer_text"

In [None]:
# Load the dataset and keep only the text column.
df = pd.read_csv(CSV_PATH)
# Drop missing values BEFORE casting: astype(str) would otherwise turn NaN
# into the literal string 'nan', which would then be scored as real text.
df = df[[TEXT_COL]].dropna(subset=[TEXT_COL]).copy()
df[TEXT_COL] = df[TEXT_COL].astype(str).str.strip()
print('Rows:', len(df))
df.head(3)

In [None]:
def classify_one(text: str):
    """Classify a single text with the shared sentiment pipeline.

    Args:
        text: The text to score.

    Returns:
        A ``(label, score)`` tuple: the label normalized by
        ``normalize_label`` and the model's score converted to ``float``.
    """
    # truncation=True clips inputs that exceed the model's maximum sequence
    # length; without it an over-long text makes the pipeline raise instead
    # of returning a prediction. Short texts are unaffected.
    res = sentiment_model(text, truncation=True)[0]
    return normalize_label(res['label']), float(res['score'])


In [None]:
# Score every row, then split the (label, score) pairs into two columns.
predictions = [classify_one(text) for text in df[TEXT_COL]]
labels = [label for label, _ in predictions]
scores = [score for _, score in predictions]
df['sentiment'] = labels
df['score'] = scores


In [None]:
# Persist the scored rows and report where they were written, then preview.
OUTPUT_PATH = "using_transformers.csv"
df.to_csv(OUTPUT_PATH, index=False)
print(f'Saved: {OUTPUT_PATH}')
df[[TEXT_COL, 'sentiment', 'score']].head(10)