In [None]:
#!pip install OpenAI
#!pip install langchain
#!pip install langchain_community
#!pip install Cohere
#!pip install langchain-openai langchain-cohere python-dotenv

In [None]:
import os
import warnings
warnings.filterwarnings('ignore')
from google.colab import userdata

In [None]:
# Fetch the OpenAI key through google.colab's `userdata` helper and publish it
# as an environment variable so later cells can find it via os.getenv.
openai_key = userdata.get("OPENAI_API_KEY")
os.environ.update(OPENAI_API_KEY=openai_key)


In [None]:

import os, json, re
import pandas as pd
import matplotlib.pyplot as plt


In [None]:

from langchain_openai import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage



In [None]:

# --- Run configuration ---------------------------------------------------
# Source CSV; this is a remote URL that pandas reads directly.
CSV_PATH = "https://github.com/giridhar276/genai/raw/refs/heads/main/datasets/Bank_Customer_conversations.csv"
# Name of the column that holds the customer utterance to classify.
TEXT_COL = "customer_text"
# Settings handed to the ChatOpenAI client.
MODEL = "gpt-4o-mini"
TEMPERATURE = 0
TIMEOUT = 60  # presumably seconds; confirm against the ChatOpenAI `timeout` docs

# CSV_PATH is a URL, so it cannot be reused as a write target. Derive a local
# file name from its last path component instead of rewriting the whole URL.
OUTPUT_PATH = os.path.basename(CSV_PATH).replace(".csv", "_with_sentiment_simple.csv")


# Fail early, before any model call is attempted, when the API key is missing or empty.
assert os.environ.get("OPENAI_API_KEY"), "Please set OPENAI_API_KEY in your environment."

In [None]:

# System prompt: pins the reply format to a one-line JSON object with the keys
# "label", "confidence" and "reason".
SYSTEM = (
    "You are a strict sentiment classifier for short customer utterances from bank call transcripts.\n"
    "Return ONLY a compact JSON object on a SINGLE LINE with keys:\n"
    '- "label": one of "positive", "neutral", "negative"\n'
    '- "confidence": a number in [0,1]\n'
    '- "reason": a brief rationale (<= 15 words)\n'
    'Judge tone + wording; ignore bank-specific facts. Prefer "neutral" if mixed.\n'
    "No extra text before/after JSON.\n"
)

USER_TMPL = (
    "Classify the sentiment of the CUSTOMER text below.\n\n"
    "Rules:\n"
    "- Output strictly ONE LINE of JSON only.\n"
    "- Labels: \"positive\" | \"neutral\" | \"negative\".\n"
    "- Keep \"reason\" short (<= 15 words).\n"
    "- Consider tone: polite, frustrated, aggressive, harsh.\n\n"
    "CUSTOMER:\n"
    '"""{text}"""'
)


In [None]:

def strict_json_parse(s):
    """Parse a model reply as JSON, tolerating common wrapping around the object.

    The reply is first parsed as-is, so any input that is already valid JSON
    returns exactly what ``json.loads`` returns. If that fails and ``s`` is a
    string, the text between the first ``{`` and the last ``}`` is parsed
    instead. This recovers replies wrapped in a Markdown code fence or
    surrounded by extra prose.

    Args:
        s: Raw reply text (anything ``json.loads`` accepts).

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If no parsable JSON can be found in ``s``.
    """
    try:
        return json.loads(s)
    except json.JSONDecodeError:
        # The fallback needs string search; other input types keep the original error.
        if not isinstance(s, str):
            raise
        start, end = s.find("{"), s.rfind("}")
        if start == -1 or end < start:
            raise
        return json.loads(s[start:end + 1])




In [None]:
def clean_text(x):
    """Return ``str(x)`` trimmed, with every run of whitespace reduced to one space."""
    # Turn line breaks into spaces and trim the ends, then collapse whitespace runs.
    flattened = str(x).replace("\r", " ").replace("\n", " ")
    trimmed = flattened.strip()
    return re.sub(r"\s+", " ", trimmed)

In [None]:

# Load the conversations and normalise the text column.
df = pd.read_csv(CSV_PATH)

# Drop rows with a missing utterance BEFORE casting to str. Otherwise NaN becomes
# the literal string "nan", passes the non-empty filter below and is classified
# as if it were real customer text.
df = df.dropna(subset=[TEXT_COL]).copy()
df[TEXT_COL] = df[TEXT_COL].astype(str).map(clean_text)

# Discard utterances that are empty once whitespace is normalised.
df = df[df[TEXT_COL].str.len() > 0].copy()
df.reset_index(drop=True, inplace=True)

print("Rows:", len(df))
df.head(3)


In [None]:

# Shared chat client, configured from the constants in the configuration cell.
llm = ChatOpenAI(
    model=MODEL,
    temperature=TEMPERATURE,
    timeout=TIMEOUT,
)

def classify_one(text: str) -> dict:
    """Classify one customer utterance with the chat model.

    Sends the SYSTEM prompt plus USER_TMPL filled with ``text`` to ``llm`` and
    returns the reply content as parsed by ``strict_json_parse``.
    """
    user_prompt = USER_TMPL.format(text=text)
    reply = llm.invoke([
        SystemMessage(content=SYSTEM),
        HumanMessage(content=user_prompt),
    ])
    return strict_json_parse(reply.content)


In [None]:

labels, confs, reasons = [], [], []

# One model call per utterance; each field of the reply goes to its own list
# so the three lists stay aligned with the rows of df.
for utterance in df[TEXT_COL].tolist():
    result = classify_one(utterance)
    for bucket, key in ((labels, "label"), (confs, "confidence"), (reasons, "reason")):
        bucket.append(result[key])


In [None]:

# Attach the model outputs to the frame as three new columns, in this order.
for column, values in (
    ("sentiment_label", labels),
    ("sentiment_confidence", confs),
    ("sentiment_reason", reasons),
):
    df[column] = values




In [None]:
# Write the annotated frame to a local CSV and report the file that was actually
# written. The message used to print OUTPUT_PATH, which is not the file saved here.
saved_path = "sentimentnew.csv"
df.to_csv(saved_path, index=False)
print(f"Saved: {saved_path}")


In [None]:

# Tally rows per sentiment label, ordered by label name.
label_column = df["sentiment_label"]
counts = label_column.value_counts().sort_index()
print("\nLabel counts:\n", counts)


In [None]:

# Bar chart of the label tally computed in the previous cell.
fig, ax = plt.subplots()
counts.plot(kind="bar", ax=ax)
ax.set_title("Sentiment label distribution")
ax.set_xlabel("label")
ax.set_ylabel("count")
plt.show()

# Preview the first rows with the model outputs next to the source text.
preview_columns = ["sentiment_label", "sentiment_confidence", "sentiment_reason", TEXT_COL]
df[preview_columns].head(10)