# **Enhanced-India-Centric-Stock-Sentiment-Analysis-JSON-Generator**



In [None]:
import torch
import pandas as pd
import json
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Paths
model_dir = "content/final_model"
tokenizer_dir = "content/final_tokenizer"
input_csv = "/content/headlines.csv"
output_json = "./output.json"

# Number of headlines scored per forward pass. Batching keeps peak memory
# bounded regardless of how many rows the CSV contains.
batch_size = 32

# Select the compute device explicitly; the model and every input tensor
# must live on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
model.to(device)
model.eval()  # inference mode: disables dropout etc.

# Label mapping from predicted class index to a human-readable sentiment.
# NOTE(review): assumes the fine-tuned model uses this class order — confirm
# against the training configuration.
label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}

# Load CSV
df = pd.read_csv(input_csv)

# Ensure text column
if "Headline" not in df.columns:
    raise ValueError("CSV must contain 'Headline' column")
# Drop rows with a missing headline first; otherwise astype(str) would turn
# them into the literal text "nan" and they would be scored as real input.
df = df.dropna(subset=["Headline"]).reset_index(drop=True)
df["Headline"] = df["Headline"].astype(str)

headlines = df["Headline"].tolist()

# Tokenize and predict batch by batch, collecting JSON-ready records.
results = []
with torch.no_grad():
    for start in range(0, len(headlines), batch_size):
        batch = headlines[start:start + batch_size]
        encodings = tokenizer(
            batch,
            truncation=True,
            padding=True,
            max_length=128,
            return_tensors="pt",
        )
        outputs = model(
            input_ids=encodings["input_ids"].to(device),
            attention_mask=encodings["attention_mask"].to(device),
        )
        probs = torch.softmax(outputs.logits, dim=-1)
        # Highest class probability and its index for each headline.
        scores, preds = torch.max(probs, dim=-1)

        for headline, pred, score in zip(batch, preds.tolist(), scores.tolist()):
            results.append({
                "headline": headline,
                # Fall back to the raw "LABEL_<id>" form for any class index
                # that is not covered by label_map.
                "sentiment": label_map.get(pred, f"LABEL_{pred}"),
                "score": float(score),
            })

# Save JSON
with open(output_json, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=4, ensure_ascii=False)

print(f"Predictions saved to {output_json}")

Predictions saved to ./output.json
