**Step 1 – Install Libraries**

In [None]:
!pip install transformers torch PyPDF2




**Step 2 – Import Dependencies**

In [None]:
import re
from html import escape

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch, PyPDF2
from IPython.display import HTML


**Step 3 – Convert PDF → Text**

In [None]:
def pdf_to_text(pdf_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages in page order, each page followed by a newline.
        A page that yields no text contributes only its trailing newline.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Fall back to "" when a page yields a falsy result (e.g. None for a
        # page without a text layer) so the concatenation cannot raise TypeError.
        page_texts = [(page.extract_text() or "") + "\n" for page in reader.pages]
    # Join once instead of growing a string page by page.
    return "".join(page_texts)

# Location of the report PDF inside the Colab session.
pdf_file = "/content/AI AI_FinanceInsight (1).pdf"  # ← your uploaded file
text_content = pdf_to_text(pdf_file)

# Keep a plain-text copy of the extracted report next to the notebook.
with open("sample_financial_report.txt", mode="w", encoding="utf-8") as report_txt:
    report_txt.write(text_content)

print("✅ PDF converted to text successfully!")


✅ PDF converted to text successfully!


**Step 4 – Load FinBERT Model**

In [None]:
# The tokenizer and the classifier are loaded from the same checkpoint name,
# so the name is spelled once and reused for both.
FINBERT_CHECKPOINT = "yiyanghkust/finbert-tone"

tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)


**Step 5 – Run Sentiment Analysis**

In [None]:
# Split on the whitespace that follows sentence-ending punctuation instead of on
# every ".", so figures such as "3.5%" or "$1.2 billion" stay in one sentence.
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text_content) if s.strip()]

# Position i names the class behind logit i.
# NOTE(review): order assumed to match the checkpoint — confirm against model.config.id2label.
labels = ['neutral', 'positive', 'negative']

# The tokenizer has no built-in maximum length, so truncation=True alone does
# nothing (see the warning it prints). Cap inputs explicitly at the number of
# positions the model was configured with.
max_tokens = model.config.max_position_embeddings

model.eval()  # inference only: make sure dropout is disabled
results = []
# Gradients are never used here; skipping them saves memory and time.
with torch.no_grad():
    for sentence in sentences:
        inputs = tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            max_length=max_tokens,
            padding=True,
        )
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        label_id = torch.argmax(probs, dim=1).item()
        # Store (sentence, predicted label, probability of that label).
        results.append((sentence, labels[label_id], float(probs[0][label_id])))


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


**Step 6 – Create Color-Coded HTML**

In [None]:
# Text colour for each sentiment label; any other label is rendered gray.
label_colors = {"positive": "green", "negative": "red"}

# Declare the encoding so the "→" character written below displays correctly
# when the saved file is opened directly in a browser.
html_parts = ["<html><head><meta charset='utf-8'></head><body><h2>FinBERT Sentiment Analysis</h2>"]
for sentence, label, score in results:
    color = label_colors.get(label, "gray")
    # The sentence comes straight from the PDF: escape it so characters such as
    # <, > and & are shown literally instead of being parsed as markup.
    html_parts.append(
        f"<p style='color:{color}'>{escape(sentence)} → {label.upper()} ({score:.2f})</p>"
    )
html_parts.append("</body></html>")
# Assemble the page once rather than growing a string inside the loop.
html_output = "".join(html_parts)

with open("finbert_results.html", "w", encoding="utf-8") as f:
    f.write(html_output)

print("✅ HTML file created: finbert_results.html")
HTML(html_output)


✅ HTML file created: finbert_results.html


**Step 7 – Download Output Files**

In [None]:
from google.colab import files

# Offer each generated file as a browser download: the text report first,
# then the colour-coded HTML page.
for output_name in ("sample_financial_report.txt", "finbert_results.html"):
    files.download(output_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>