In [None]:
import pdfplumber
import asyncio
import spacy
import re
from spacy.matcher import Matcher
from transformers import pipeline , AutoModelForCausalLM, AutoTokenizer
from fastapi import FastAPI, File, UploadFile,HTTPException
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import nest_asyncio
from io import BytesIO
from pydantic import BaseModel
import logging
import traceback
import openai
import os
from openai import OpenAI
import os
from huggingface_hub import login
from dotenv import load_dotenv
import openai

# Load variables from a local .env file before any os.getenv() lookup below.
load_dotenv()

# Read the Hugging Face token and the OpenAI key (stored under GPT_KEY).
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
GPT_API=os.getenv("GPT_KEY")

# NOTE: this rebinds the name `openai` (imported above as a module) to a client
# instance. Every later `openai.<attr>` in this file refers to this client.
# NOTE(review): GPT_KEY is not validated here, unlike the two keys below.
openai = OpenAI(api_key=GPT_API)


# Fail fast at import time when the Hugging Face token is missing.
# NOTE(review): the token is not used anywhere in the visible code — confirm it is still required.
if not HUGGINGFACE_TOKEN:
    raise ValueError("Hugging Face token not found! Please set HUGGINGFACE_TOKEN in .env")



# Load the Groq API key (load_dotenv() is called a second time here).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Fail fast at import time when the Groq key is missing.
if not GROQ_API_KEY:
    raise ValueError("Groq API key not found! Please set GROQ_API_KEY in .env")

# Second OpenAI-SDK client, pointed at Groq's OpenAI-compatible endpoint.
# Used by FinanceChatbot.generate_response.
client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")


# Initialize FastAPI app; the route decorators below register handlers on it.
app = FastAPI()

# Enable CORS for frontend requests.
# NOTE(review): wildcard origins/methods/headers are combined with
# allow_credentials=True. The FastAPI CORS docs say these cannot be wildcards
# when credentials are allowed — confirm the real frontend origin(s) and list
# them explicitly, or drop allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Update for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the spaCy English pipeline once at import time; it is shared by
# extract_sentences_with_numeric_data.
nlp = spacy.load("en_core_web_sm")

# Load the summarization pipeline once at import time; summarize_text uses
# both the pipeline and its tokenizer.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def extract_text_from_pdf(pdf_bytes: bytes) -> str:
    """Extract the raw text of every page of a PDF using pdfplumber.

    Args:
        pdf_bytes: The complete PDF file content.

    Returns:
        The text of all pages joined by newlines, with leading and trailing
        whitespace removed. Pages without extractable text contribute an
        empty line.
    """
    with pdfplumber.open(BytesIO(pdf_bytes)) as pdf:
        # extract_text() can yield None for a page with no text layer (e.g. a
        # scanned image); treat that as an empty page instead of crashing on
        # `None + "\n"`.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n".join(page_texts).strip()

def extract_sentences_with_numeric_data(text):
    """Return the sentences of *text* that mention dates, amounts or numbers.

    A sentence is kept when at least one of these holds:

    * spaCy tags an entity inside it as DATE, TIME, MONEY or PERCENT;
    * one of the currency token patterns below matches inside it;
    * it contains a standalone integer or decimal number.

    Each kept sentence is then cleaned: URLs, page markers ("page 3",
    "pg. 3", "3/10", "page 3 of 10"), ``word/word`` tokens and tokens
    containing ``///`` are removed.

    Args:
        text: Raw document text.

    Returns:
        The kept, cleaned sentences in document order, joined by single
        spaces. An empty string when *text* is empty or blank, or when no
        sentence qualifies.
    """
    # Nothing to analyse; skip the NLP pipeline entirely.
    if not text or not text.strip():
        return ""

    currency_symbols = ["$", "₹", "€", "£", "¥"]
    currency_words = ["dollars", "rupees", "euros", "pounds", "yen"]
    numeric_entity_labels = ("DATE", "TIME", "MONEY", "PERCENT")

    matcher = Matcher(nlp.vocab)
    matcher.add("MONEY_PATTERNS", [
        [{"LIKE_NUM": True}, {"TEXT": {"IN": currency_symbols}}],
        [{"TEXT": {"IN": currency_symbols}}, {"LIKE_NUM": True}],
        [{"TEXT": {"IN": currency_symbols}}, {"IS_DIGIT": True}],
        [{"LIKE_NUM": True}, {"LOWER": {"IN": currency_words}}],
        [{"LIKE_NUM": True}, {"LOWER": "rs"}, {"TEXT": {"REGEX": r"\.?"}}],
    ])

    url_pattern = re.compile(r'https?://\S+|www\.\S+')
    # Longest alternative first: regex alternation takes the first branch that
    # matches at a position, so "page 3 of 10" must be tried before "page 3",
    # otherwise a stray "of 10" is left behind in the sentence.
    page_number_pattern = re.compile(
        r'\bpage\s*\d+\s*of\s*\d+\b|\bpage\s*\d+\b|\bpg\.\s*\d+\b|\b\d+\s*/\s*\d+\b',
        flags=re.IGNORECASE,
    )
    slash_word_pattern = re.compile(r'\b\w+/\w+\b')
    triple_slash_pattern = re.compile(r'\S*///\S*')
    number_pattern = re.compile(r'\b\d+\b|\b\d+[.,]\d+\b')

    doc = nlp(text)

    # Sentences are identified by the index of their first token.
    flagged_starts = {
        ent.sent.start for ent in doc.ents if ent.label_ in numeric_entity_labels
    }
    flagged_starts.update(doc[start].sent.start for _, start, _ in matcher(doc))
    flagged_starts.update(
        sent.start for sent in doc.sents if number_pattern.search(sent.text)
    )

    result = []
    for sent in doc.sents:
        if sent.start not in flagged_starts:
            continue
        cleaned = sent.text
        # Order matters: page markers such as "3/10" must be removed before
        # the generic word/word pattern sees them.
        for pattern in (url_pattern, page_number_pattern,
                        slash_word_pattern, triple_slash_pattern):
            cleaned = pattern.sub('', cleaned)
        cleaned = cleaned.strip()
        if cleaned:
            result.append(cleaned)

    return " ".join(result)

def chunk_text(text, tokenizer, max_tokens=512):
    """Break *text* into pieces of at most *max_tokens* tokens each.

    The text is tokenized once without truncation, the first sequence of
    token ids is cut into consecutive windows, and every window is decoded
    back to a string with special tokens skipped.

    Args:
        text: The text to split.
        tokenizer: A callable returning a mapping with an ``"input_ids"``
            entry, and offering ``decode(ids, skip_special_tokens=...)``.
        max_tokens: Window size in tokens.

    Returns:
        A list of decoded text chunks in their original order.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=False)
    token_ids = encoded["input_ids"][0]

    return [
        tokenizer.decode(token_ids[offset:offset + max_tokens], skip_special_tokens=True)
        for offset in range(0, len(token_ids), max_tokens)
    ]

def summarize_text(text, max_tokens=512):
    """Summarize *text* chunk by chunk with the module-level summarizer.

    The text is split with ``chunk_text`` using the summarizer's own
    tokenizer. Each non-blank chunk is summarized independently and the
    partial summaries are joined with single spaces.

    Args:
        text: The text to summarize.
        max_tokens: Maximum number of tokens per chunk.

    Returns:
        The concatenated chunk summaries, or an empty string when *text* is
        empty or contains only whitespace.
    """
    # Avoid sending empty input to the model: there is nothing to summarize.
    if not text or not text.strip():
        return ""

    tokenizer = summarizer.tokenizer
    summaries = []

    for chunk in chunk_text(text, tokenizer, max_tokens=max_tokens):
        # A chunk can decode to an empty string (e.g. special tokens only).
        if not chunk.strip():
            continue
        outputs = summarizer(chunk, max_length=80, min_length=30, do_sample=False)
        summaries.append(outputs[0]['summary_text'])

    return " ".join(summaries)

# Story generation through the OpenAI chat API
def chatgpt_generate_story(summary):
    """Turn a financial summary into a plain-language narrative.

    Sends a storyteller system prompt plus the summary to the module-level
    ``openai`` client (model ``gpt-4o-mini``).

    Args:
        summary: The financial summary to retell.

    Returns:
        The generated story. ``"API Key Missing"`` when the client has no API
        key, or ``"Story generation failed."`` when the request raises.
    """
    # `openai` here is the client instance created at module level.
    if not openai.api_key:
        print("Error: OpenAI API key not found.")
        return "API Key Missing"

    system_message = """You are a skilled financial storyteller with the ability to break down complex financial data into engaging, easy-to-understand narratives. Your goal is to transform a given financial summary into a compelling story that explains key financial events, trends, and figures in a way that anyone—regardless of their financial background—can grasp.

Your story should be:

Engaging: Use an approachable tone, like a journalist explaining financial news to a general audience.

Simple & Clear: Avoid jargon when possible; when using technical terms, explain them in simple words.

Well-Structured: Start with an introduction that sets the stage, followed by a breakdown of key figures, trends, and their impact.

Relatable: Use analogies and real-world comparisons to make numbers and trends easier to grasp.

Forward-Looking: Provide insights into where the company is headed based on financial trends, market conditions, and strategic decisions. Predict potential risks and opportunities in a balanced manner.
"""
    instructions = "Here is the financial summary you need to convert into a story. Please ensure clarity, engagement, and an easy-to-follow structure while maintaining factual accuracy. Make sure to conclude with a well-reasoned outlook on the company’s future trajectory. "
    user_prompt = instructions + f"Summary: {summary}\n\nStory:"

    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
    ]

    try:
        reply = openai.chat.completions.create(model='gpt-4o-mini', messages=conversation)
        return reply.choices[0].message.content
    except Exception as e:
        # Best effort: report the failure and hand back a placeholder string.
        print(f"OpenAI API Error: {e}")
        return "Story generation failed."

# Green-flag (strengths) analysis through the OpenAI chat API
def chatgpt_generate_green(summary):
    """List the green flags (positive signals) found in a financial summary.

    Sends an analyst system prompt plus the summary to the module-level
    ``openai`` client (model ``gpt-4o-mini``). The system prompt asks for at
    most five bullet points.

    Args:
        summary: The financial summary to analyse.

    Returns:
        The model's answer. ``"API Key Missing"`` when the client has no API
        key, or ``"Story generation failed."`` when the request raises (the
        placeholder text is shared with the sibling generators).
    """
    # `openai` here is the client instance created at module level.
    if not openai.api_key:
        print("Error: OpenAI API key not found.")
        return "API Key Missing"

    # The trailing label must match the task: it previously read
    # "Red flags: ", which contradicted the green-flag system prompt.
    prompt = (
        "Here is the financial summary: "
        f"Summary: {summary}\n\nGreen flags: "
    )

    system_message = """You are a financial analyst tasked with identifying green flags—positive signs that indicate strong financial health, growth potential, or competitive advantages for a company. Your goal is to analyze the given financial summary while also leveraging pre-existing knowledge about the company to highlight strengths and opportunities. Stick to at max 5 points and give it in bullet points
"""
    prompts = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]

    try:
        completion = openai.chat.completions.create(model='gpt-4o-mini', messages=prompts)
        return completion.choices[0].message.content
    except Exception as e:
        # Best effort: report the failure and hand back a placeholder string.
        print(f"OpenAI API Error: {e}")
        return "Story generation failed."

def chatgpt_generate_red(summary):
    """List the red flags (risks and warning signs) found in a financial summary.

    Sends a risk-analyst system prompt plus the summary to the module-level
    ``openai`` client (model ``gpt-4o-mini``). The system prompt asks for at
    most five bullet points.

    Args:
        summary: The financial summary to analyse.

    Returns:
        The model's answer. ``"API Key Missing"`` when the client has no API
        key, or ``"Story generation failed."`` when the request raises.
    """
    # `openai` here is the client instance created at module level.
    if not openai.api_key:
        print("Error: OpenAI API key not found.")
        return "API Key Missing"

    system_message = """You are a financial risk analyst tasked with identifying potential red flags in a company’s financial summary. Your goal is to analyze the given financial data, leveraging both the provided summary and any pre-existing knowledge about the company to highlight concerns, risks, or warning signs. Stick to at max 5 points and give it in bullet points
"""
    user_prompt = "Here is the financial summary: " + f"Summary: {summary}\n\nRed flags: "

    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
    ]

    try:
        reply = openai.chat.completions.create(model='gpt-4o-mini', messages=conversation)
        return reply.choices[0].message.content
    except Exception as e:
        # Best effort: report the failure and hand back a placeholder string.
        print(f"OpenAI API Error: {e}")
        return "Story generation failed."

@app.post("/upload/")
async def upload_pdf(file: UploadFile = File(...)):
    """Analyse an uploaded PDF and return summary, story and flag lists.

    Pipeline: read the upload, extract its text, keep the sentences carrying
    numeric data, summarize them, then ask the chat model for a story, green
    flags and red flags based on that summary.

    Args:
        file: The uploaded PDF.

    Returns:
        A dict with the keys ``summary``, ``redFlags``, ``greenFlags``,
        ``story``, ``conclusion`` and ``visualisation``. The last two are
        always ``None``.

    Raises:
        HTTPException: 400 when the upload is empty or its text cannot be
            extracted; 422 when the PDF contains no extractable text.
    """
    pdf_bytes = await file.read()
    if not pdf_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # Only the parsing step is guarded: a bad upload is a client error, not
    # an internal server error.
    try:
        raw_text = extract_text_from_pdf(pdf_bytes)
    except Exception as exc:
        logging.getLogger(__name__).exception("Failed to extract text from uploaded PDF")
        raise HTTPException(
            status_code=400,
            detail="Could not extract text from the uploaded PDF.",
        ) from exc

    if not raw_text.strip():
        raise HTTPException(
            status_code=422,
            detail="No extractable text found in the PDF.",
        )

    # NOTE(review): the steps below are synchronous (NLP, summarization and
    # three chat requests) and run inside this coroutine.
    extracted_text = extract_sentences_with_numeric_data(raw_text)
    summary = summarize_text(extracted_text)
    financial_story = chatgpt_generate_story(summary)
    green_flags = chatgpt_generate_green(summary)
    red_flags = chatgpt_generate_red(summary)

    # Placeholders: these parts of the analysis are not produced yet.
    conclusion = None
    visualisations = None

    # Return comprehensive analysis
    return {
        "summary": summary,
        "redFlags": red_flags,
        "greenFlags": green_flags,
        "story": financial_story,
        "conclusion": conclusion,
        "visualisation": visualisations,
    }


# Request body of the POST /chat endpoint.
class ChatMessage(BaseModel):
    message: str  # user text forwarded to FinanceChatbot.generate_response

# Response body of the POST /chat endpoint (used as its response_model).
class ChatResponse(BaseModel):
    response: str  # reply text produced by the chatbot

# Hugging Face model identifier.
# NOTE(review): this constant is not referenced in the visible code — the chat
# endpoint calls Groq with "gemma2-9b-it" instead. Confirm whether it is still needed.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"  # Replace with a smaller LLaMA model if needed

class FinanceChatbot:
    """Chat helper that answers questions through the Groq-backed client.

    The class is a process-wide singleton: every ``FinanceChatbot()`` call
    returns the same instance.
    """

    # The single shared instance, created on first construction.
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    async def generate_response(self, query: str) -> str:
        """Ask the chat model for a short plain-text answer to *query*.

        Args:
            query: The user's question.

        Returns:
            The model's reply with surrounding whitespace removed. A fixed
            apology string when the Groq key is missing or the request fails.
        """
        if not GROQ_API_KEY:
            return "Sorry, the chatbot is currently unavailable."

        # Create a structured financial prompt
        financial_prompt = f"Please provide a small chat size response for '{query}' and use normal text not markdown"
        messages = [
            {"role": "system", "content": "You are a helpful chatbot who get more questions on the financial side but balanced. IF questions asked financial based answer it in terms of company."},
            {"role": "user", "content": financial_prompt},
        ]

        def _request_completion():
            """Perform the blocking chat-completion request."""
            return client.chat.completions.create(
                model="gemma2-9b-it",
                messages=messages,
                max_tokens=200,
                temperature=0.7,
            )

        try:
            # The client call is synchronous; run it in the default executor
            # so this coroutine does not block the event loop while waiting.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, _request_completion)
            return response.choices[0].message.content.strip()
        except Exception as e:
            # Best effort: report the failure and return a user-facing message.
            print(f"Error generating response: {e}")
            return "I encountered an error processing your query."

    

    

# Shared chatbot instance used by the /chat endpoint.
finance_chatbot = FinanceChatbot()

@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(chat_message: ChatMessage):
    """Answer a chat message with the shared finance chatbot.

    Args:
        chat_message: Request body carrying the user's message.

    Returns:
        A dict matching ``ChatResponse``: ``{"response": <reply text>}``.

    Raises:
        HTTPException: 500 with the error text when generating the reply raises.
    """
    try:
        # Generate response using the finance chatbot
        response_text = await finance_chatbot.generate_response(chat_message.message)
    except Exception as e:
        # Chain the cause so the original traceback is kept.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"response": response_text}

# Run FastAPI inside Jupyter Notebook.
# The guard keeps the server from starting when this file is merely imported;
# in a notebook cell or when run as a script, __name__ is "__main__".
if __name__ == "__main__":
    # Allow uvicorn to start its loop inside the already-running notebook loop.
    nest_asyncio.apply()
    uvicorn.run(app, host="0.0.0.0", port=8000)


The web browser should have opened for you to authenticate and get an API token.
If it didn't, please copy this URL into your web browser manually:

https://modal.com/token-flow/tf-h8NHviRkJxLsLmg9hMfJxo

Waiting for authentication in the web browser
Waiting for token flow to complete...
Web authentication finished successfully!
Token is connected to the vishy6400 workspace.
Verifying token against https://api.modal.com
Token verified successfully!
Storing token
Token written to /Users/vishwajithp/.modal.toml in profile vishy6400.


Despite inflationary pressures, the company has maintained a strong profit margin. The financial report for the last quarter highlights a steady increase in revenue.
