In [38]:
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, Float
from sqlalchemy.orm import declarative_base, sessionmaker
from datetime import datetime
import os
from dotenv import load_dotenv
load_dotenv()

PG_CONN_STR = os.getenv("PG_CONN_STR")

Base = declarative_base()
engine = create_engine(PG_CONN_STR)
SessionLocal = sessionmaker(bind=engine)

class MarsStep(Base):
    __tablename__ = "mars_steps"

    id = Column(Integer, primary_key=True, index=True)
    input_question = Column(Text)
    teacher_question = Column(Text)
    critique = Column(Text)
    final_question = Column(Text)
    final_answer = Column(Text)
    teacher_latency = Column(Float)
    critic_latency = Column(Float)
    student_latency = Column(Float)
    teacher_agent = Column(String)
    critic_agent = Column(String)
    student_agent = Column(String)
    timestamp = Column(DateTime, default=datetime.utcnow)

def init_db():
    Base.metadata.create_all(bind=engine)

In [39]:

def log_mars_step(input_q, teacher_q, critique, final_q, final_a,
                  t_latency=None, c_latency=None, s_latency=None,
                  t_agent="teacher", c_agent="critic", s_agent="student"):
    session = SessionLocal()
    step = MarsStep(
        input_question=input_q,
        teacher_question=teacher_q,
        critique=critique,
        final_question=final_q,
        final_answer=final_a,
        teacher_latency=t_latency,
        critic_latency=c_latency,
        student_latency=s_latency,
        teacher_agent=t_agent,
        critic_agent=c_agent,
        student_agent=s_agent
    )
    session.add(step)
    session.commit()
    session.close()

In [40]:
import dspy
from dspy import Signature, InputField, OutputField, Module


lm = dspy.LM('ollama_chat/hf.co/ernanhughes/Fin-R1-Q8_0-GGUF', api_base='http://localhost:11434', api_key='')
dspy.configure(lm=lm)

# Step 1: Define DSPy Signature
class AnalyzeMargins(Signature):
    context = InputField(desc="Relevant financial data")
    question = InputField(desc="User's trading question")
    signal = OutputField(desc="Bullish, Bearish, or Neutral")
    rationale = OutputField(desc="Explanation for the signal")
    

In [41]:
from dspy import Signature, InputField, OutputField
from dspy import Module, Predict

class FinancialTrendAnalysis(Signature):
    statements = InputField(desc="Multiple income statements across quarters")
    question = InputField(desc="Question about company performance")
    signal = OutputField(desc="bullish, bearish, or neutral")
    rationale = OutputField(desc="Explanation based on financial trends")


class IncomeStatementAnalyzer(Module):
    def __init__(self):
        super().__init__()
        self.analyze = Predict(FinancialTrendAnalysis)
        

    def forward(self, statements, question):
        return self.analyze(statements=statements, question=question)


In [42]:
import os
from dotenv import load_dotenv
import pandas as pd
from sqlalchemy import create_engine

from edgar import Company, set_identity
from edgar.xbrl2 import *

class EDGARFetcher:
    def __init__(self, ticker: str, form: str = "10-Q", n: int = 3):
        load_dotenv()

        # Load PG and EDGAR credentials
        self.pg_conn_str = os.getenv("PG_CONN_STR")
        self.identity = os.getenv("IDENTITY")
        self.engine = create_engine(self.pg_conn_str)
        self.ticker = ticker
        self.form = form
        self.n = n

        # Set identity for SEC API
        set_identity(self.identity)

    def fetch_markdown_statements(self):
        filings = Company(self.ticker).latest(form=self.form, n=self.n)

        statements = []
        for filing in filings:
            xbrl = XBRL.from_filing(filing)
            income_statement = xbrl.statements.income_statement()
            df = income_statement.to_dataframe()
            statements.append(self.rich_report_to_text(df))
        return statements

    def rich_report_to_text(self, df: pd.DataFrame) -> str:
        """
        Convert a rich EDGAR report DataFrame to readable plain text for LLMs.
        """
        lines = []
        for _, row in df.iterrows():
            label = row.get("original_label") or row.get("label") or row.get("concept")
            values = [
                f"{col}: {row[col]}" for col in df.columns
                if isinstance(col, str) and col.startswith("20") and pd.notna(row[col])
            ]
            if values:
                lines.append(f"{label}: " + " | ".join(values))
        return "\n".join(lines)


    def save_statements_to_db(self, statement_list, table="filings_markdown"):
        for statement in statement_list:
            df = statement.to_dataframe()
            df = df.T
            df = df.reset_index()
            print(df.head())
            df.to_sql(table, con=self.engine, index=False, if_exists="append")
            print(f"Inserted {len(df)} rows into {table}")

    def run(self):
        statements = self.fetch_markdown_statements()
        # self.save_statements_to_db(statements, table="income_statement")
        markdowns = [statement for statement in statements]
        return markdowns
ticker = "TSLA"
n = 3
fetcher = EDGARFetcher(ticker=ticker, n=n)
statements = fetcher.run()


In [43]:
# Create a utility function to estimate token count from a list of markdown statements

def estimate_token_count(markdown_list: list[str], chars_per_token: int = 4) -> int:
    """
    Estimate the number of tokens used by a list of markdown-formatted statements.

    Args:
        markdown_list (list[str]): A list of markdown text blocks.
        chars_per_token (int): Average number of characters per token. Default is 4.

    Returns:
        int: Estimated total token count.
    """
    combined_text = "\n\n".join(markdown_list)
    total_chars = len(combined_text)
    estimated_tokens = total_chars // chars_per_token
    return estimated_tokens

estimated = estimate_token_count(statements)
estimated


2116

In [44]:
def build_analysis_prompt(ticker: str, markdown_list: list[str]) -> str:
    header = f"You are a financial analysis model. Below are the last {len(markdown_list)} income statements from {ticker}.\n\n"
    instructions = (
        "Analyze the trend in revenue and operating income.\n"
        "Decide if profitability is improving or declining.\n"
        "Then provide a trading signal.\n\n"
        "Respond with:\n"
        "Signal: <Bullish/Bearish/Neutral>\n"
        "Rationale: <short explanation>\n\n"
    )
    body = "\n\n".join(markdown_list)
    return header + instructions + body

prompt = build_analysis_prompt(ticker, statements)
print(prompt[:300])


You are a financial analysis model. Below are the last 3 income statements from TSLA.

Analyze the trend in revenue and operating income.
Decide if profitability is improving or declining.
Then provide a trading signal.

Respond with:
Signal: <Bullish/Bearish/Neutral>
Rationale: <short explanation>



In [45]:
import litellm
litellm._turn_on_debug()
import logging
logging.basicConfig(level=logging.DEBUG, 
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 
                    handlers=[logging.FileHandler('vector_db.log', 'w', 'utf-8')])



In [None]:
from dspy import Signature, InputField, OutputField, Module

from dspy import Module, Predict, ChainOfThought
from dspy import Signature, InputField, OutputField

class TeacherQuestion(Signature):
    prompt = InputField()
    question = OutputField(desc="A Socratic question to improve the prompt")

class TeacherQuestioner(Module):
    def __init__(self, use_chain_of_thought: bool = True):
        super().__init__()
        self.use_chain_of_thought = use_chain_of_thought

        self.generate = (
            ChainOfThought(TeacherQuestion)
            if self.use_chain_of_thought
            else Predict(TeacherQuestion)
        )

    def forward(self, prompt):
        return self.generate(prompt=prompt)


class CritiqueQuestion(Signature):
    question = InputField()
    critique = OutputField(desc="Is the question Socratic? Why or why not?")

class CriticJudge(Module):
    def __init__(self):
        super().__init__()
        self.evaluate = dspy.Predict(CritiqueQuestion)

    def forward(self, question):
        return self.evaluate(question=question)

class MarginAnalyzer(Module):
    def __init__(self):
        super().__init__()
        self.analyze = dspy.ChainOfThought(AnalyzeMargins)

    def forward(self, context, question, teacher_question=None):
        if teacher_question:
            question = f"{question} Consider also: {teacher_question}"
        return self.analyze(context=context, question=question)


In [50]:
import dspy

class MarsAnalysisProgram(dspy.Program):
    def __init__(self, teacher: dspy.Module, critic: dspy.Module, student: dspy.Module):
        super().__init__()
        self.teacher = teacher
        self.critic = critic
        self.student = student

    def forward(self, context: str, base_question: str):
        # Step 1: Generate Socratic question
        teacher_out = self.teacher(prompt=context + "\n\n" + base_question)
        
        # Step 2: Evaluate Socratic quality
        critic_out = self.critic(question=teacher_out.question)

        # Step 3: Decide final question
        if "yes" in critic_out.critique.lower():
            final_question = f"{base_question} Consider also: {teacher_out.question}"
        else:
            final_question = base_question

        # Step 4: Get Student’s answer
        student_out = self.student(context=context, question=final_question)

        return {
            "teacher_question": teacher_out.question,
            "critique": critic_out.critique,
            "final_question": final_question,
            "signal": student_out.signal,
            "rationale": student_out.rationale
        }



In [52]:
def log_mars_step(input_q, teacher_q, critique, final_q, final_a,
                  t_latency=None, c_latency=None, s_latency=None,
                  t_agent="teacher", c_agent="critic", s_agent="student",
                  trace_json=None):
    # from db import SessionLocal, MarsStep

    session = SessionLocal()
    step = MarsStep(
        input_question=input_q,
        teacher_question=teacher_q,
        critique=critique,
        final_question=final_q,
        final_answer=final_a,
        teacher_latency=t_latency,
        critic_latency=c_latency,
        student_latency=s_latency,
        teacher_agent=t_agent,
        critic_agent=c_agent,
        student_agent=s_agent,
        trace=trace_json  # ➕ this assumes you added a 'trace' TEXT/JSON column to your table
    )
    session.add(step)
    session.commit()
    session.close()


In [61]:
teacher = TeacherQuestioner()
critic = CriticJudge()
student = MarginAnalyzer()

program = MarsAnalysisProgram(teacher, critic, student)

results = program(
    context=prompt,
    base_question="Is the company improving its profitability?"
)

dspy.inspect_history()
results





[34m[2025-03-25T10:24:21.892041][0m

[31mSystem message:[0m

Your input fields are:
1. `context` (str): Relevant financial data
2. `question` (str): User's trading question

Your output fields are:
1. `signal` (str): Bullish, Bearish, or Neutral
2. `rationale` (str): Explanation for the signal

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## context ## ]]
{context}

[[ ## question ## ]]
{question}

[[ ## signal ## ]]
{signal}

[[ ## rationale ## ]]
{rationale}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        Given the fields `context`, `question`, produce the fields `signal`, `rationale`.


[31mUser message:[0m

[[ ## context ## ]]
You are a financial analysis model. Below are the last 3 income statements from TSLA.

Analyze the trend in revenue and operating income.
Decide if profitability is improving or declining.
Then provide a trading signal.

Respond with:
Signal: <Bullish/Bearish/

{'teacher_question': "To determine if the company is improving its profitability, we need to analyze key financial metrics such as net income, earnings per share (EPS), and possibly other profitability ratios. Let's look at the data provided for two periods: 2023-Q1 and 2024-Q1.\n\n### Key Metrics:\n1. **Net Income**:\n   - 2023-Q1: $2,539 million\n   - 2024-Q1: $1,144 million\n\n2. **Net Income Attributable to Common Stockholders**:\n   - 2023-Q1: $2,513 million\n   - 2024-Q1: $1,129 million\n\n3. **Basic EPS**:\n   - 2023-Q1: $0.8 per share\n   - 2024-Q1: $0.37 per share\n\n4. **Diluted EPS**:\n   - 2023-Q1: $0.73 per share\n   - 2024-Q1: $0.34 per share\n\n5. **Basic Shares Outstanding**:\n   - 2023-Q1: ~3,166 million shares\n   - 2024-Q1: ~3,186 million shares\n\n### Analysis:\n- **Net Income Decline**: The net income decreased significantly from $2.54 billion in Q1 2023 to $1.14 billion in Q1 2024. This is a substantial drop of approximately 54%, which could indicate deteriorating

In [58]:
teacher = TeacherQuestioner()
critic = CriticJudge()
student = MarginAnalyzer()

program = MarsAnalysisProgram(teacher, critic, student)

from dspy.teleprompt import Teleprompter

# Wrap your MarsAnalysisProgram with tracing enabled
tp = Teleprompter(program)

# Run your pipeline through the teleprompter
result = tp(context=prompt, base_question="Is the company improving its profitability?")

print(result)

TypeError: Teleprompter.__init__() takes 1 positional argument but 2 were given

In [59]:
teacher = TeacherQuestioner()
critic = CriticJudge()
student = MarginAnalyzer()

program = MarsAnalysisProgram(teacher, critic, student)

results = program(
    context=prompt,
    base_question="Is the company improving its profitability?"
)

dspy.inspect_history()






[34m[2025-03-25T10:23:37.612565][0m

[31mSystem message:[0m

Your input fields are:
1. `context` (str): Relevant financial data
2. `question` (str): User's trading question

Your output fields are:
1. `signal` (str): Bullish, Bearish, or Neutral
2. `rationale` (str): Explanation for the signal

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## context ## ]]
{context}

[[ ## question ## ]]
{question}

[[ ## signal ## ]]
{signal}

[[ ## rationale ## ]]
{rationale}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        Given the fields `context`, `question`, produce the fields `signal`, `rationale`.


[31mUser message:[0m

[[ ## context ## ]]
You are a financial analysis model. Below are the last 3 income statements from TSLA.

Analyze the trend in revenue and operating income.
Decide if profitability is improving or declining.
Then provide a trading signal.

Respond with:
Signal: <Bullish/Bearish/

In [48]:
import dspy
teacher = TeacherQuestioner()
critic = CriticJudge()
student = MarginAnalyzer()

# Step 1: Teacher generates Socratic question
teacher_out = teacher(prompt=prompt)

# Step 2: Critic evaluates the question
critic_out = critic(question=teacher_out.question)

# Step 3: If the question is valid, pass it to the student
if "yes" in critic_out.critique.lower():
    student_out = student(context=prompt, question="Is profitability improving?", teacher_question=teacher_out.question)
else:
    student_out = student(context=prompt, question="Is profitability improving?")

print("📚 Teacher Question:", teacher_out.question)
print("🧐 Critique:", critic_out.critique)
print("📈 Signal:", student_out.signal)
print("🧠 Rationale:", student_out.rationale)

print(student_out)  # will show all fields in the Prediction

print(student_out.__dict__)


📚 Teacher Question: Given the financial data provided, calculate the diluted earnings per share (EPS) for the fiscal year 2023. The data includes income statements and balance sheets for two periods: Q1 2023 (ending March 31) and Q4 2024 (ending December 31). The question specifically asks for the diluted EPS for 2023, which is derived from the net income attributable to common stockholders and the weighted average number of diluted shares outstanding during that year. 

To compute diluted EPS, the formula is:  
Diluted EPS = Net Income Attributable to Common Stockholders / Weighted Average Diluted Shares Outstanding  

The data shows two periods for 2023: Q1 (January–March) and the remaining quarters (April–December). However, since only Q1 2023 is provided with diluted EPS data, we need to infer if the entire year's diluted EPS can be calculated using this quarter's figures or if additional data from other quarters is required. The problem statement does not provide complete annual d