In [1]:
import litellm

# Switch on LiteLLM's debug logging for the rest of the kernel session.
# NOTE(review): the leading underscore suggests this is a private helper, and it
# is presumably the source of the LiteLLM DEBUG/INFO lines in the saved cell
# output — consider removing it once debugging is finished.
litellm._turn_on_debug()

def query_litellm(model_name: str, prompt: str) -> str:
    """
    Send a single-turn user prompt to a model through LiteLLM and return the reply.

    Args:
        model_name (str): LiteLLM model identifier (e.g. 'ollama/qwen2.5' or 'huggingface/your-model').
        prompt (str): Text sent as the content of the only message, with role "user".

    Returns:
        str: The message content of the first choice in the completion response.
    """
    # A single user turn: no system prompt and no earlier conversation history.
    user_turn = {"role": "user", "content": prompt}
    completion = litellm.completion(model=model_name, messages=[user_turn])

    first_choice = completion['choices'][0]
    return first_choice['message']['content']


In [2]:
from dotenv import load_dotenv

# Load environment settings through python-dotenv when this cell runs.
# NOTE(review): presumably reads a local `.env` file (dotenv's default lookup) —
# confirm which variables, if any, the models used in this notebook need.
load_dotenv()


def callModels(
    prompt: str,
    model1_name: str = 'ollama/hf.co/ernanhughes/Fin-R1-Q8_0-GGUF',
    model2_name: str = 'ollama/phi3',
) -> tuple:
    """
    Send the same prompt to two models and return both answers.

    Args:
        prompt (str): The prompt forwarded unchanged to each model.
        model1_name (str): Model identifier queried first.
        model2_name (str): Model identifier queried second.

    Returns:
        tuple: ``(model1_response, model2_response)`` as returned by ``query_litellm``.
    """
    # The generator is consumed left to right, so model 1 is always queried
    # before model 2 and an error on the first call skips the second.
    responses = tuple(
        query_litellm(model_name=name, prompt=prompt)
        for name in (model1_name, model2_name)
    )
    return responses


In [3]:
def build_comparison_prompt(question: str, response_a: str, response_b: str) -> str:
    """
    Build the judge prompt that asks a model to compare two answers to one question.

    The prompt asks the judge to rate each response from 0 to 10 on accuracy,
    clarity, depth and relevance, to name a winner (or a tie) and to justify the
    decision in a fixed output format.

    Args:
        question (str): The request both models answered; inserted under "### Question:".
        response_a (str): First answer; inserted under "### Response A:".
        response_b (str): Second answer; inserted under "### Response B:".

    Returns:
        str: The filled-in prompt text.
    """
    # The template's own lines keep the indentation they have in this source
    # file; the three interpolated values are inserted verbatim, so only the
    # first line of a multi-line value lines up with the template.
    judge_prompt = f"""
        ### Task:
        You are a financial reasoning expert. Two models have provided investment advice in CoT format.
        Evaluate which advice is more helpful, complete, and appropriate based on reasoning and financial knowledge.

        ### Instructions:
        1. Assess each response based on:
        - Accuracy and factual correctness
        - Clarity and coherence
        - Depth of explanation
        - Relevance to the question

        2. Assign each response a score from **0 to 10**.

        3. Provide a short justification for your scoring.

        ---

        ### Question:
        {question}

        ---

        ### Response A:
        {response_a}

        ---

        ### Response B:
        {response_b}

        ---

        ### Output Format:

        Response A Score: X/10 Response B Score: Y/10

        Winner: Response A or Response B or Tie

        Justification: <your analysis here>
        """
    return judge_prompt

In [4]:
import openai
import sqlite3
import random
from pprint import pprint

# --- Step 1: Define a UserProfile class ---
class UserProfile:
    """
    Plain container for one investor's profile.

    Attributes:
        name: Label identifying the user (e.g. "User A").
        age: The user's age.
        gender: The user's gender (e.g. "female", "male").
        risk_tolerance: Risk appetite label (e.g. "moderate", "high").
        investment_goal: What the user is investing for (e.g. "retirement").
        investment_horizon_years: Number of years until the goal.
        current_portfolio: Current holdings; the sample data uses a dict such as
            {"stocks": ..., "bonds": ..., "cash": ...} (units are not stated).
        economic_context: Free-text description of the market backdrop.
    """

    def __init__(self, name, age, gender, risk_tolerance, investment_goal, investment_horizon_years, current_portfolio, economic_context):
        """Store every constructor argument on the instance under the same name."""
        self.name = name
        self.age = age
        self.gender = gender
        self.risk_tolerance = risk_tolerance
        self.investment_goal = investment_goal
        self.investment_horizon_years = investment_horizon_years
        self.current_portfolio = current_portfolio
        self.economic_context = economic_context

    def to_dict(self):
        """
        Return the profile's attributes as a new dict keyed by attribute name.

        Returns:
            dict: A shallow copy of the instance attributes. Adding, removing or
            reassigning keys in the result does not affect this profile; nested
            objects such as ``current_portfolio`` are still shared.
        """
        # Copy instead of handing out the live ``__dict__``: returning the
        # attribute dict itself lets any caller silently rewrite the profile.
        return dict(self.__dict__)

# --- Step 2: Initialize user profiles ---
# Sample investors used by the rest of the notebook. Each spec is unpacked into
# UserProfile's keyword arguments, so the keys must match its parameter names.
user_profiles = [
    UserProfile(**spec)
    for spec in (
        {
            "name": "User A",
            "age": 35,
            "gender": "female",
            "risk_tolerance": "moderate",
            "investment_goal": "retirement",
            "investment_horizon_years": 20,
            "current_portfolio": {"stocks": 6000, "bonds": 300, "cash": 1000},
            "economic_context": "Fed is expected to raise interest rates next quarter",
        },
        {
            "name": "User B",
            "age": 25,
            "gender": "male",
            "risk_tolerance": "high",
            "investment_goal": "wealth_growth",
            "investment_horizon_years": 10,
            "current_portfolio": {"stocks": 20000, "bonds": 10000, "cash": 300000},
            "economic_context": "Inflation is steady and market volatility is growing",
        },
    )
]
    

In [5]:
def gen_robo_request(user_profile: UserProfile) -> str:
    """
    Render a user profile as the one-paragraph request sent to the advisory models.

    The text opens with a literal ``<think>`` tag (never closed here) and then
    states the user's name, age, risk tolerance, goal, horizon, current
    portfolio and economic context. Gender is not included.

    Args:
        user_profile (UserProfile): The profile to describe.

    Returns:
        str: The request text.
    """
    fields = user_profile.to_dict()
    sentences = (
        f"<think>{fields['name']} is {fields['age']} years old with a {fields['risk_tolerance']} risk tolerance. ",
        f"Goal is {fields['investment_goal']} in {fields['investment_horizon_years']} years. ",
        # The portfolio is interpolated with its default str() form.
        f"Current portfolio: {fields['current_portfolio']}. Economic context: {fields['economic_context']}.",
    )
    return "".join(sentences)


In [6]:
# Open the SQLite database file "fin_r1_advisory.db" (path is relative to the
# current working directory) and get a cursor for running statements.
# `conn` and `cursor` are module-level and stay open after this cell.
conn = sqlite3.connect("fin_r1_advisory.db")
cursor = conn.cursor()

# One row per user, keyed by name. IF NOT EXISTS makes this cell safe to re-run
# against an existing database file.
cursor.execute("""
CREATE TABLE IF NOT EXISTS user_profiles (
    name TEXT PRIMARY KEY,
    age INTEGER,
    gender TEXT,
    risk_tolerance TEXT,
    investment_goal TEXT,
    horizon_years INTEGER,
    economic_context TEXT,
    current_portfolio TEXT
)
""")

# One row per advisory run: the request, two model outputs, the comparison
# prompt and the resulting report. The table has no primary key or uniqueness
# constraint, so nothing here prevents several rows for the same user.
# NOTE(review): SQLite only enforces the FOREIGN KEY clause when
# `PRAGMA foreign_keys = ON` is set on the connection, which the code visible
# here does not do.
cursor.execute("""
CREATE TABLE IF NOT EXISTS advisory_results (
    user_name TEXT,
    request TEXT,
    fin_r1_output TEXT,
    model_output TEXT,
    report_prompt TEXT,           
    report TEXT,
    FOREIGN KEY(user_name) REFERENCES user_profiles(name)
)
""")
conn.commit()


In [None]:
# For every sample profile: store the profile, ask both advisory models for
# advice, have a judge model compare the two answers, and store the results.
for profile in user_profiles:
    # Upsert the profile (name is the primary key, so a re-run replaces the row).
    # The portfolio is stored as its str() form in a TEXT column.
    cursor.execute("""
    INSERT OR REPLACE INTO user_profiles (
        name, age, gender, risk_tolerance, investment_goal, horizon_years,
        economic_context, current_portfolio
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
    (
        profile.name, profile.age, profile.gender, profile.risk_tolerance,
        profile.investment_goal, profile.investment_horizon_years,
        profile.economic_context, str(profile.current_portfolio)
    ))
    # Commit now rather than after the model calls: the calls below can take
    # minutes, and leaving the write uncommitted would keep the transaction open
    # for that whole time and leave it dangling on the shared connection if a
    # model call raises.
    conn.commit()

    request = gen_robo_request(profile)
    # NOTE: with callModels' defaults the first value comes from the Fin-R1
    # model and the second from phi3; the variable names are kept as they are
    # because they are module-level notebook state.
    qwen_response, phi_response = callModels(request)
    report_prompt = build_comparison_prompt(request, qwen_response, phi_response)
    # A third model acts as judge of the two answers.
    report = query_litellm('ollama/qwen2.5', report_prompt)

    # Plain INSERT: every run appends a new result row for the user.
    cursor.execute("""
    INSERT INTO advisory_results (
        user_name, request, fin_r1_output, model_output, report_prompt, report
    ) VALUES (?, ?, ?, ?, ?, ?)""",
    (
        profile.name, request, qwen_response, phi_response, report_prompt, report
    ))

    conn.commit()

    print(report)

    

[92m11:50:30 - LiteLLM:DEBUG[0m: utils.py:311 - 

[92m11:50:30 - LiteLLM:DEBUG[0m: utils.py:311 - [92mRequest to litellm:[0m
[92m11:50:30 - LiteLLM:DEBUG[0m: utils.py:311 - [92mlitellm.completion(model='ollama/hf.co/ernanhughes/Fin-R1-Q8_0-GGUF', messages=[{'role': 'user', 'content': "<think>User A is 35 years old with a moderate risk tolerance. Goal is retirement in 20 years. Current portfolio: {'stocks': 6000, 'bonds': 300, 'cash': 1000}. Economic context: Fed is expected to raise interest rates next quarter."}])[0m
[92m11:50:30 - LiteLLM:DEBUG[0m: utils.py:311 - 

[92m11:50:30 - LiteLLM:DEBUG[0m: litellm_logging.py:388 - self.optional_params: {}
[92m11:50:30 - LiteLLM:DEBUG[0m: utils.py:311 - SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
[92m11:50:30 - LiteLLM:INFO[0m: utils.py:3002 - 
LiteLLM completion() model= hf.co/ernanhughes/Fin-R1-Q8_0-GGUF; provider = ollama
[92m11:50:30 - LiteLLM:DEBUG[0m: utils.py:3005 - 
LiteL

Response A Score: 9/10
Response B Score: 7/10
Winner: Response A

### Justification:

**Response A:**
- **Accuracy and Factual Correctness:** The response accurately addresses the user's situation, considering their moderate risk tolerance, retirement goal in 20 years, and the upcoming interest rate hike. It provides a detailed step-by-step plan that is logically sound.
- **Clarity and Coherence:** The advice is presented clearly with a structured approach, making it easy for User A to understand and follow. Each section of the response flows well from one point to another.
- **Depth of Explanation:** Response A offers comprehensive insights into various aspects such as portfolio allocation, diversification, additional savings contributions, and regular rebalancing strategies.
- **Relevance to the Question:** The advice is highly relevant as it directly addresses User A's needs and provides practical steps for achieving their retirement goal.

**Response B:**
- **Accuracy and Factual C

[92m11:53:45 - LiteLLM:INFO[0m: utils.py:1165 - Wrapper: Completed Call, calling success_handler
[92m11:53:45 - LiteLLM:INFO[0m: cost_calculator.py:588 - selected model name for cost calculation: ollama/hf.co/ernanhughes/Fin-R1-Q8_0-GGUF
[92m11:53:45 - LiteLLM:DEBUG[0m: litellm_logging.py:1089 - Logging Details LiteLLM-Success Call: Cache_hit=None
[92m11:53:45 - LiteLLM:DEBUG[0m: utils.py:4300 - checking potential_model_names in litellm.model_cost: {'split_model': 'hf.co/ernanhughes/Fin-R1-Q8_0-GGUF', 'combined_model_name': 'ollama/hf.co/ernanhughes/Fin-R1-Q8_0-GGUF', 'stripped_model_name': 'hf.co/ernanhughes/Fin-R1-Q8_0-GGUF', 'combined_stripped_model_name': 'ollama/hf.co/ernanhughes/Fin-R1-Q8_0-GGUF', 'custom_llm_provider': 'ollama'}
[92m11:53:45 - LiteLLM:INFO[0m: cost_calculator.py:588 - selected model name for cost calculation: ollama/hf.co/ernanhughes/Fin-R1-Q8_0-GGUF
[92m11:53:45 - LiteLLM:DEBUG[0m: utils.py:4300 - checking potential_model_names in litellm.model_cost

Response A Score: 8/10
Response B Score: 6/10
Winner: Response A

### Justification:

**Response A:**
- **Accuracy and Factual Correctness:** The response accurately addresses the user's profile, economic context, and investment goals. It correctly identifies the need to balance risk with growth potential.
- **Clarity and Coherence:** The analysis is well-structured and easy to follow, providing a clear rationale for each recommendation.
- **Depth of Explanation:** It delves into key factors like inflation impact, market volatility, and time horizon, offering detailed insights. It also suggests specific strategies within the stock allocation (diversified across sectors) but stops short of diving too deeply into specific investment types or styles.
- **Relevance to the Question:** The response directly addresses the user's profile and provides a comprehensive portfolio optimization strategy.

**Response B:**
- **Accuracy and Factual Correctness:** While it generally aligns with the goal

[92m11:54:17 - LiteLLM:DEBUG[0m: cost_calculator.py:344 - Returned custom cost for model=ollama/qwen2.5 - prompt_tokens_cost_usd_dollar: 0, completion_tokens_cost_usd_dollar: 0
[92m11:54:17 - LiteLLM:DEBUG[0m: litellm_logging.py:899 - response_cost: 0
[92m11:54:17 - LiteLLM:DEBUG[0m: utils.py:4300 - checking potential_model_names in litellm.model_cost: {'split_model': 'qwen2.5', 'combined_model_name': 'ollama/qwen2.5', 'stripped_model_name': 'qwen2.5', 'combined_stripped_model_name': 'ollama/qwen2.5', 'custom_llm_provider': 'ollama'}
[92m11:54:17 - LiteLLM:DEBUG[0m: utils.py:4584 - model_info: {'key': 'qwen2.5', 'litellm_provider': 'ollama', 'mode': 'chat', 'supports_function_calling': True, 'input_cost_per_token': 0.0, 'output_cost_per_token': 0.0, 'max_tokens': 32768, 'max_input_tokens': 32768, 'max_output_tokens': 32768}
[92m11:54:17 - LiteLLM:ERROR[0m: litellm_logging.py:3525 - Error creating standard logging object - __annotations__
Traceback (most recent call last):
  F