# FinSpeak NLU Demo
## Natural Language Understanding for Financial Queries

This notebook demonstrates intent detection and entity extraction (fund names and time periods) on a small set of sample financial queries.

In [None]:
import sys
sys.path.append('..')

from fin_speak.nlp import detect_intent, extract_fund, extract_time_period
from fin_speak.kb import load_funds_data, match_fund, get_fund_info

## Test Queries

In [None]:
# Sample user questions used as input by the demo cells in this notebook.
# The trailing comment on each entry is the intent it is expected to map to.
test_queries = [
    "What is the current NAV of Vanguard S&P 500 Fund?",  # get_nav
    "Show me 6 month returns for Fidelity Growth Fund",  # get_return
    "How has Wellington Fund performed over 1 year?",  # get_return
    "Get me the latest price of PIMCO Total Return Fund",  # get_nav
    # This query names no specific fund.
    "Why did the fund price change?",  # explain_change
    "What are the 3 month returns for BlackRock Global Allocation?",  # get_return
]

## Intent Detection

In [None]:
import pandas as pd

# Run detect_intent once per demo query and build one display row per query.
# Confidence is rendered as a percentage string; 'period_months' is read with
# .get(), so it is None whenever the detector result does not include it.
intent_results = [
    {
        'query': text,
        'intent': detected['intent'],
        'confidence': f"{detected['confidence']:.2%}",
        'period_months': detected.get('period_months'),
        'method': detected['method'],
    }
    for text, detected in zip(test_queries, map(detect_intent, test_queries))
]

# Tabulate the rows; the bare name on the last line renders the table.
df_intents = pd.DataFrame(intent_results)
df_intents

## Fund Extraction

In [None]:
# For each demo query: extract a fund mention, then (only when something was
# extracted) look up its id with match_fund and its category with
# get_fund_info. Anything that cannot be resolved stays None.
fund_results = []

for text in test_queries:
    mention, score = extract_fund(text)

    # Each lookup is skipped when the previous step produced nothing.
    resolved_id = match_fund(mention) if mention else None
    fund_record = get_fund_info(resolved_id) if resolved_id else None
    fund_category = fund_record['category'] if fund_record else None

    # Keep the table narrow: show at most 50 characters of the query.
    shown_query = text
    if len(text) > 50:
        shown_query = text[:50] + '...'

    fund_results.append({
        'query': shown_query,
        'extracted_fund': mention,
        'confidence': f"{score:.2%}",
        'fund_id': resolved_id,
        'category': fund_category,
    })

# Tabulate the rows; the bare name on the last line renders the table.
df_funds = pd.DataFrame(fund_results)
df_funds

## Time Period Extraction

In [None]:
# Sample phrases mixing numeric and spelled-out durations, in months and years.
period_tests = [
    "6 month returns",
    "1 year performance",
    "3 month gains",
    "2 year returns",
    "one year",
    "six months",
]

# Print each phrase, left-aligned in a 20-character column, next to the value
# extract_time_period returns for it. map() is lazy, so extraction and printing
# still alternate phrase by phrase.
parsed_periods = map(extract_time_period, period_tests)
for phrase, months in zip(period_tests, parsed_periods):
    print(f"{phrase:20s} -> {months} months")

## Accuracy Analysis

In [None]:
# Intent detection accuracy
# Expected intent for each entry of test_queries, in the same order.
correct_intents = [
    'get_nav',
    'get_return',
    'get_return',
    'get_nav',
    'explain_change',
    'get_return',
]

# Predictions and labels are paired by position. If the two lists drift apart
# (for example test_queries was edited without updating the labels), the score
# would be silently wrong, so fail loudly instead.
if len(intent_results) != len(correct_intents):
    raise ValueError(
        f"Expected {len(correct_intents)} intent results to score, "
        f"got {len(intent_results)}"
    )

# Fraction of queries whose detected intent equals the expected one.
intent_accuracy = sum(
    1
    for row, expected in zip(intent_results, correct_intents)
    if row['intent'] == expected
) / len(correct_intents)

print(f"Intent Detection Accuracy: {intent_accuracy:.2%}")

# Fund extraction success rate: share of queries resolved to a fund id.
# NOTE: every query counts toward the denominator, including ones that name no
# specific fund, so such queries are reported as misses.
fund_success = sum(1 for r in fund_results if r['fund_id'] is not None) / len(fund_results)
print(f"Fund Extraction Success Rate: {fund_success:.2%}")

## Training a Simple ML Classifier (Optional)

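This section shows how to train a simple TF-IDF + LogisticRegression classifier for intent detection. The classifier is fit on the entire (very small) labelled set below with no held-out split, so the predictions printed for the demo queries are only a quick sanity check, not a measured accuracy.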

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Labelled (query text, intent) pairs, grouped by intent.
training_data = [
    # get_nav
    ("What is the current NAV?", "get_nav"),
    ("Show me the latest NAV", "get_nav"),
    ("Current price of the fund", "get_nav"),
    ("What's the value today?", "get_nav"),
    ("Get me the NAV", "get_nav"),

    # get_return
    ("Show me returns", "get_return"),
    ("How has the fund performed?", "get_return"),
    ("What are the 6 month returns?", "get_return"),
    ("Performance over the year", "get_return"),
    ("How much has it grown?", "get_return"),

    # explain_change
    ("Why did the price change?", "explain_change"),
    ("Explain the drop in value", "explain_change"),
    ("Reason for the increase?", "explain_change"),
    ("What caused the change?", "explain_change"),
]

# Unzip the pairs into two parallel lists: the texts and their labels.
texts, labels = (list(column) for column in zip(*training_data))

# Learn the TF-IDF vocabulary from the training texts, then fit the classifier
# on all of the resulting features (no examples are held out).
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)
clf = LogisticRegression(max_iter=1000).fit(X, labels)

# Vectorize the demo queries with the already-fitted vocabulary and predict.
X_test = vectorizer.transform(test_queries)
predictions = clf.predict(X_test)

# One line per query: the text cut and padded to 50 columns, then the label.
for text, predicted in zip(test_queries, predictions):
    print(f"{text[:50]:50s} -> {predicted}")