In [9]:
import os
from typing import Dict, List, Optional, TypedDict
from dataclasses import dataclass
import pandas as pd
import json
from langchain_anthropic import ChatAnthropic
from langchain.agents import create_sql_agent
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
from langchain_community.utilities.sql_database import SQLDatabase
from langchain_core.messages import SystemMessage, HumanMessage
from langgraph.graph import StateGraph, START, END
from dotenv import load_dotenv

# Load environment variables
load_dotenv('api_key.env')

class AnalysisState(TypedDict):
    """State for the analysis workflow"""
    user_query: str
    decomposed_questions: List[str]
    sql_results: Dict
    analysis: str
    final_output: Dict

class ConfigError(Exception):
    """Custom exception for configuration errors"""
    pass

@dataclass
class Config:
    db_path: str = "apple_last_year_data.csv"
    sqlite_path: str = "sqlite:///consumption.db"
    model_name: str = "claude-3-sonnet-20240229"
    
    @property
    def api_key(self) -> str:
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            raise ConfigError("ANTHROPIC_API_KEY not found in api_key.env file")
        return api_key

SQL_AGENT_PROMPT = """You are an expert financial database analyst. Analyze questions and create SQL queries for stock data analysis.

Available Columns: date, open, high, low, close, volume

Example 1:
User: "Show me the performance metrics for last week"
Thought: Need to calculate key metrics including price changes, trading ranges, and volume patterns for the past week
SQL:
SELECT 
    date,
    ROUND(open, 2) as open_price,
    ROUND(close, 2) as close_price,
    ROUND(((close - open) / open * 100), 2) as daily_change_percent,
    ROUND(high, 2) as high_price,
    ROUND(low, 2) as low_price,
    volume
FROM consumption
WHERE date >= date('now', '-7 days')
ORDER BY date DESC;

Example 2:
User: "Identify days with high volatility"
Thought: Looking for days with large price ranges and above-average volume
SQL:
WITH avg_metrics AS (
    SELECT 
        AVG(volume) as avg_volume,
        AVG((high - low) / open * 100) as avg_range_percent
    FROM consumption
)
SELECT 
    date,
    ROUND(open, 2) as open_price,
    ROUND(close, 2) as close_price,
    ROUND(((high - low) / open * 100), 2) as price_range_percent,
    volume,
    ROUND(volume / avg_volume, 2) as volume_ratio
FROM consumption, avg_metrics
WHERE (high - low) / open * 100 > avg_range_percent
    AND volume > avg_volume
ORDER BY price_range_percent DESC;

Format your response as:
1. Thought: Your analysis approach
2. SQL: Your query
3. Explanation: Interpret the results
"""

ANALYST_PROMPT = """You are an expert financial analyst. Given SQL query results, provide comprehensive market analysis.

Example 1:
Data: Price trend data with volumes
Analysis:
1. Price Trend: Identify direction and strength
2. Volume Analysis: Look for confirmation/divergence
3. Key Levels: Support/resistance points
4. Pattern Recognition: Technical patterns
5. Risk Assessment: Volatility and liquidity

Example 2:
Data: Volatility metrics
Analysis:
1. Volatility Trends: Changes in price ranges
2. Volume Impact: Relationship with price moves
3. Market Sentiment: What patterns suggest
4. Risk Levels: Current vs historical
5. Trading Implications: What data suggests

Provide:
1. Key Findings: Main observations
2. Technical Analysis: Pattern analysis
3. Risk Assessment: Current risk levels
4. Action Items: Suggested next steps
"""

class IntegratedAnalyzer:
    def __init__(self, config: Config):
        self.config = config
        self.db = self._init_database()
        self.llm = self._init_llm()
        self.sql_agent = self._setup_sql_agent()
        self.workflow = self._create_workflow()

    def _init_database(self) -> SQLDatabase:
        df = pd.read_csv(self.config.db_path)
        df.to_sql('consumption', 'sqlite:///consumption.db', index=False, if_exists='replace')
        return SQLDatabase.from_uri(self.config.sqlite_path)

    def _init_llm(self) -> ChatAnthropic:
        return ChatAnthropic(
            model=self.config.model_name,
            temperature=0,
            api_key=self.config.api_key
        )

    def _setup_sql_agent(self):
        toolkit = SQLDatabaseToolkit(db=self.db, llm=self.llm)
        return create_sql_agent(
            llm=self.llm,
            toolkit=toolkit,
            agent_type="zero-shot-react-description",
            verbose=True,
            prefix=SQL_AGENT_PROMPT
        )

    def _decompose_question(self, state: AnalysisState) -> AnalysisState:
        prompt = f"""Break down this analysis question into specific sub-questions:
        Question: {state['user_query']}
        Return only numbered questions."""
        
        response = self.llm.invoke([SystemMessage(content=prompt)])
        questions = [q.strip().split(". ", 1)[1] for q in response.content.split("\n") 
                    if q.strip() and q[0].isdigit()]
        
        return {**state, "decomposed_questions": questions}

    def _run_sql_analysis(self, state: AnalysisState) -> AnalysisState:
        results = {}
        for i, question in enumerate(state["decomposed_questions"]):
            result = self.sql_agent.invoke({"input": question})
            results[f"question_{i+1}"] = {
                "question": question,
                "thought": self._extract_thought(result['output']),
                "sql": self._extract_sql(result['output']),
                "result": self._extract_result(result['output'])
            }
        
        return {**state, "sql_results": results}

    def _analyze_results(self, state: AnalysisState) -> AnalysisState:
        results_text = json.dumps(state["sql_results"], indent=2)
        analysis = self.llm.invoke([
            SystemMessage(content=ANALYST_PROMPT),
            HumanMessage(content=f"Analyze these results for query '{state['user_query']}':\n{results_text}")
        ])
        
        return {**state, "analysis": analysis.content}

    def _format_output(self, state: AnalysisState) -> AnalysisState:
        final_output = {
            "query": state["user_query"],
            "decomposed_questions": state["decomposed_questions"],
            "sql_analysis": state["sql_results"],
            "expert_analysis": state["analysis"],
            "timestamp": pd.Timestamp.now().isoformat()
        }
        
        return {**state, "final_output": final_output}

    def _extract_thought(self, text: str) -> str:
        if "Thought:" in text:
            return text.split("Thought:")[1].split("SQL")[0].strip()
        return ""

    def _extract_sql(self, text: str) -> str:
        if "SQL:" in text:
            return text.split("SQL:")[1].split("Result")[0].strip()
        return ""

    def _extract_result(self, text: str) -> str:
        if "SQLResult:" in text:
            return text.split("SQLResult:")[1].strip()
        return ""

    def _create_workflow(self) -> StateGraph:
        workflow = StateGraph(AnalysisState)
        
        workflow.add_node("decompose", self._decompose_question)
        workflow.add_node("sql_analysis", self._run_sql_analysis)
        workflow.add_node("analyze", self._analyze_results)
        workflow.add_node("format", self._format_output)
        
        workflow.add_edge(START, "decompose")
        workflow.add_edge("decompose", "sql_analysis")
        workflow.add_edge("sql_analysis", "analyze")
        workflow.add_edge("analyze", "format")
        workflow.add_edge("format", END)
        
        return workflow.compile()

    def analyze(self, query: str) -> Dict:
        try:
            final_state = None
            for state in self.workflow.stream({
                "user_query": query,
                "decomposed_questions": [],
                "sql_results": {},
                "analysis": "",
                "final_output": {}
            }):
                final_state = state
            
            return final_state["final_output"]
        except Exception as e:
            return {"error": str(e), "query": query}

def format_output(results: Dict) -> None:
    print("\n=== Stock Analysis Results ===")
    print(f"\nQuery: {results['query']}")
    
    print("\nDecomposed Questions:")
    for i, q in enumerate(results['decomposed_questions'], 1):
        print(f"{i}. {q}")
    
    print("\nSQL Analysis:")
    for key, data in results['sql_analysis'].items():
        print(f"\nQuestion: {data['question']}")
        print(f"Thought Process: {data['thought']}")
        print(f"SQL Query: {data['sql']}")
        try:
            result_data = json.loads(data['result'])
            df = pd.DataFrame(result_data)
            print("\nResults:")
            print(df.to_string(index=False))
        except:
            print(f"Results: {data['result']}")
    
    print("\nExpert Analysis:")
    print(results['expert_analysis'])

def main():
    try:
        config = Config()
        analyzer = IntegratedAnalyzer(config)
        
        queries = [
            "Show me the stock's trend over the last week with daily price ranges",
            "Find days where volume was significantly above average",
            "Analyze the relationship between daily price ranges and volume"
        ]
        
        for query in queries:
            print(f"\nProcessing: {query}")
            print("=" * 50)
            
            results = analyzer.analyze(query)
            
            if "error" not in results:
                format_output(results)
                
                # Save to JSON
                filename = f"analysis_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.json"
                with open(filename, 'w') as f:
                    json.dump(results, f, indent=2)
                print(f"\nDetailed results saved to {filename}")
            else:
                print(f"Error: {results['error']}")

if __name__ == "__main__":
    main()

SyntaxError: expected 'except' or 'finally' block (3801869233.py, line 290)