# In [None]:  (notebook cell marker left by the export; commented out so the file parses as Python)
from pydantic import BaseModel, Field
from typing import Dict, List, Optional, Any
import asyncio
import aiohttp
from bs4 import BeautifulSoup
import json
from goldmansachs.awm_genai import ConverseAILangchainLLM, JsonFormer

class EarningsMetadata(BaseModel):
    # Metadata record for one earnings document. Instances are created by
    # unpacking the JSON object stored in a search result's `metadata_txt`
    # field, so the field names below must match the keys of that JSON.
    # Every field except `objecturl` is required; pydantic rejects records
    # that lack one of them.
    # NOTE(review): the `_s` suffix presumably mirrors the search index's
    # string-field naming convention — confirm against the index schema.
    quarter_s: str  # rendered as "Q{quarter_s}" in the report period
    year_s: str  # rendered after the quarter in the report period
    company_s: str  # reported as the "company" of the analysis
    doc_type_s: str
    tags_s: List[str]
    event_time_s: str
    companyId_s: str
    tickers_s: List[str]  # the first entry (if any) is reported as the ticker
    cusips_s: List[str]
    isins_s: List[str]
    sedols_s: List[str]
    # Compared as a plain string to pick the most recent document.
    # NOTE(review): that ordering is only chronological for ISO-like
    # dates (e.g. YYYY-MM-DD) — confirm the format the index returns.
    document_date_s: str
    # URL the document body is downloaded from; documents without one
    # yield empty content.
    objecturl: Optional[str] = None

class ECTConfig:
    """Static configuration used when building ECT prompts."""

    # Maps each report sub-heading to the topics that are listed in the
    # question asked for that heading. Insertion order matters: prompts are
    # bucketed by slicing the items in this order.
    HEADINGS = {
        "Revenue": [
            "company wide revenue",
            "segment-wise revenue",
            "revenue drivers",
        ],
        "Margins and Cashflow": [
            "margins",
            "cash drivers",
        ],
        "Guidance": [
            "Playbook",
            "Guidance",
            "projection",
        ],
        "Capital Allocation": [
            "capital allocation",
            "capex",
            "share repurchase",
        ],
        "M&A": [
            "M&A",
            "merger acquisitions",
        ],
        "Inventory & Pricing Strategy": [
            "inventory",
            "pricing strategy",
        ],
        "Macro Environment": [
            "Macro Environment",
            "Labor",
            "Supply Chain",
        ],
        "Products & R&D": [
            "product development",
            "Product lines",
            "new products",
            "R&D",
        ],
    }

class ECTAnalyzer:
    def __init__(self, gssso_token: str):
        """Create the LLM client and remember the SSO token.

        Args:
            gssso_token: Value sent as the ``GSSSO`` cookie on the HTTP
                requests this analyzer makes.
        """
        llm_client = ConverseAILangchainLLM.from_defaults('fluentai', model_name='gpt-4o')
        self.gssso_token = gssso_token
        self.llm = llm_client
        
    async def get_earnings_documents(
        self,
        company_id: str,
        *,
        max_retries: int = 5,
        retry_delay: float = 1.0,
    ) -> List[EarningsMetadata]:
        """Query the search service for a company's earnings documents.

        The request is attempted up to ``max_retries`` times; a non-200
        status or any exception raised while requesting/parsing counts as a
        failed attempt. This method is best-effort and does not raise for
        those failures.

        Args:
            company_id: Identifier interpolated into the ``metadata_txt:``
                search query.
            max_retries: Maximum number of request attempts.
            retry_delay: Seconds to wait between failed attempts.

        Returns:
            The metadata records parsed from the first successful response
            (possibly empty), or ``[]`` when every attempt failed.
        """
        query = f"metadata_txt:{company_id}"
        headers = {
            "Cookie": f"GSSSO={self.gssso_token}",
            "responseType": "application/json"
        }
        base_url = "https://vip.gsam.prod.search.lex.site.gs.com/searchService/rest/search/bulk/query/table:com.gs.swm.gena1"
        # NOTE(review): the "?%20AND%20(...)" suffix is kept exactly as it
        # was; presumably the service expects the filter appended this way —
        # confirm against the search service documentation.
        full_url = f"{base_url}?%20AND%20({query})"

        async with aiohttp.ClientSession() as session:
            for attempt in range(max_retries):
                try:
                    async with session.get(full_url, headers=headers) as response:
                        if response.status == 200:
                            content = await response.text()
                            return self._parse_earnings_documents(content)
                        # Previously a non-200 answer was retried silently.
                        print(f"Request failed with HTTP status {response.status}")
                except Exception as e:
                    # Deliberately broad: any failure is logged and retried.
                    print(f"Request error: {e}")

                # No point in sleeping once the last attempt has failed.
                if attempt < max_retries - 1:
                    await asyncio.sleep(retry_delay)

        return []

    @staticmethod
    def _parse_earnings_documents(content: str) -> List[EarningsMetadata]:
        """Extract one EarningsMetadata per parsable document in a response body."""
        soup = BeautifulSoup(content, 'lxml')
        documents = []

        # The 'lxml' HTML parser lowercases tag names, so an exact search for
        # "lexDocument" never matches. Compare case-insensitively so the
        # lookup works regardless of how the parser normalises names.
        for doc in soup.find_all(lambda tag: tag.name.lower() == "lexdocument"):
            try:
                metadata_field = doc.find('field', {'name': 'metadata_txt'})
                if metadata_field is None:
                    continue
                metadata_txt = metadata_field.find('values').text.strip()
                documents.append(EarningsMetadata(**json.loads(metadata_txt)))
            except Exception as e:
                # A single malformed record must not discard the others.
                print(f"Error parsing document metadata: {e}")

        return documents

    async def get_earnings_content(self, metadata: EarningsMetadata) -> str:
        if not metadata.objecturl:
            return ""
            
        headers = {"Cookie": f"GSSSO={self.gssso_token}"}
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(metadata.objecturl, headers=headers) as response:
                    if response.status == 200:
                        return await response.text()
            except Exception as e:
                print(f"Error fetching content: {e}")
        return ""

    def generate_ect_prompts(
        self,
        ect: str,
        company_id: str,
        html_prompt: str = "",
        bucket_size: int = 4,
    ) -> List[Dict]:
        """Build one prompt per bucket of configured headings.

        The entries of ``ECTConfig.HEADINGS`` are split, in order, into
        consecutive groups of ``bucket_size``; each group becomes a single
        multi-question prompt.

        Args:
            ect: Earnings call text appended to every prompt.
            company_id: Stored under ``"company"`` in each prompt record.
            html_prompt: Optional extra instruction inserted into the prompt.
            bucket_size: Number of headings covered by one prompt.

        Returns:
            A list of dicts with the keys ``prompt``, ``company``, ``type``,
            ``heading``, ``bucket`` and ``model``.

        Raises:
            ValueError: If ``bucket_size`` is smaller than 1.
        """
        if bucket_size < 1:
            raise ValueError("bucket_size must be a positive integer")

        items = list(ECTConfig.HEADINGS.items())
        prompt_buckets = [items[i:i + bucket_size] for i in range(0, len(items), bucket_size)]

        return [
            {
                "prompt": self._format_bucket_prompt(bucket, ect, html_prompt),
                "company": company_id,
                "type": "ECT",
                # Consumers read prompt["heading"] to label results. It used
                # to be missing, so every result ended up with the same empty
                # label. Name the headings this bucket covers instead.
                "heading": ", ".join(heading for heading, _ in bucket),
                "bucket": bucket_id,
                "model": "gpt-4o",
            }
            for bucket_id, bucket in enumerate(prompt_buckets)
        ]

    def _format_bucket_prompt(self, bucket: List[tuple], ect: str, html_prompt: str) -> str:
        metric_count = len(bucket)
        prompt = f" Answer the following {metric_count} questions using content from the Earnings call text specified below. "
        prompt += "Make sure to include all supporting information available in the text without repeating information.\n"
        prompt += f"Make sure the response is able to answer the {metric_count} questions below {html_prompt}\n"
        
        for i, (heading, metrics) in enumerate(bucket, 1):
            metrics_str = ", ".join(metrics)
            prompt += f"{i}. How is the {metrics_str} of the company? "
            prompt += f"Give an in-depth response under the sub-heading: {heading}. "
            prompt += "Use up to 7 bullets to formulate the response.\n"
            
        prompt += f"The Earnings call text is as follows: \n{ect}"
        return prompt

    async def execute_prompts(self, prompts: List[Dict]) -> List[Dict]:
        """Run each prompt through the LLM and collect structured results.

        Prompts are processed one at a time, in order. A prompt whose
        execution raises is reported with ``print`` and skipped, so the
        result may be shorter than ``prompts``.

        Args:
            prompts: Records with at least ``"prompt"`` and ``"type"`` keys;
                ``"heading"`` and ``"bucket"`` are optional.

        Returns:
            One dict per successful prompt with the keys ``result``,
            ``type``, ``heading``, ``confidence`` and ``bucket``.
        """
        # Output schema handed to JsonFormer. Kept free of a docstring so
        # the schema itself is unchanged.
        # NOTE(review): `invoke` is called synchronously inside this
        # coroutine — confirm whether an awaitable variant should be used.
        class AnalysisOutput(BaseModel):
            analysis: str = Field(..., description="Detailed analysis of the given aspect")
            confidence: float = Field(..., ge=0, le=1, description="Confidence score of the analysis")

        collected = []
        for spec in prompts:
            try:
                # A fresh JsonFormer per prompt, as before.
                former = JsonFormer(schema=AnalysisOutput, llm=self.llm)
                parsed = former.invoke(spec["prompt"])
                entry = {
                    "result": parsed.analysis,
                    "type": spec["type"],
                    "heading": spec.get("heading", ""),
                    "confidence": parsed.confidence,
                    "bucket": spec.get("bucket", 0)
                }
                collected.append(entry)
            except Exception as e:
                print(f"Error executing prompt: {e}")

        return collected

    async def get_enhanced_ect(self, company_id: str) -> Dict:
        documents = await self.get_earnings_documents(company_id)
        if not documents:
            return {"error": "No earnings documents found"}
            
        documents.sort(key=lambda x: x.document_date_s, reverse=True)
        latest_doc = documents[0]
        
        content = await self.get_earnings_content(latest_doc)
        if not content:
            return {"error": "Could not fetch earnings content"}
            
        prompts = self.generate_ect_prompts(content, company_id)
        responses = await self.execute_prompts(prompts)
        
        analysis_by_heading = {}
        for response in responses:
            if response["confidence"] > 0.7:
                analysis_by_heading[response["heading"]] = response["result"]
        
        return {
            "metadata": {
                "company": latest_doc.company_s,
                "period": f"Q{latest_doc.quarter_s} {latest_doc.year_s}",
                "date": latest_doc.document_date_s,
                "ticker": latest_doc.tickers_s[0] if latest_doc.tickers_s else None
            },
            "analysis": analysis_by_heading
        }

async def main():
    """Run the analysis for a sample company and print the result as JSON."""
    company_id = "107357"  # Apple's company ID
    analyzer = ECTAnalyzer(gssso_token="your_token")
    result = await analyzer.get_enhanced_ect(company_id)
    report = json.dumps(result, indent=2)
    print(report)

# Script entry point: when the file is executed directly, run the async
# main() to completion on a new event loop.
# NOTE(review): asyncio.run() raises RuntimeError if an event loop is already
# running (e.g. inside a notebook cell) — confirm how this file is launched.
if __name__ == "__main__":
    asyncio.run(main())