In [None]:
class EarningsFetcher:
    """
    Fetches earnings call transcripts and metadata from financial data providers
    """
    
    def __init__(
        self,
        gssso_token: str,
        base_url: str,
        cache_enabled: bool = True,
        max_concurrent: int = 5,
    ):
        """Set up the request handler, the metadata cache and the fetch limiter.

        Args:
            gssso_token: Token handed to ``RequestHandler`` as ``auth_token``.
            base_url: Base URL handed to ``RequestHandler``.
            cache_enabled: When true, fetched metadata is kept in an
                in-memory cache keyed by the requested company IDs.
            max_concurrent: Initial value of the semaphore that bounds how
                many transcripts are fetched at the same time.
        """
        handler_options = {
            "base_url": base_url,
            "auth_token": gssso_token,
            "timeout": 30,
            "max_retries": 3,
        }
        self.request_handler = RequestHandler(**handler_options)

        # Plain configuration, stored as given.
        self.max_concurrent = max_concurrent
        self.cache_enabled = cache_enabled

        # Parsed metadata lists, keyed by the sorted, comma-joined company IDs.
        self._metadata_cache: Dict[str, List[EarningsMetadata]] = {}

        # Shared by every transcript fetch issued through this instance.
        self._semaphore = asyncio.Semaphore(max_concurrent)
    
    async def __aenter__(self):
        """Enter async context"""
        await self.request_handler.__aenter__()
        return self
    
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit async context"""
        await self.request_handler.__aexit__(exc_type, exc_val, exc_tb)
    
    async def close(self):
        """Close request handler and release resources"""
        await self.request_handler.close()
    
    # ----- Public API Methods -----
    
    async def get_latest_earning(self, 
                                refinitiv_id: Union[str, List[str]]) -> List[EarningTranscript]:
        """Get the latest earning transcripts for company IDs"""
        # Normalize input to list
        company_ids = [refinitiv_id] if isinstance(refinitiv_id, str) else refinitiv_id
        
        if not company_ids:
            logger.warning("No company IDs provided for get_latest_earning")
            return []
        
        logger.info(f"Getting latest earnings for {len(company_ids)} companies")
        
        # Get metadata for all companies
        metadata_list = await self._get_earnings_metadata(company_ids)
        
        # Get latest metadata for each company
        latest_metadata = []
        for company_id in company_ids:
            company_metadata = [m for m in metadata_list if m.companyId_s == company_id]
            latest = self._get_latest_metadata(company_metadata)
            if latest:
                latest_metadata.append(latest)
                logger.info(f"Found latest earnings for {latest.company_s} from {latest.document_date_s}")
            else:
                logger.warning(f"No latest earnings found for company ID {company_id}")
        
        if not latest_metadata:
            logger.warning(f"No latest metadata found for any companies: {company_ids}")
            return []
        
        # Fetch transcripts for latest metadata
        transcripts = await self._fetch_transcripts(latest_metadata)
        logger.info(f"Retrieved {len(transcripts)} latest transcripts")
        
        return transcripts
    
    async def get_filtered_earnings(self,
                                   refinitiv_id: Union[str, List[str]],
                                   year: Optional[int] = None,
                                   quarter: Optional[str] = None,
                                   max_per_company: Optional[int] = None) -> List[EarningTranscript]:
        """Get filtered earnings transcripts by year and quarter"""
        # Normalize input to list
        company_ids = [refinitiv_id] if isinstance(refinitiv_id, str) else refinitiv_id
        
        if not company_ids:
            logger.warning("No company IDs provided for get_filtered_earnings")
            return []
        
        filter_desc = []
        if year:
            filter_desc.append(f"year={year}")
        if quarter:
            filter_desc.append(f"quarter={quarter}")
        if max_per_company:
            filter_desc.append(f"max={max_per_company}")
        
        filter_str = ", ".join(filter_desc) if filter_desc else "no filters"
        logger.info(f"Getting filtered earnings for {len(company_ids)} companies with {filter_str}")
        
        # Get metadata for all companies
        metadata_list = await self._get_earnings_metadata(company_ids)
        
        # Apply filters for each company
        filtered_metadata = []
        for company_id in company_ids:
            company_metadata = [m for m in metadata_list if m.companyId_s == company_id]
            filtered = self._filter_metadata(company_metadata, year, quarter)
            
            # Sort by date (newest first)
            filtered.sort(
                key=lambda m: parse_date(m.document_date_s) or datetime.min,
                reverse=True
            )
            
            # Apply max limit
            if max_per_company and len(filtered) > max_per_company:
                filtered = filtered[:max_per_company]
            
            logger.info(f"Found {len(filtered)} filtered earnings for company ID {company_id}")
            filtered_metadata.extend(filtered)
        
        if not filtered_metadata:
            logger.warning(f"No matching metadata found for {filter_str}")
            return []
        
        # Fetch transcripts for filtered metadata
        transcripts = await self._fetch_transcripts(filtered_metadata)
        logger.info(f"Retrieved {len(transcripts)} filtered transcripts")
        
        return transcripts
    
    async def clear_cache(self):
        """Clear metadata cache"""
        self._metadata_cache.clear()
        logger.info("Metadata cache cleared")
    
    # ----- Internal Methods -----
    
    async def _get_earnings_metadata(self, 
                                    company_ids: List[str],
                                    force_refresh: bool = False) -> List[EarningsMetadata]:
        """Fetch earnings metadata for company IDs"""
        if not company_ids:
            return []
        
        # Check cache first
        if not force_refresh and self.cache_enabled:
            cache_key = ','.join(sorted(company_ids))
            if cache_key in self._metadata_cache:
                logger.debug(f"Using cached metadata for {len(company_ids)} companies")
                return self._metadata_cache[cache_key]
        
        logger.info(f"Fetching earnings metadata for {len(company_ids)} companies")
        
        # Construct query
        query_parts = [f"metadata_txt:{quote(company_id)}" for company_id in company_ids]
        query = " OR ".join(query_parts)
        endpoint = f"/search/bulk/query/%20AND%20({query})"
        
        try:
            # Make API request
            content = await self.request_handler.get(endpoint)
            metadata_list = self._parse_metadata_response(content)
            
            # Update cache
            if self.cache_enabled:
                cache_key = ','.join(sorted(company_ids))
                self._metadata_cache[cache_key] = metadata_list
            
            return metadata_list
            
        except RequestError as e:
            logger.error(f"Error fetching metadata: {e}")
            raise
        except Exception as e:
            logger.exception(f"Unexpected error fetching metadata: {e}")
            raise ContentFetchError(f"Failed to fetch metadata: {str(e)}")
    
    def _parse_metadata_response(self, content: str) -> List[EarningsMetadata]:
        """Parse metadata entries out of the search API response.

        Every ``lexDocument`` element is scanned for ``field`` children made
        of a ``name`` and a ``values`` element. The ``metadata_txt`` field is
        decoded as JSON and the ``objecturl`` field, when present, is added
        to that JSON object under the ``"objecturl"`` key before the result
        is validated as ``EarningsMetadata``.

        Documents whose JSON is invalid, is not a JSON object, or fails
        validation are logged and skipped; the remaining documents are still
        returned.

        Args:
            content: Raw response body.

        Returns:
            One ``EarningsMetadata`` per successfully parsed document.

        Raises:
            ParsingError: If anything unexpected goes wrong while parsing;
                the original exception is chained as the cause.
        """
        try:
            result = []
            soup = BeautifulSoup(content, "lxml")

            # "lxml" is Beautiful Soup's HTML parser, which lower-cases every
            # tag name, so searching for "lexDocument" alone never matches.
            # Both spellings are accepted so the lookup also keeps working
            # with a case-preserving XML parser.
            for doc in soup.find_all(["lexDocument", "lexdocument"]):
                metadata_dict = {}
                url = None

                for field in doc.find_all('field'):
                    name = field.find('name')
                    values = field.find('values')

                    if name is None or values is None:
                        continue

                    field_name = name.text.strip()
                    field_value = values.text.strip()

                    if field_name == "metadata_txt":
                        try:
                            metadata_dict = json.loads(field_value)
                        except json.JSONDecodeError:
                            logger.error(f"Invalid JSON in metadata_txt: {field_value[:100]}...")
                    elif field_name == "objecturl":
                        url = field_value

                if not metadata_dict:
                    continue

                # Valid JSON that is not an object cannot become keyword
                # arguments; skip this document instead of failing them all.
                if not isinstance(metadata_dict, dict):
                    logger.error(
                        f"Unexpected metadata_txt payload type: {type(metadata_dict).__name__}"
                    )
                    continue

                if url:
                    metadata_dict["objecturl"] = url

                try:
                    result.append(EarningsMetadata(**metadata_dict))
                except ValidationError as e:
                    logger.error(f"Invalid metadata format: {e}")

            logger.debug(f"Parsed {len(result)} metadata entries")
            return result

        except Exception as e:
            logger.exception(f"Error parsing metadata response: {e}")
            raise ParsingError(f"Failed to parse metadata response: {str(e)}") from e
    
    def _get_latest_metadata(self, metadata_list: List[EarningsMetadata]) -> Optional[EarningsMetadata]:
        """Get latest metadata from list"""
        if not metadata_list:
            return None
        
        # Filter entries with valid dates
        valid_metadata = []
        for metadata in metadata_list:
            if metadata.document_date_s:
                date = parse_date(metadata.document_date_s)
                if date:
                    valid_metadata.append((metadata, date))
        
        if not valid_metadata:
            return None
        
        # Return metadata with latest date
        return max(valid_metadata, key=lambda x: x[1])[0]
    
    def _filter_metadata(self, 
                        metadata_list: List[EarningsMetadata],
                        year: Optional[int] = None,
                        quarter: Optional[str] = None) -> List[EarningsMetadata]:
        """Filter metadata by year and quarter"""
        if not metadata_list:
            return []
        
        if year is None and quarter is None:
            return metadata_list
        
        filtered = []
        
        for metadata in metadata_list:
            if not metadata.document_date_s:
                continue
                
            date = parse_date(metadata.document_date_s)
            if not date:
                continue
                
            # Apply year filter
            if year is not None and date.year != year:
                continue
                
            # Apply quarter filter
            if quarter is not None:
                doc_quarter = f"Q{(date.month - 1) // 3 + 1}"
                if doc_quarter != quarter:
                    continue
                    
            filtered.append(metadata)
        
        return filtered
    
    async def _fetch_transcripts(self, metadata_list: List[EarningsMetadata]) -> List[EarningTranscript]:
        """Fetch transcripts for metadata"""
        if not metadata_list:
            return []
        
        logger.info(f"Fetching {len(metadata_list)} transcripts")
        
        async def fetch_with_semaphore(metadata: EarningsMetadata) -> Optional[EarningTranscript]:
            async with self._semaphore:
                return await self._fetch_single_transcript(metadata)
        
        # Create fetch tasks
        fetch_tasks = [fetch_with_semaphore(metadata) for metadata in metadata_list]
        
        # Execute tasks concurrently
        results = await asyncio.gather(*fetch_tasks, return_exceptions=True)
        
        # Process results
        transcripts = []
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                logger.error(f"Error fetching transcript: {result}")
            elif result is not None:
                transcripts.append(result)
        
        return transcripts
    
    async def _fetch_single_transcript(self, metadata: EarningsMetadata) -> Optional[EarningTranscript]:
        """Fetch transcript for single metadata"""
        if not metadata.objecturl:
            logger.warning(f"No object URL for metadata: {metadata}")
            return None
        
        logger.debug(f"Fetching transcript for {metadata.company_s} ({metadata.document_date_s})")
        
        try:
            # Make direct request to objecturl
            response_text = await self.request_handler.get(metadata.objecturl, {}, {})
            
            return EarningTranscript(
                metadata=metadata,
                transcript=response_text
            )
            
        except RequestError as e:
            logger.error(f"Error fetching transcript for {metadata.companyId_s}: {e}")
            return None
        except Exception as e:
            logger.exception(f"Unexpected error fetching transcript: {e}")
            return None