## Email Analysis *Extract, Transform, and Load* Notebook

### Imports

In [1]:
# Standard Library Imports
import logging
from typing import List, Dict, Any
from datetime import datetime

# Third-Party Imports
import pypff # type: ignore

# Local Imports
from src.utils.config import Config
from src.extract.pst_extractor import PstMessageExtractor, PrimaryEmailFeatures, ProcessedBatch
from src.transform.primary_features import PrimaryFeaturesExtractor
#from src.transform.derived_features import DerivedFeaturesExtractor
#from src.load.data_loader import DataLoader

# Set the root logger threshold to INFO so the pipeline's progress messages
# (e.g. which folders are skipped or extracted) appear in the cell output.
logging.basicConfig(level=logging.INFO)

### Configuration

In [2]:
# Load the run settings from config.json (relative path).
config: Config = Config.from_json("config.json")

# The PST reader is built from the configured input path and chunk size.
extractor: PstMessageExtractor = PstMessageExtractor(
    config.input_pst_path,
    config.chunk_size,
)

# Turns each raw message into a PrimaryEmailFeatures record.
primary_extractor: PrimaryFeaturesExtractor = PrimaryFeaturesExtractor()

# Not wired in yet: derived-feature extraction and output loading.
#derived_extractor: DerivedFeaturesExtractor = DerivedFeaturesExtractor()
#loader: DataLoader = DataLoader(config.output_directory)

### Extract

In [3]:
# Walk the PST chunk by chunk: extract the primary features of every message
# in the chunk, then wrap the chunk's results in a single ProcessedBatch.
for chunk in extractor.extract_messages():
    processed_messages: List[PrimaryEmailFeatures] = []

    for message in chunk.messages:
        # The folder path is passed along with each message of the chunk.
        primary_features: PrimaryEmailFeatures = primary_extractor.extract(message, chunk.folder_path)
        #derived_features: Dict[str, Any] = derived_extractor.extract(primary_features.model_dump())

        # The record is rebuilt from its own dump so that the (currently
        # disabled) derived features can later be merged in as extra
        # keyword arguments.
        email_message = PrimaryEmailFeatures(
            **primary_features.model_dump(),
            #**derived_features
        )

        processed_messages.append(email_message)

    # Take ONE timestamp per batch. Calling datetime.now() separately for
    # batch_id and processed_at yields two slightly different instants, so
    # the id would not match the batch's own processed_at value.
    batch_timestamp: datetime = datetime.now()
    processed_batch: ProcessedBatch = ProcessedBatch(
        batch_id=f"batch_{batch_timestamp.isoformat()}",
        processed_at=batch_timestamp,
        messages=processed_messages,
    )
    # NOTE(review): while the loader is disabled, each batch is overwritten by
    # the next iteration and nothing is persisted.
    #loader.load(processed_batch)

INFO:root:Skipping empty folder: Deleted Items
INFO:root:Extracting messages from folder: Inbox
ERROR:root:Error getting value for key subject: pypff_message_get_conversation_topic: unable to retrieve conversation topic size. libuna_unicode_character_copy_from_utf16_stream: unsupported UTF-16 character. libuna_utf8_string_size_from_utf16_stream: unable to copy Unicode character from UTF-16 stream. libpff_mapi_value_get_data_as_utf8_string_size: unable to determine size of value data as UTF-8 string. libpff_record_entry_get_data_as_utf8_string_size_with_codepage: unable to determine size of value data as UTF-8 string. libpff_internal_item_get_entry_value_utf8_string_size: unable to retrieve UTF-8 string size. libpff_message_get_entry_value_utf8_string_size: unable to retrieve UTF-8 string size.
INFO:root:Skipping empty folder: Outbox
INFO:root:Skipping empty folder: Sync Issues (This computer only)
INFO:root:Skipping empty folder: [Gmail]
INFO:root:Skipping empty folder: Banking
INFO:ro