## Email Analysis *Extract, Transform, and Load* Notebook

### Imports

In [1]:
# Standard Library Imports
import logging
from typing import List, Dict, Any
from datetime import datetime

# Third-Party Imports
import pypff # type: ignore

# Local Imports
from src.utils.config import Config
from src.extract.pst_message_extractor import PstMessageExtractor, PstMessage
from src.extract.message_parser import MessageParser, ParsedMessage
from src.transform.message_enricher import MessageEnricher, EnrichedMessage
#from src.load.data_loader import DataLoader, DataLoadResult, DataLoadError

# Configure the root logger so INFO-level (and higher) records are emitted for
# the rest of the notebook.
# NOTE(review): the INFO records captured in the pipeline cell's output include
# full e-mail addresses from message headers; clear the cell outputs (or raise
# the level) before sharing or committing this notebook.
logging.basicConfig(level=logging.INFO)

### Configuration

In [2]:
# Run settings are read from a JSON file; the relative path is presumably
# resolved against the kernel's working directory -- TODO confirm.
config: Config = Config.from_json("config.json")

# Extractor for the PST file named in the configuration. `chunk_size` is passed
# straight through (presumably the batch size -- confirm in PstMessageExtractor).
extractor: PstMessageExtractor = PstMessageExtractor(
    config.input_pst_path,
    config.chunk_size,
)

# Parser applied to each raw message produced by the extractor.
message_parser: MessageParser = MessageParser()

# TODO: stages that are currently disabled (DerivedFeaturesExtractor is not
# imported in this notebook, and the DataLoader import is commented out):
#   derived_extractor: DerivedFeaturesExtractor = DerivedFeaturesExtractor()
#   loader: DataLoader = DataLoader(config.output_directory)

### ETL Pipeline with Error Handling

In [3]:
# Walk the PST batch by batch: parse each raw message, enrich it, and keep the
# results per batch. A failure on one message is logged and recorded instead of
# aborting the whole run, so a single malformed item cannot stop the pipeline.
for message_batch in extractor.extract_messages():
    # Per-batch accumulators; reset for every batch so memory stays bounded by
    # the batch size rather than the size of the whole archive.
    enriched_messages: List[EnrichedMessage] = []
    failed_messages: List[Exception] = []

    for pst_message in message_batch.messages:
        try:
            parsed_message: ParsedMessage = message_parser.extract(
                pst_message.message,
                pst_message.folder_name,
            )
            enriched_message: EnrichedMessage = MessageEnricher.enrich(parsed_message)
        except Exception as error:
            # Deliberately broad: this is a best-effort pipeline and the
            # project-specific error types are not imported in this notebook.
            # The failure is never silent -- the traceback is logged and the
            # exception is kept for inspection after the run.
            logging.exception("Skipping message that could not be parsed or enriched")
            failed_messages.append(error)
            continue

        enriched_messages.append(enriched_message)

    # One summary line per batch keeps the notebook output readable.
    logging.info(
        "Batch processed: %d enriched, %d failed",
        len(enriched_messages),
        len(failed_messages),
    )

    # TODO: hand the batch to the loader once DataLoader is re-enabled:
    #   loader.load(enriched_messages)

INFO:root:Skipping empty folder: Deleted Items
INFO:root:Extracting messages from folder: Inbox
INFO:root:Found single valid address m7mdalsahal@gmail.com for key From. Header value: Mohammed Alsahal <m7mdalsahal@gmail.com>
INFO:root:Found 42 valid addresses for key To. Header value: 20180065@ariu.edu.qa, 2862633102@qq.com, Ahmed.malik456@yahoo.com,  Bassil@ecommeta.uk, Hamadibrahim1117@gmail.com,  Khaledalhalabi2022@outlook.com, Mohammed Alsahal <m7mdalsahal@gmail.com>,  Sadie.halim@yahoo.com, a.abdoun2005@gmail.com, a.abu_el_rub@qatar.tamu.edu,  adanj@andrew.cmu.edu, ahel61783@hbku.edu.qa, allencapule2003@gmail.com,  alraimi365@gmail.com, aymankaraki@hotmail.com, belal.mnur@gmail.com,  cyrahpuray@gmail.com, "devanga@andrew.cmu.edu" <devanga@andrew.cmu.edu>, graissov@andrew.cmu.edu,  gunelhuseynova2027@u.northwestern.edu, ha.halaamin@gmail.com,  haog2@andrew.cmu.edu, hazemm.els@gmail.com, hjoad@andrew.cmu.edu,  huda.gedawy@gmail.com, jamil.daoud2004@gmail.com, l.al-nuaimi@hotmail.co.u