|
| 1 | +// Package threading provides generic email thread reconstruction. |
| 2 | +// It can reconstruct threading relationships even when In-Reply-To |
| 3 | +// and References headers are missing, using subject matching, |
| 4 | +// date proximity, and embedded message hints. |
| 5 | +package threading |
| 6 | + |
| 7 | +import "time" |
| 8 | + |
| 9 | +// ThreadableMessage is the interface that messages must implement |
| 10 | +// for thread reconstruction. |
| 11 | +type ThreadableMessage interface { |
| 12 | + // GetMessageID returns the unique message identifier. |
| 13 | + GetMessageID() string |
| 14 | + |
| 15 | + // GetDate returns the message date. |
| 16 | + GetDate() time.Time |
| 17 | + |
| 18 | + // GetSubject returns the message subject. |
| 19 | + GetSubject() string |
| 20 | + |
| 21 | + // GetInReplyTo returns the In-Reply-To header value (may be empty). |
| 22 | + GetInReplyTo() string |
| 23 | + |
| 24 | + // GetReferences returns the References header values (may be empty). |
| 25 | + GetReferences() []string |
| 26 | + |
| 27 | + // GetParticipants returns all email addresses involved in the message |
| 28 | + // (From, To, Cc, Bcc). |
| 29 | + GetParticipants() []string |
| 30 | + |
| 31 | + // GetEmbeddedMessageHints returns hints about embedded/quoted messages |
| 32 | + // that can be used for threading when headers are missing. |
| 33 | + GetEmbeddedMessageHints() []EmbeddedHint |
| 34 | + |
| 35 | + // SetThreadingInfo is called after reconstruction to provide |
| 36 | + // the computed threading information back to the message. |
| 37 | + SetThreadingInfo(info ThreadingInfo) |
| 38 | +} |
| 39 | + |
// EmbeddedHint describes a message found inside another message's body,
// for example a quoted reply or a forwarded message.
type EmbeddedHint struct {
	// SenderPattern is matched against participant addresses; it may be a
	// partial or a full address (e.g., "john.smith" or
	// "john.smith@enron.com").
	SenderPattern string

	// Date holds the embedded message's date, when it could be parsed.
	Date time.Time

	// Subject holds the embedded message's subject, when one is available.
	Subject string

	// Type names the kind of embedding: "reply", "forward", "quoted".
	Type string
}
| 56 | + |
// ThreadingInfo carries the threading information computed for a single
// message.
type ThreadingInfo struct {
	// ThreadID uniquely identifies the thread the message belongs to.
	ThreadID string

	// ParentID holds the MessageID of this message's parent within the
	// thread. It is empty for a root message.
	ParentID string

	// References holds the reconstructed chain of message IDs that leads to
	// this message.
	References []string

	// Depth gives the nesting depth within the thread; root messages have
	// depth 0.
	Depth int
}
| 72 | + |
// Thread groups a set of related messages.
type Thread struct {
	// ID uniquely identifies the thread.
	ID string `json:"id"`

	// Subject holds the thread's normalized subject.
	Subject string `json:"subject"`

	// RootMessageID holds the MessageID of the thread's first message.
	RootMessageID string `json:"root_message_id"`

	// MessageIDs lists every message ID in the thread, sorted by date.
	MessageIDs []string `json:"message_ids"`

	// Participants lists every unique email address in the thread.
	Participants []string `json:"participants"`

	// StartDate holds the date of the first message.
	StartDate time.Time `json:"start_date"`

	// EndDate holds the date of the last message.
	EndDate time.Time `json:"end_date"`

	// Size holds the number of messages in the thread.
	Size int `json:"size"`
}
0 commit comments