In [26]:
from typing import NamedTuple, List, Dict, Set
from pathlib import Path
import csv
from dataclasses import dataclass

from relationality.fields import Histo, entropy

class MessageEvent(NamedTuple):
    """One message sent from `source` to `dest` at `timestamp`.

    `timestamp` is the first field, and tuples compare field by field, so
    sorting a list of events orders them by timestamp, then source, then
    dest.
    """
    timestamp: int  # integer time of the message (unit not established here)
    source: str     # name of the sender
    dest: str       # name of the recipient


@dataclass
class MessageEventSeries:
    """A collection of message events plus the names that appear in them.

    Instances built through `from_iterator` / `from_path` hold their events
    sorted in ascending tuple order (timestamp first). The plain constructor
    stores whatever it is given, unchanged.
    """
    events: List[MessageEvent]  # the events themselves
    sources: Set[str]           # every name that appears as a sender
    dests: Set[str]             # every name that appears as a recipient

    @classmethod
    def from_iterator(cls, it) -> 'MessageEventSeries':
        """Build a series from any iterable of `MessageEvent`.

        The iterable is consumed exactly once. Events are stored sorted, and
        the source / destination name sets are collected from them.
        """
        events = sorted(it)
        sources = {event.source for event in events}
        dests = {event.dest for event in events}
        return cls(events, sources, dests)

    @classmethod
    def from_path(cls, path: Path) -> 'MessageEventSeries':
        """Load a series from a CSV file with rows `source, dest, timestamp`.

        The timestamp column is parsed with `int`. Completely empty rows
        (e.g. a blank line at the end of the file) are skipped; any other
        row that does not have exactly three columns raises `ValueError`
        from the unpacking, as does a non-integer timestamp.
        """
        # newline='' is what the csv module asks for when handing a file
        # object to csv.reader, so that line breaks inside quoted fields are
        # interpreted by the reader rather than by the file layer.
        with path.open('r', newline='') as fh:
            # csv.reader yields [] for a blank line; drop those up front.
            rows = (row for row in csv.reader(fh) if row)
            # from_iterator consumes the generator fully before the file
            # is closed by the `with` block.
            return cls.from_iterator(
                MessageEvent(int(tsstring), source, dest)
                for (source, dest, tsstring) in rows)

    def __iter__(self):
        """Iterate over the stored events in their stored order."""
        return iter(self.events)


@dataclass
class Node:
    """State tracked for a single message source.

    Holds the source's histogram, the entropy value last stored for it, and
    the change in that entropy caused by the most recent update.
    """
    histo: Histo    # histogram, indexed by destination position
    entropy: float  # entropy value last stored for `histo`
    # Change in `entropy` produced by the most recent update of this node.
    # Declared here with a default so that it is part of the dataclass
    # (repr, comparison) and can be read before any update has happened.
    delta_entropy: float = 0.0


class Model:
    """Per-source destination histograms driven by a message event series.

    Every source in the series gets a `Node` holding a histogram over the
    tracked destinations and the entropy last computed for it. Feeding an
    event to `step` bumps the sender's histogram, recomputes that node's
    entropy and applies the change to the model-wide `entropy` value.
    """
    series: MessageEventSeries     # events the model was built from
    nodes: Dict[str, Node]         # per-source state, keyed by source name
    dests_list: List[str]          # tracked destinations; position == histogram index
    dests_indices: Dict[str, int]  # inverse lookup of `dests_list`
    entropy: float                 # model-wide value, adjusted by every step's delta

    def __init__(self, series: MessageEventSeries):
        """Index the tracked destinations of `series` and initialise all nodes.

        Only destinations that also occur as sources are tracked; events
        addressed to any other destination are ignored by `step`.
        """
        # Filter out destinations that are never sources
        # This could be made optional of course
        #
        # The names are sorted instead of taken in raw set order: str hashes
        # are randomised per interpreter run, so set iteration order (and
        # with it the histogram index given to each destination) would
        # otherwise differ from one kernel restart to the next.
        self.dests_list = sorted(series.sources & series.dests)
        self.dests_indices = {dest: k for k, dest in enumerate(self.dests_list)}
        self.series = series
        self.reset()

    @classmethod
    def from_path(cls, path: Path) -> 'Model':
        """Build a model from the CSV file at `path`."""
        return cls(MessageEventSeries.from_path(path))

    def reset(self):
        """Discard all accumulated state and start every node afresh.

        Each source gets a new uniform histogram over the tracked
        destinations, and the model-wide entropy is re-initialised.
        """
        dests_count = len(self.dests_list)
        # NOTE(review): nodes start with a stored entropy of 0 rather than
        # entropy(Histo.uniform(dests_count)), so the first delta reported
        # for each node is measured from 0 -- confirm this is intended.
        init_entropy = 0
        self.nodes = {
            source: Node(Histo.uniform(dests_count), init_entropy)
            for source in self.series.sources}
        # NOTE(review): the model-wide value starts at the number of tracked
        # destinations, not at a sum of node entropies -- confirm.
        self.entropy = dests_count

    def step(self, event: MessageEvent):
        """Apply one event to the model.

        Returns the updated node of `event.source`, with `delta_entropy`
        set to the change this event caused, or `None` when `event.dest` is
        not a tracked destination (nothing is modified in that case).

        Raises:
            ValueError: if `event.source` has no node in this model.
        """
        node = self.nodes.get(event.source)
        if node is None:
            raise ValueError(f"{event.source} has no node")

        dest_index = self.dests_indices.get(event.dest)
        if dest_index is None:
            return None

        old_entropy = node.entropy
        node.histo[dest_index] += 1
        node.entropy = entropy(node.histo)
        # Record the change on the node so callers can read it back.
        node.delta_entropy = node.entropy - old_entropy
        self.entropy += node.delta_entropy
        return node


In [27]:
    # Build the model from the CSV at ../data/enron.csv.
    enron_path = Path("../data/enron.csv")
    model = Model.from_path(enron_path)

    # Replay every event in stored order. `steps` counts only the events
    # that actually updated a node; the entropy change is printed for the
    # first such event and for every tenth one after it.
    steps = 0
    for ev in model.series:
        node = model.step(ev)
        if node is not None:
            if steps % 10 == 0:
                print(node.delta_entropy)
            steps += 1

    print("Entropies")

6.7894977792004605
-0.020295130232825365
-0.0009396443801694687
-0.021502754342315633
-0.018279203798567067
-0.0039319503139152445
6.7894977792004605
-0.004276020619133902
-0.010213382149331096
-0.009123903633726727
-0.014713801628712275
-0.011397467246895943
-0.0015117079670865508
-0.003167746972772889
-0.021240179951111493
-0.0030900998657186207
-0.018209291651092308
-0.01934004079588636
-0.014661463594006108
-0.015307852279073941
-0.015409326985082572
-0.009196612698168849
-0.012529942473459954
-0.01569654683183419
-0.01939202027460407
-0.010388211021951754
-0.0030471245736318764
-0.012391412364165078
-0.0015260187439931627
-0.0029775296408374174
-0.01011571525141175
-0.010924651140406638
-0.01116657096085838
-0.011053832755182569
-0.013522009274924862
-0.012753272281800676
-0.013123496029800563
-0.01177708274599265
-0.009289237913810311
-0.011141540338479139
-0.014281235978407025
-0.013621884355655922
-0.010244643563252609
-0.013623420097555794
-0.011475883601591796
-0.011443964010

-0.017507362052297637
-0.0012264432215740584
-0.014827875138320401
-0.02214688268068965
-0.012165950479760212
-0.001518737903738021
-0.017141247027285367
0.0035841110262531117
-0.018021379741454346
-0.0005267379763091995
-0.016684482778000387
0.004034892229894282
0.004151203676108217
-0.0018275317524931722
-0.008289334980517538
-0.014196611608481291
-0.015103085817712802
-0.01065464857485754
-0.018283077870654196
-0.017302295025887204
-0.01809174590885032
0.002546998512256593
-0.008837016323997027
-0.004522297109319062
-0.006905139636581126
0.0027543869343205074
0.007702262478098909
-0.0002796824482445004
0.0044396700047384385
-0.017028133647520427
-0.002322497866306783
-0.01236522148304342
-0.01364967951880569
0.0001192676232761869
-0.006122092123021616
-0.017700202445936952
-0.011245330257326103
-0.009351549071051402
-0.013731488475874798
-0.006741345187972314
-0.0017573519261651072
-0.01911153106756558
-0.016267247098237014
0.006929393252815608
-0.01857277745469421
-0.01575481490042

In [None]:
# TODO: Track the set of people who have received a message, and normalize against its size.