In [1]:
import random

class Edge:
    
    def __init__(self, node1, node2):
            self.node1 = node1 
            self.node2 = node2
            
    def getNode1(self):
        return self.node1
    
    def getNode2(self):
        return self.node2
            

In [2]:
documentInput = set()

with open("web-Stanford.txt") as f:
    
    for eachLine in f:
            
        lineArr = eachLine.split()
        
        if lineArr[0] != lineArr[1]:
            
            documentInput.add(Edge(lineArr[0], lineArr[1]))

In [5]:
class Triest:
    def __init__(self, sampleSize):
        
        self.T = {}
        self.M = sampleSize
        self.t = 0
        self.TG = 0
        self.collectedSamples = set()
        
        
    def getGlobalCount(self):
            return self.TG
    
            
    def getLocalCount(self):
            return self.T
            
            
    def run(self, edgeInput):
        
         print("Defined Sample Size: ",  self.M)
         
         print("Dataset length: ",  len(edgeInput))
        
         for edge in edgeInput:
            self.t += 1
            
            # Counter moved above If block
            
            self.update_counters(edge)
                
            if self.reservoirSample(edge):    
                print("Edge ", edge.getNode1(), " and ", edge.getNode2() ,"at ", self.TG, " taken into sample")
                self.collectedSamples.add(edge)
               
    
    #IMPROVED
    
    def maxIMPRCalc(self):
        
        maximum = max(1, ( (self.t-1)*(self.t-2) / self.M*(self.M-1) ))  
            
        return maximum 
            
        
    def reservoirSample(self, edge):
        
        if self.t <= self.M:          
            return True
        
        if random.random() <= (self.M/self.t):
            
            remove_el = random.sample(self.collectedSamples, 1)[0]
            print("Edge ", remove_el.getNode1(), " and ", remove_el.getNode2() , " evicted")
            self.collectedSamples.remove(remove_el)
            return True
        
        return False

    def update_counters(self, edge):    
        
        s1 = set()
        
        s2 = set()
        
        for e in self.collectedSamples:
            if e.getNode1() == edge.getNode1():
                s1.add(e.getNode2())
            if e.getNode2() == edge.getNode1():
                s1.add(e.getNode1())
                
            if e.getNode1() == edge.getNode2():
                s2.add(e.getNode2())
            if e.getNode2() == edge.getNode2():
                s2.add(e.getNode1())
                
        for c in (s1 & s2 ):
                        
                self.TG+= self.maxIMPRCalc()
                self.T[c] = self.T.get(c, 0)+ self.maxIMPRCalc()
                self.T[edge.node2] = self.T.get(edge.node2, 0)+ self.maxIMPRCalc()
                self.T[edge.node1] = self.T.get(edge.node1, 0)+ self.maxIMPRCalc()
                    
   

In [6]:
obj = Triest(100)

obj.run(documentInput)

print("MAX ", obj.maxIMPRCalc())

print("Global Count", obj.getGlobalCount())

print(obj.getLocalCount())

Defined Sample Size:  100
Dataset length:  2312501
Edge  276778  and  63637 at  0  taken into sample
Edge  122374  and  165189 at  0  taken into sample
Edge  223403  and  226411 at  0  taken into sample
Edge  276778  and  65761 at  0  taken into sample
Edge  122374  and  177014 at  0  taken into sample
Edge  223403  and  234704 at  0  taken into sample
Edge  276778  and  84219 at  0  taken into sample
Edge  122374  and  226290 at  0  taken into sample
Edge  223403  and  236584 at  0  taken into sample
Edge  276778  and  102188 at  0  taken into sample
Edge  122374  and  243180 at  0  taken into sample
Edge  223403  and  245659 at  0  taken into sample
Edge  276778  and  105972 at  0  taken into sample
Edge  122374  and  244195 at  0  taken into sample
Edge  214710  and  34573 at  0  taken into sample
Edge  276778  and  122216 at  0  taken into sample
Edge  122374  and  247252 at  0  taken into sample
Edge  214710  and  38342 at  0  taken into sample
Edge  276778  and  132239 at  0  tak

Edge  245701  and  61390  evicted
Edge  86097  and  167295 at  533625.84  taken into sample
Edge  20779  and  258793  evicted
Edge  215723  and  166748 at  533625.84  taken into sample
Edge  214710  and  34573  evicted
Edge  122408  and  114813 at  533625.84  taken into sample
Edge  25546  and  36674  evicted
Edge  20783  and  33587 at  533625.84  taken into sample
Edge  148755  and  27904  evicted
Edge  193605  and  164139 at  533625.84  taken into sample
Edge  148755  and  87888  evicted
Edge  86097  and  193105 at  610412.22  taken into sample
Edge  222868  and  33138  evicted
Edge  219345  and  84219 at  691673.4  taken into sample
Edge  122408  and  114813  evicted
Edge  215723  and  262860 at  691673.4  taken into sample
Edge  31916  and  27831  evicted
Edge  122408  and  153516 at  691673.4  taken into sample
Edge  236015  and  261780  evicted
Edge  219345  and  102188 at  691673.4  taken into sample
Edge  225099  and  208566  evicted
Edge  86097  and  206995 at  691673.4  taken

Edge  42804  and  178459  evicted
Edge  27813  and  88062 at  4725178.920000001  taken into sample
Edge  175287  and  247252  evicted
Edge  33144  and  84382 at  5451435.000000001  taken into sample
Edge  122432  and  43370  evicted
Edge  33144  and  92842 at  6938062.560000001  taken into sample
Edge  86097  and  117083  evicted
Edge  86113  and  239306 at  7688254.860000001  taken into sample
Edge  215723  and  166748  evicted
Edge  86114  and  81435 at  7688254.860000001  taken into sample
Edge  181749  and  214128  evicted
Edge  33144  and  259343 at  7688254.860000001  taken into sample
Edge  86097  and  244395  evicted
Edge  267952  and  75851 at  7688254.860000001  taken into sample
Edge  205210  and  215625  evicted
Edge  267952  and  94762 at  7688254.860000001  taken into sample
Edge  158929  and  167295  evicted
Edge  267952  and  131001 at  7688254.860000001  taken into sample
Edge  158929  and  214128  evicted
Edge  280421  and  101203 at  7688254.860000001  taken into sam

Edge  69026  and  129556  evicted
Edge  261994  and  148106 at  142459823.88  taken into sample
Edge  190355  and  86137  evicted
Edge  206067  and  280643 at  142459823.88  taken into sample
Edge  30900  and  20787  evicted
Edge  196518  and  40829 at  142459823.88  taken into sample
Edge  185892  and  175298  evicted
Edge  50569  and  241587 at  142459823.88  taken into sample
Edge  280643  and  259455  evicted
Edge  196518  and  120708 at  142459823.88  taken into sample
Edge  175408  and  117864  evicted
Edge  86173  and  192929 at  142459823.88  taken into sample
Edge  186803  and  170281  evicted
Edge  180915  and  243556 at  142459823.88  taken into sample
Edge  86169  and  27904  evicted
Edge  199646  and  122497 at  142459823.88  taken into sample
Edge  123481  and  223403  evicted
Edge  194959  and  176790 at  142459823.88  taken into sample
Edge  214707  and  20799  evicted
Edge  140279  and  34249 at  142459823.88  taken into sample
Edge  175290  and  248139  evicted
Edge  

Edge  122481  and  178759  evicted
Edge  122093  and  95028 at  185900243.76000002  taken into sample
Edge  120984  and  122570  evicted
Edge  105697  and  81435 at  185900243.76000002  taken into sample
Edge  155667  and  122383  evicted
Edge  196972  and  226411 at  185900243.76000002  taken into sample
Edge  33151  and  203322  evicted
Edge  147000  and  255754 at  185900243.76000002  taken into sample
Edge  32610  and  8806  evicted
Edge  95662  and  85919 at  185900243.76000002  taken into sample
Edge  20835  and  44178  evicted
Edge  53682  and  210403 at  185900243.76000002  taken into sample
Edge  232895  and  175529  evicted
Edge  53682  and  226411 at  185900243.76000002  taken into sample
Edge  199224  and  86737  evicted
Edge  257960  and  175067 at  185900243.76000002  taken into sample
Edge  147252  and  167295  evicted
Edge  13860  and  22906 at  185900243.76000002  taken into sample
Edge  225099  and  65761  evicted
Edge  208840  and  105607 at  185900243.76000002  take

Edge  138781  and  7934  evicted
Edge  6689  and  226411 at  185900243.76000002  taken into sample
Edge  50461  and  68009  evicted
Edge  236409  and  13842 at  185900243.76000002  taken into sample
Edge  85869  and  93073  evicted
Edge  224412  and  72193 at  185900243.76000002  taken into sample
Edge  77449  and  154124  evicted
Edge  218097  and  20706 at  185900243.76000002  taken into sample
Edge  178330  and  241454  evicted
Edge  239324  and  256298 at  185900243.76000002  taken into sample
Edge  33024  and  254350  evicted
Edge  33945  and  10431 at  185900243.76000002  taken into sample
Edge  181008  and  27281  evicted
Edge  279258  and  241563 at  185900243.76000002  taken into sample
Edge  69161  and  16858  evicted
Edge  33945  and  246750 at  185900243.76000002  taken into sample
Edge  25117  and  255896  evicted
Edge  97795  and  129971 at  185900243.76000002  taken into sample
Edge  265732  and  122484  evicted
Edge  207313  and  216433 at  185900243.76000002  taken int

Edge  239059  and  120903  evicted
Edge  231111  and  171390 at  1233606652.74  taken into sample
Edge  163904  and  38342  evicted
Edge  206183  and  39075 at  1233606652.74  taken into sample
Edge  186342  and  134735  evicted
Edge  220109  and  258706 at  1233606652.74  taken into sample
Edge  206183  and  39075  evicted
Edge  156551  and  180548 at  1233606652.74  taken into sample
Edge  207313  and  216433  evicted
Edge  59169  and  151707 at  1233606652.74  taken into sample
Edge  175141  and  94717  evicted
Edge  184936  and  89549 at  1233606652.74  taken into sample
Edge  95662  and  85919  evicted
Edge  32688  and  167295 at  1233606652.74  taken into sample
Edge  163893  and  77737  evicted
Edge  172983  and  82411 at  1233606652.74  taken into sample
Edge  231111  and  171390  evicted
Edge  148181  and  143742 at  1233606652.74  taken into sample
Edge  108419  and  168447  evicted
Edge  120785  and  276237 at  1233606652.74  taken into sample
Edge  95473  and  105525  evict

Edge  33048  and  68328  evicted
Edge  13435  and  118834 at  1233606652.74  taken into sample
Edge  246391  and  166837  evicted
Edge  29515  and  66726 at  1233606652.74  taken into sample
Edge  102403  and  153450  evicted
Edge  121308  and  176529 at  1233606652.74  taken into sample
Edge  25560  and  27815  evicted
Edge  220858  and  201471 at  1233606652.74  taken into sample
Edge  19055  and  34600  evicted
Edge  84265  and  141370 at  1233606652.74  taken into sample
Edge  174227  and  130612  evicted
Edge  34080  and  62478 at  1233606652.74  taken into sample
Edge  129218  and  208291  evicted
Edge  231727  and  234033 at  1233606652.74  taken into sample
Edge  260285  and  276869  evicted
Edge  252877  and  122498 at  1233606652.74  taken into sample
Edge  92461  and  226411  evicted
Edge  231727  and  280288 at  1233606652.74  taken into sample
Edge  50080  and  43650  evicted
Edge  84075  and  164914 at  1233606652.74  taken into sample
Edge  18562  and  224759  evicted
Ed

Edge  200678  and  141691  evicted
Edge  244084  and  150148 at  1233606652.74  taken into sample
Edge  84075  and  164914  evicted
Edge  183740  and  233938 at  1233606652.74  taken into sample
Edge  190617  and  185960  evicted
Edge  185374  and  191230 at  1233606652.74  taken into sample
Edge  72294  and  31516  evicted
Edge  18037  and  83341 at  1233606652.74  taken into sample
Edge  168025  and  212392  evicted
Edge  240110  and  178443 at  1233606652.74  taken into sample
Edge  120267  and  53055  evicted
Edge  165337  and  245659 at  1233606652.74  taken into sample
Edge  189098  and  52751  evicted
Edge  169534  and  167295 at  1233606652.74  taken into sample
Edge  30457  and  214128  evicted
Edge  231876  and  234704 at  1233606652.74  taken into sample
Edge  48421  and  113008  evicted
Edge  56025  and  53380 at  1233606652.74  taken into sample
Edge  29549  and  72157  evicted
Edge  235541  and  198090 at  1233606652.74  taken into sample
Edge  240850  and  228695  evicte

Edge  256884  and  97896  evicted
Edge  231775  and  167295 at  1233606652.74  taken into sample
Edge  96197  and  17768  evicted
Edge  277114  and  146705 at  1233606652.74  taken into sample
Edge  112555  and  105607  evicted
Edge  237582  and  182757 at  1233606652.74  taken into sample
Edge  241926  and  69244  evicted
Edge  114303  and  139746 at  1233606652.74  taken into sample
Edge  154750  and  12650  evicted
Edge  91907  and  105607 at  1233606652.74  taken into sample
Edge  111993  and  251796  evicted
Edge  29393  and  56902 at  1233606652.74  taken into sample
Edge  213634  and  251796  evicted
Edge  259772  and  218205 at  1233606652.74  taken into sample
Edge  158275  and  253042  evicted
Edge  143829  and  222237 at  1233606652.74  taken into sample
Edge  4071  and  19413  evicted
Edge  33720  and  128287 at  1233606652.74  taken into sample
Edge  116968  and  139924  evicted
Edge  178755  and  70349 at  1233606652.74  taken into sample
Edge  198322  and  247956  evicte

Edge  5181  and  220469  evicted
Edge  137858  and  272442 at  2463547803718.32  taken into sample
Edge  75662  and  185168  evicted
Edge  126915  and  52254 at  2463547803718.32  taken into sample
Edge  137858  and  272442  evicted
Edge  43681  and  226805 at  2463547803718.32  taken into sample
Edge  71053  and  248088  evicted
Edge  112019  and  95511 at  2463547803718.32  taken into sample
Edge  251075  and  105607  evicted
Edge  95541  and  259439 at  2463547803718.32  taken into sample
Edge  18921  and  124478  evicted
Edge  56879  and  209071 at  2463547803718.32  taken into sample
Edge  206790  and  156013  evicted
Edge  82743  and  134190 at  2463547803718.32  taken into sample
Edge  158019  and  110633  evicted
Edge  197390  and  50785 at  2463547803718.32  taken into sample
Edge  19051  and  165257  evicted
Edge  227955  and  151446 at  2463547803718.32  taken into sample
Edge  4283  and  3674  evicted
Edge  16653  and  244948 at  2463547803718.32  taken into sample
Edge  16