In [162]:
%load_ext google.cloud.bigquery

import numpy as np
import pandas as pd
from operator import mul
from functools import reduce

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery


#### Fake dataset 1 - simple graph

In [None]:
# Toy transition graph used to exercise the crawler: one row per directed
# edge, with `prob` holding the probability of moving from `prev` to `next`.
fake1 = pd.DataFrame(
    [
        ('START', 'A', 0.3),
        ('START', 'B', 0.7),
        ('A', 'C', 0.2),
        ('A', 'D', 0.8),
        ('B', 'D', 1.0),
        ('C', 'N_CONV', 1.0),
        ('D', 'N_CONV', 0.1),
        ('D', 'CONV', 0.9),
    ],
    columns=['prev', 'next', 'prob'],
)

print(fake1)

In [183]:
class Crawler:
    """Enumerate every path from 'START' through a transition graph.

    The graph is a DataFrame with one row per directed edge and the columns
    'prev', 'next' and 'prob'. Each crawler holds one partial path
    (`history`) together with the edge probabilities collected along it
    (`probs`). A crawler standing on a node with outgoing edges spawns one
    child crawler per edge; a crawler standing on a node without outgoing
    edges is finished, and its path is recorded if that node is 'CONV' or
    'N_CONV'. Paths ending on any other dead-end node are dropped.

    All bookkeeping lives on the class, so only one traversal can be tracked
    at a time. The traversal is recursive and has no cycle detection: a cycle
    reachable from 'START' recurses until Python's recursion limit is hit.

    Typical use::

        Crawler.load_graph(edges)
        Crawler.start()
        conv, n_conv = Crawler.result()
    """

    # Edge table shared by all crawlers (columns 'prev', 'next', 'prob').
    graph = pd.DataFrame()
    # Crawlers that have been created but have not yet finished `_step`.
    active_crawlers = []
    # Node sequences of every finished path that ended on 'CONV' or 'N_CONV'.
    paths = []
    # Per-path lists of edge probabilities, split by terminal node.
    probs_conv = []
    probs_n_conv = []
    # Running counter used to number crawlers in log messages.
    current_index = 0
    # When True, `message` prints progress information.
    verbose = False

    def __init__(self, history, probs):
        """Create a crawler positioned at the last node of `history`.

        Args:
            history: list of node names visited so far (last one is current).
            probs: list of edge probabilities collected along `history`.

        Raises:
            RuntimeError: if no graph has been loaded (the graph is empty).
        """
        if Crawler.graph.empty:
            # Fail loudly here: a silently half-built crawler would only
            # crash later with an unrelated AttributeError in `_step`.
            raise RuntimeError('Before using Crawler you must load a graph! (Crawler.load_graph())')
        Crawler.active_crawlers.append(self)
        self.index = Crawler.current_index
        Crawler.current_index += 1
        self.history = history
        self.probs = probs
        Crawler.message('Created crawler{}, active crawlers: {} ({})'.format(self.index, len(Crawler.active_crawlers), self.history))

    def _step(self):
        """Advance this crawler: record a finished path or fan out to children."""
        current_node = self.history[-1]
        # Filter the edge table once; both the emptiness check and the
        # fan-out below work on this slice.
        edges = Crawler.graph[Crawler.graph['prev'] == current_node]
        if edges.empty:
            Crawler.message('\t\tCrawler{} finished'.format(self.index))
            Crawler.message('\t\tCrawler{}\'s path: {} '.format(self.index, self.history))
            Crawler.message('\t\tCrawler{}\'s probs: {} '.format(self.index, self.probs))
            if current_node == 'CONV':
                Crawler.paths.append(self.history)
                Crawler.probs_conv.append(self.probs)
            elif current_node == 'N_CONV':
                Crawler.paths.append(self.history)
                Crawler.probs_n_conv.append(self.probs)
        else:
            # One child per edge row, pairing each target with that row's own
            # probability (duplicate (prev, next) rows act as parallel edges).
            for next_node, prob in zip(edges['next'].to_list(), edges['prob'].to_list()):
                child = Crawler(self.history.copy(), self.probs.copy())
                child.history.append(next_node)
                child.probs.append(prob)
                child._step()
        Crawler.active_crawlers.remove(self)
        Crawler.message('Removed crawler{}, active crawlers: {}'.format(self.index, len(Crawler.active_crawlers)))

    @staticmethod
    def message(txt):
        """Print `txt` if verbose mode is on; otherwise do nothing."""
        if Crawler.verbose:
            print(txt)

    @staticmethod
    def set_verbose(verbose=True):
        """Turn progress messages on or off."""
        Crawler.verbose = verbose

    @staticmethod
    def reset():
        """Discard the bookkeeping of any previous traversal (keeps the graph)."""
        # Rebind rather than clear in place, so lists a caller grabbed from an
        # earlier run are left intact.
        Crawler.active_crawlers = []
        Crawler.paths = []
        Crawler.probs_conv = []
        Crawler.probs_n_conv = []
        Crawler.current_index = 0

    @staticmethod
    def start():
        """Traverse the loaded graph from 'START', replacing earlier results.

        Raises:
            RuntimeError: if no graph has been loaded.
        """
        # Start from a clean slate so repeated calls (e.g. re-running a
        # notebook cell) do not count the same paths again.
        Crawler.reset()
        root = Crawler(['START'], [])
        root._step()

    @staticmethod
    def load_graph(graph):
        """Set the edge table to crawl and drop results from any earlier graph.

        Args:
            graph: DataFrame with the columns 'prev', 'next' and 'prob'.
        """
        Crawler.graph = graph
        Crawler.reset()

    @staticmethod
    def result():
        """Return `(conv, n_conv)`: summed path probabilities per terminal node.

        Each recorded path contributes the product of its edge probabilities.
        Both totals are 0 when no traversal has been run.
        """
        conv = 0
        n_conv = 0
        # The initial value 1 keeps `reduce` defined for an empty list and
        # does not change the product of a non-empty one.
        for plist in Crawler.probs_conv:
            conv += reduce(mul, plist, 1)
        for plist in Crawler.probs_n_conv:
            n_conv += reduce(mul, plist, 1)
        return (conv, n_conv)
    

In [184]:
# Point the crawler at the toy graph and show the totals before and after
# the traversal has been run.
Crawler.load_graph(fake1)
totals_before_run = Crawler.result()
print(totals_before_run)

Crawler.start()
totals_after_run = Crawler.result()
print(totals_after_run)

(0, 0)
(0.846, 0.15399999999999997)
