# Beam search implementation

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import csv
from queue import Queue

In [2]:
#pd.read_table('data/meta_data.tsv', encoding='latin-1')
df_action = pd.read_csv('data/action_condition_meta.csv')
df_action.head()

Unnamed: 0,action,user_id,condition,geo_country,refr_source,browser_language,os_name,os_timezone,dvce_type
0,clic,379881d5-32d7-49f4-bf5b-81fefbc5fcce,1-Control,FI,Google,greek,Android,Europe,Mobile
1,clic,2a0f4218-4f62-479b-845c-109b2720e6e7,2-Buttony-Conversion-Buttons,AU,Google,english,iOS,Australia,Mobile
2,clic,a511b6dc-2dca-455b-b5e2-bf2d224a5505,2-Buttony-Conversion-Buttons,GB,Google,english,Android,Europe,Mobile
3,clic,9fb616a7-4e13-4307-ac92-0b075d7d376a,2-Buttony-Conversion-Buttons,FI,Google,english,iOS,Europe,Mobile
4,clic,64816772-688d-4460-a591-79aa49bba0d5,2-Buttony-Conversion-Buttons,BD,Google,english,Android,Asia,Mobile


In [3]:
import heapq
from heapq import heappush, heappop

class priority_queue:
    def __init__(self, max_size):
        self.items = []
        self.max = max_size
   
    def push(self, item, priority):
        if len(self.items) < self.max:
            heapq.heappush(self.items, (priority, item))
        else:
            heapq.heappushpop(self.items, (priority, item))
            

    def pop(self):
        return heapq.heappop(self.items)

    def get_max_item(self):
        return self.items[0]
    
    def empty(self):
        return not self.items
    
    def print_elements(self):
        result = []
        for i in self.items:
            result.append(i)
        return result
       
    def heap_sort(self):
        return [heapq.heappop(self.items) for _ in range(len(self.items))]

In [4]:
heap = priority_queue(2)
heap.push("browser_language = EN", 1)
heap.push("OS_Name = iOS", 2)
heap.push("browser_language = NL", 3)
heap.push("browser_language = GE", 5)
priority_queue.get_max_item(heap)

(3, 'browser_language = NL')

In [5]:
print(priority_queue.heap_sort(heap))

[(3, 'browser_language = NL'), (5, 'browser_language = GE')]


In [6]:
df_action.shape

(899, 9)

In [7]:
#def phiYule(Set):
def phiYule(Set, column_name):
    n_1 = df_action.loc[(df_action[column_name] == Set) & (df_action.action == 'view') & (df_action.condition == '1-Control')].count()[0]
    n_2 = df_action.loc[(df_action[column_name] == Set) & (df_action.action == 'clic') & (df_action.condition == '1-Control')].count()[0]
    n_3 = df_action.loc[(df_action[column_name] == Set) & (df_action.action == 'view') & (df_action.condition == '2-Buttony-Conversion-Buttons')].count()[0]
    n_4 = df_action.loc[(df_action[column_name] == Set) & (df_action.action == 'clic') & (df_action.condition == '2-Buttony-Conversion-Buttons')].count()[0]
    yuleQ = (n_1*n_4 - n_2*n_3)/(n_1*n_4-n_2*n_3)
    return yuleQ

In [8]:
def constraints(Set, column_name):
    return df_action.loc[(df_action[column_name] == Set)].count()[0] > 5

In [9]:
def refinement(seed):
    descriptions = df_action[seed].unique()
    return descriptions

In [10]:
#def phiSet(desc):
   
#def phiEntropy(Set):
       
def beam_search(d, w, q):
    candidateQueue = list(df_action.columns.values)
    candidateQueue.remove('action')
    candidateQueue.remove('condition')
    
    resultSet = priority_queue(q)
    for level in range(0, d):
        print("level: "+str(level))
        beam = priority_queue(w)
        while not(candidateQueue):
            descriptor = candidateQueue.pop(0)
            seed = descriptor
            set_refined = refinement(seed)
            for desc in set_refined:
                quality = phiYule(desc, descriptor)
                if constraints(desc, descriptor):
                    resultSet.push(desc, quality)
                    beam.push(desc, quality)
            while not beam.empty:
                candidateQueue.append(priority_queue.pop(beam))     
    return resultSet

result = beam_search(d=1, w=5, q=5)
priority_queue.heap_sort(result)

level: 0


[]