In [88]:
import pandas as pd
import numpy  as np
import re                              # Used for splitting the Items with multiple delimiters
from   collections import Counter      # To count items in dictionaries
from   itertools   import combinations # To generate combinations of items

# The dataset was created with the notebook "DatasetCreation.ipynb".

# Read the trips CSV and discard its first column.
# NOTE(review): the first column is presumably the index written out when the
# CSV was exported - confirm against "DatasetCreation.ipynb".
routes = pd.read_csv('route_trips.csv', delimiter=',', encoding='utf-8')
routes = routes.drop(columns=routes.columns[0])

In [89]:
# Characters that separate the entries inside the raw "Items" strings
delimiters   = (",", "{", "}", "'")
# Alternation pattern matching any single delimiter (each one escaped literally)
regexPattern = '|'.join(re.escape(delimiter) for delimiter in delimiters)

In [90]:
# Rebuild the "Items" column as real lists of item names.
# Each raw cell is a string (presumably a stringified Python set - confirm);
# splitting it on the delimiters leaves empty and whitespace-only fragments,
# and `fragment.strip()` is falsy for both, so one filter removes them all.
# The column's values are iterated directly instead of being looked up by the
# labels 0..n-1, so the cell does not depend on `routes` having a default index.
newItemsColumn = [[fragment for fragment in re.split(regexPattern, rawItems)
                            if fragment.strip()]
                  for rawItems in routes.Items]
routes.Items = newItemsColumn

In [91]:
# Minimum number of occurrences (support threshold) for an item or a pair
# to be considered frequent
support = 15

# Flatten the per-trip item lists into a single list of all items
items = [product for basket in routes.Items
                 for product in basket]

# Count the occurrences of each item in the whole dataset
itemCount = Counter(items)

# Keep the items that appear at least `support` times
frequentItems = [product for product, occurrences in itemCount.items()
                         if occurrences >= support]

In [92]:
# Remove from every trip the items that are not frequent.
# Membership is tested against a set so each lookup is O(1) instead of a scan
# over the whole frequentItems list for every item of every trip.
frequentLookup = set(frequentItems)
routes.Items   = [[item for item in items if item in frequentLookup]
                  for items in routes.Items]

In [93]:
# Generate every unordered pair of frequent items within each trip.
# combinations() emits pairs in the order of its input, so without a canonical
# order ('a', 'b') and ('b', 'a') would become two different Counter keys and
# the support of one pair would be split between them. Sorting each basket
# (and dropping repeated items) gives every pair exactly one key and counts it
# at most once per trip.
combination   = [list(combinations(sorted(set(items)), 2)) for items in routes.Items]
comb          = [pair for pairs in combination
                      for pair in pairs]

In [94]:
# Tally how many times each pair was generated
pairCount     = Counter(comb)
# and keep the pairs whose count is at least `support`
frequentPairs = [pair for pair, occurrences in pairCount.items()
                      if occurrences >= support]

In [95]:
# Put every pair in canonical (sorted) order and store it as a list, which is
# how later cells tell pairs apart from single items.
# Two pairs that differed only by order collapse to the same entry once sorted;
# dict.fromkeys drops such duplicates while keeping the first-seen order.
frequentPairs = [list(pair)
                 for pair in dict.fromkeys(tuple(sorted(fp)) for fp in frequentPairs)]

In [96]:
# popularTuples is the container for the interesting objects: the frequent
# items (strings) followed by the frequent pairs (lists). It may be extended
# with further kinds of objects later.
# A new list is built on purpose: assigning `frequentItems` directly and then
# calling extend() would also add the pairs to `frequentItems` itself, and
# re-running the cell would append them again.
popularTuples = list(frequentItems) + list(frequentPairs)

In [97]:
def matches(rid, order, popularTuples):
    """Return the popular objects contained in one order of one route.

    Reads the module-level `routes` frame and takes the item list of the first
    row whose RouteId equals `rid` and whose Order equals `order`.

    Parameters:
        rid: value compared against `routes.RouteId`.
        order: value compared against `routes.Order`.
        popularTuples: iterable of popular objects; a list is treated as a
            group of items (a pair), anything else as a single item.

    Returns:
        A list, in the order of `popularTuples`, holding every single item
        found in the order's items and a copy of every list whose members are
        all found there.

    Raises:
        IndexError: if no row matches `rid` and `order`.
    """
    rowMask    = (routes.RouteId == rid) & (routes.Order == order)
    orderItems = routes.Items[rowMask].tolist()[0]

    found = []
    for candidate in popularTuples:
        if not isinstance(candidate, list):
            # Single frequent item
            if candidate in orderItems:
                found.append(candidate)
        elif all(member in orderItems for member in candidate):
            # Pair (stored as a list): every member must be present
            found.append(list(candidate))
    return found
    

def nextTuple(rid, order, popularTuples):
    """Return the popular objects of the given order, or [] if it does not exist.

    Looks in the module-level `routes` frame for rows whose RouteId equals
    `rid` and whose Order equals `order`. When at least one such row exists
    the result of `matches(rid, order, popularTuples)` is returned; otherwise
    an empty list is returned.
    """
    selected = routes[(routes.RouteId == rid) &
                      (routes.Order   == order)]
    if selected.empty:
        # No such order on this route
        return []
    return matches(rid, order, popularTuples)

def searchNext(tuple, popularTuples):
    """Find what follows every occurrence of a popular object.

    For each row of the module-level `routes` frame whose items contain
    `tuple`, calls `nextTuple` for the same RouteId with Order + 1 and
    collects the results.

    Parameters:
        tuple: the popular object to look for - either a single item or a
            list of items (a pair). The name shadows the builtin `tuple`; it
            is kept so existing callers keep working.
        popularTuples: passed through to `nextTuple`.

    Returns:
        A list with one entry per matching row, each entry being whatever
        `nextTuple` returned for the following order of that route.
    """
    # A pair is stored as a list. Testing `pair in rowItems` would ask whether
    # the list itself is an element of a list of strings, which is never true,
    # so a pair matches when every one of its members is present instead.
    wanted = tuple if isinstance(tuple, list) else [tuple]
    # Walk the three columns together rather than looking rows up by the
    # labels 0..n-1, which only works with a default index.
    keys = [(rid, order)
            for rid, order, rowItems in zip(routes.RouteId, routes.Order, routes.Items)
            if all(item in rowItems for item in wanted)]
    return [nextTuple(rid, order + 1, popularTuples)
            for rid, order in keys]

In [98]:
# Pair every popular object with the results searchNext finds for it
popularPatterns = []
for pop in popularTuples:
    popularPatterns.append((pop, searchNext(pop, popularTuples)))

In [101]:
# Display the whole list of popular objects (frequent items and frequent pairs)
popularTuples

['Miels',
 'Brussels-pate',
 'Pesce-curado',
 'White-coconut-chocolate',
 'Green-kale',
 'Banana-nut-loaf',
 'Thes-verts',
 'Baking',
 'Gelati',
 'Chia-seeds',
 'Asian-grocery',
 'Asian-food',
 'Beef-mince',
 'Redcurrant-tea',
 'Dried-pasta',
 'Frozen-spring-rolls',
 'Groceries',
 'Flour-tortillas',
 'Snack-bar',
 'Multivitamin-supplement',
 'Mint-jellies',
 'Faux-fromage',
 'Pineapple-slices-in-juice',
 'Pringles-potato-chips',
 'Choux-farcis',
 'Malt-vinegar',
 'Sugar-cane-drink',
 'Venison',
 'Easter-buns',
 'Garlic-mashed-potatoes',
 'Fresh-juices',
 'Green-salads',
 'Conservation',
 'Beer-batter-fries',
 'Pastrami',
 'Kartoffelchips',
 'Multiseeded-sliced-bread',
 'Pitta-breads',
 'Toppings',
 'Fecules',
 'Cream-of-tartar',
 'Cous-cous',
 'Indian-sauces',
 'Prepackaged-sandwiches',
 'Spice-red-pepper',
 'Schwarztees',
 'Concombres',
 'Sliced-beetroot',
 'Crema-spalmabile',
 'Artisan products',
 'Gateau-chinois',
 'Sparkling-mineral-waters',
 'Balsamic-vinaigrette-dressing',
 'Wate

In [58]:
# Debugging aid: run the same containment test as matches() against a
# hand-written order instead of a row of `routes`.
probeItems = ['Milkshake', 'Strawberry-licorice']
itemList   = []
for candidate in popularTuples:
    if not isinstance(candidate, list):
        # Single item
        if candidate in probeItems:
            itemList.append(candidate)
    elif all(member in probeItems for member in candidate):
        # Pair: every member must be in the probe order
        itemList.append(list(candidate))

In [59]:
# Show which popular objects were found in the hand-written probe order
itemList

[]

In [None]:
# Check whether the entry at position 2 of popularTuples is a list
# (matches() treats list entries as pairs and everything else as single items)
isinstance(popularTuples[2], list)

In [None]:
# List the pairs (list entries) of popularTuples that contain one of the two
# probe items; a pair is emitted once for each of its members that matches.
[candidate for candidate in popularTuples
           if isinstance(candidate, list)
               for member in candidate
                   if member in ['Milkshake', 'Strawberry-licorice']]

In [None]:
# Expand the entry at position 203 of popularTuples into a list of its elements
list(popularTuples[203])

In [None]:
# Distinct items of the row with index label 183
set(routes.Items[183])

In [None]:
# Display the (popular object, searchNext results) pairs
popularPatterns

In [None]:
# (RouteId, Order) of every row whose items include 'Milkshake'
milkshakeRows = [row for row in range(len(routes))
                     if 'Milkshake' in routes.Items[row]]
[(routes.RouteId[row], routes.Order[row]) for row in milkshakeRows]