In [30]:
import random

import numpy as np
import pandas as pd

Our metric is the Expected Degrees of Separation, EDS, between Shop A and Shop B. That is, imagine a token is given to a customer of Shop A at random. The customer is told to give the token to a customer in the next shop they visit, again at random, and to mark the token. At each step the customer is told to do the same thing, so that the number of marks on the token records the number of customers the token has passed through. If the token comes to Shop B, it is taken, and the number of marks is recorded. EDS is the expected number of marks on the token at the destination shop.

Suppose we have a transition matrix that gives, for each ordered pair of shops, the probability that a customer of Shop A also shops at Shop B.

From Shop A	  |To Shop B       |P(A -> B)
--------------|----------------|--------
Tesco, Bristol|	Boots, Bath	   |0.1
Tesco, Bristol|	Asda, Bristol  |0.2
Asda, Bristol |	Currys, Bristol|0.1

We are looking for a table of the form:

Source Shop A |	Destination Shop B |EDS(A -> … -> B)
--------------|--------------------|-----------------
Tesco, Bristol|	Boots, Bath|3.5
Tesco, Bristol|Asda, Bristol|2.3
Tesco, Bristol|	Currys, Bristol|4.0



In [64]:
# Small named example, currently disabled in favour of the integer ids below.
# cities = ['bristol', 'bath', 'torquay', 'falmouth', 'gloucester', 'frome']
# brands = ['tesco', 'boots', 'currys', 'asda', 'specsavers', 'whsmith']

# Cities and brands are identified by consecutive integers.
cities = range(40)
brands = range(25)

# One (brand, city) tuple per shop; within each city the brand varies fastest.
shops = [
    (brand, city)
    for city in cities
    for brand in brands
]
shops[:5]

[(0, 0), (1, 0), (2, 0), (3, 0), (4, 0)]

In [65]:
# Synthetic transition table in long format: one row per ordered pair of
# distinct shops, in the order "every destination for the first source, then
# every destination for the second source, ...".
#
# `p_also_shops` is an unnormalised integer weight drawn uniformly from 1..99.
# A locally seeded generator keeps the table identical across kernel restarts,
# and all weights are drawn in a single vectorised call.
weights_rng = np.random.default_rng(0)

shop_pairs = [
    (from_shop, to_shop)
    for from_idx, from_shop in enumerate(shops)
    for to_idx, to_shop in enumerate(shops)
    if from_idx != to_idx
]

# Building the columns directly keeps `p_also_shops` an integer column; the
# shop columns hold the (brand, city) tuples as objects.
transition_matrix = pd.DataFrame({
    'from_shop': [pair[0] for pair in shop_pairs],
    'to_shop': [pair[1] for pair in shop_pairs],
    'p_also_shops': weights_rng.integers(1, 100, size=len(shop_pairs)),
})
transition_matrix.head()

Unnamed: 0,from_shop,to_shop,p_also_shops
0,"(0, 0)","(1, 0)",45
1,"(0, 0)","(2, 0)",67
2,"(0, 0)","(3, 0)",29
3,"(0, 0)","(4, 0)",19
4,"(0, 0)","(5, 0)",73


In [66]:
# Look-up from each source shop to its outgoing rows: the rows of
# `transition_matrix` whose `from_shop` equals that shop, reduced to the
# destination and weight columns.
outgoing_columns = ['to_shop', 'p_also_shops']
possible_transitions = {
    shop: transition_matrix.loc[
        transition_matrix['from_shop'] == shop, outgoing_columns
    ]
    for shop in shops
}

In [67]:
# Monte Carlo estimate of the Expected Degrees of Separation (EDS) for the
# first ten (source, destination) pairs of `transition_matrix`.
#
# Each trial is a random walk: start at the source shop, repeatedly move to a
# next shop drawn with probability proportional to `p_also_shops`, and add one
# mark per move until the destination shop is reached. The mean number of
# marks over `n_trials` walks is printed as the estimate.
n_trials = 5

# Seeded generator so the printed estimates are the same on every run.
walk_rng = np.random.default_rng(0)

# shop -> (list of destination shops, normalised probability vector).
# Filled on first visit so the normalisation is done once per shop rather
# than on every step of every walk.
step_tables = {}


def next_shop(current_shop):
    """Draw the shop visited after `current_shop`.

    Destinations come from `possible_transitions[current_shop]` and are chosen
    with probability proportional to their `p_also_shops` weight.

    Raises:
        KeyError: if `current_shop` is not a key of `possible_transitions`.
        ValueError: if the outgoing weights of `current_shop` do not sum to a
            positive number, so no next shop can be drawn.
    """
    if current_shop not in step_tables:
        outgoing = possible_transitions[current_shop]
        weights = np.asarray(outgoing['p_also_shops'], dtype=float)
        total = weights.sum()
        # `not total > 0` also catches an empty table (sum 0) and NaN weights.
        if not total > 0:
            raise ValueError(f'No outgoing transitions from {current_shop}')
        step_tables[current_shop] = (outgoing['to_shop'].tolist(), weights / total)
    destinations, probs = step_tables[current_shop]
    # Sample a position rather than the shop itself: shops are tuples, and a
    # list of tuples handed to `choice` would be interpreted as a 2-D array.
    return destinations[walk_rng.choice(len(destinations), p=probs)]


first_pairs = transition_matrix.head(10)
for shop_A, shop_B in zip(first_pairs['from_shop'], first_pairs['to_shop']):
    results = []
    for trial in range(n_trials):
        current_shop = shop_A
        marks = 0
        # The walk stops only on arrival, so shop_B must be reachable from
        # shop_A for this loop to terminate.
        while current_shop != shop_B:
            current_shop = next_shop(current_shop)
            marks += 1
        results.append(marks)
    print(f'From {shop_A} to {shop_B}: {np.array(results).mean()}')

From (0, 0) to (1, 0): 1092.4
From (0, 0) to (2, 0): 1224.2
From (0, 0) to (3, 0): 1120.4
From (0, 0) to (4, 0): 337.2
From (0, 0) to (5, 0): 745.8
From (0, 0) to (6, 0): 1737.2
From (0, 0) to (7, 0): 481.2
From (0, 0) to (8, 0): 1323.0
From (0, 0) to (9, 0): 1104.2
From (0, 0) to (10, 0): 437.6
