In [5]:
import pandas as pd
import numpy as np
import random

# Customer Class

In [13]:
class Customer:
    """
    A single customer moving around the Doodl supermarket in a MCMC simulation.

    Parameters
    ----------
    id :
        Identifier of the customer.
    probs : pandas.DataFrame
        Transition table. Each row is labelled with a current section and
        each column with a possible next section; a row holds the weights
        used to pick the next section when leaving that row's section.
    section : str, default "entrance"
        Section the customer starts in.
    name : str, default "Alice"
        Display name of the customer.
    budget : default 100
        Budget of the customer (only stored and shown in the repr).
    """

    def __init__(self, id, probs, section="entrance", name="Alice", budget=100):
        self.name = name
        self.id = id
        self.section = section
        self.transition_probs = probs
        self.budget = budget

    def next_section(self):
        """
        Propagates the customer to the next section in the supermarket.

        The candidates are the column labels of this customer's own
        transition table and the weights are the row of the current
        section, so labels and weights always line up.

        A section that has no row in the table (for example a terminal
        "exit" column without outgoing transitions) is treated as
        absorbing: the customer simply stays where they are.

        Returns nothing.
        """
        table = self.transition_probs

        # No outgoing transitions defined for this section -> nowhere to go.
        if self.section not in table.index:
            return

        # Row = "from" section, row index (the table's columns) = "to" sections.
        outgoing = table.loc[self.section]
        self.section = random.choices(list(outgoing.index), weights=list(outgoing))[0]

    def is_active(self):  # detect churned customers
        """
        Returns True if the customer has not reached the checkout yet.

        Only the section named 'checkout' counts as inactive. A status
        message is printed on every call.
        """
        if self.section != 'checkout':
            print(" Customer is active ")
            return True
        print(" Customer is inactive ")
        return False

    def __repr__(self):
        return f"The customer {self.name} with an id of {self.id} a budget of {self.budget} $ is at the section of {self.section} in Doodl supermarket."
    

In [14]:
# Build a demo customer with the default name, budget and start section.
# NOTE(review): `probs` is only loaded from "transition_probs.csv" in a later
# cell of this notebook; on a fresh kernel that cell has to run first.
cust1 = Customer(id=1, probs=probs)
cust1

The customer Alice with an id of 1 a budget of 100 $ is at the section of entrance in Doodl supermarket.

# Simulation for one customer.

In [2]:
# Load the customer log; the first CSV column is used as the row index.
customers = pd.read_csv(
    "customer_data_extended.csv",
    index_col=0,
)
customers

Unnamed: 0_level_0,location,time,weekday,date,hour,minute,customer_id
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-09-02 07:03:00,dairy,07:03:00,0.0,2019-09-02,7,3,0_1
2019-09-02 07:03:00,dairy,07:03:00,0.0,2019-09-02,7,3,0_2
2019-09-02 07:04:00,dairy,07:04:00,0.0,2019-09-02,7,4,0_3
2019-09-02 07:04:00,dairy,07:04:00,0.0,2019-09-02,7,4,0_4
2019-09-02 07:04:00,spices,07:04:00,0.0,2019-09-02,7,4,0_5
...,...,...,...,...,...,...,...
2019-09-06 21:50:00,checkout,21:50:00,4.0,2019-09-06,21,50,4_1508
2019-09-06 21:50:00,drinks,21:50:00,4.0,2019-09-06,21,50,4_1509
2019-09-06 21:50:00,checkout,21:50:00,4.0,2019-09-06,21,50,4_1509
2019-09-06 21:50:00,spices,21:50:00,4.0,2019-09-06,21,50,4_1510


In [4]:
# Load the transition table; the first CSV column (the current section)
# becomes the row index, the remaining columns are the next sections.
probs = pd.read_csv(
    "transition_probs.csv",
    index_col=0,
)
probs

Unnamed: 0,checkout,dairy,drinks,fruit,spices,exit
dairy,0.103313,0.737023,0.058546,0.049772,0.051347,0
drinks,0.21563,0.0109,0.59856,0.087918,0.086992,0
entrance,0.000537,0.287441,0.153392,0.3773,0.18133,0
fruit,0.201558,0.095744,0.054834,0.597199,0.050665,0
spices,0.150526,0.193214,0.163109,0.090953,0.402198,0
checkout,0.0,0.0,0.0,0.0,0.0,1


Distribution of customers across their first stop (the first recorded row of each customer).

In [16]:
# Keep only the first row seen for every customer_id (their first stop).
is_repeat_visit = customers.duplicated(subset="customer_id", keep="first")
first = customers.loc[~is_repeat_visit]

Initial section distribution: the share of customers whose first stop is each section.

In [17]:
# Number of customers whose first stop is each location.
init_smatrix = (
    first
    .groupby("location")["customer_id"]
    .count()
)

In [18]:
# Turn the counts into shares of the total.
init_smatrix = init_smatrix.div(init_smatrix.sum())

In [19]:
# Display the first-stop shares per location.
init_smatrix

location
dairy     0.287576
drinks    0.153526
fruit     0.377435
spices    0.181464
Name: customer_id, dtype: float64

MCMC for one customer.

In [37]:
# Sections a customer can start in.
sections_first = [
    "dairy",
    "drinks",
    "fruit",
    "spices",
]

# Every state of the chain, in alphabetical order.
# NOTE(review): this order differs from the column order of `probs`
# (checkout, dairy, drinks, fruit, spices, exit); any weight list paired
# with `sections` must follow the order used here.
sections = [
    "checkout",
    "dairy",
    "drinks",
    "exit",
    "fruit",
    "spices",
]

Initial section.

In [38]:
# First-stop probabilities, taken from the computed `init_smatrix` instead of
# hand-copied rounded numbers, so they cannot go stale when the data changes.
# `reindex` orders them to match `sections_first`, which is what the weighted
# draw pairs them with.
P = init_smatrix.reindex(sections_first).to_numpy()
P

array([0.287576, 0.153526, 0.377435, 0.181464])

Draw the customer's first section with a weighted random choice.

In [39]:
# Sample exactly one starting section, weighted by the first-stop probabilities.
# NOTE(review): no random seed is set in the visible cells, so this draw
# differs between runs.
(section,) = random.choices(sections_first, weights=P, k=1)

In [40]:
# Column-wise view of the transition table: each key is a column label of
# `probs` (a destination section) and each value lists that column's entries
# in row order (dairy, drinks, entrance, fruit, spices, checkout).
# So P_f[x] holds the probabilities of arriving at x, not of leaving x.
P_f = {column: probs[column].tolist() for column in probs.columns}

In [41]:
# Display the column-keyed dictionary built from `probs`.
P_f

{'checkout': [0.1033125246049153,
  0.2156298200514138,
  0.0005372733378106,
  0.2015577059240028,
  0.1505256451099076,
  0.0],
 'dairy': [0.7370226646420336,
  0.0108997429305912,
  0.2874412357286769,
  0.0957438439147195,
  0.1932143994902835,
  0.0],
 'drinks': [0.0585456386030032,
  0.598560411311054,
  0.1533915379449295,
  0.0548343954055542,
  0.163109270468302,
  0.0],
 'fruit': [0.0497722287835329,
  0.087917737789203,
  0.3773002014775017,
  0.5971992762174495,
  0.0909525326537113,
  0.0],
 'spices': [0.0513469433665148,
  0.0869922879177377,
  0.1813297515110812,
  0.0506647785382739,
  0.4021981522777955,
  0.0],
 'exit': [0, 0, 0, 0, 0, 1]}

In [42]:
def get_next_section(section, transition_probs=None):
    """
    Draw the section a customer visits after ``section``.

    Parameters
    ----------
    section : str
        Current section; must be a row label of the transition table.
    transition_probs : pandas.DataFrame, optional
        Transition table with current sections as rows and next sections
        as columns. Defaults to the notebook-level ``probs`` table.

    Returns
    -------
    str
        One column label of the table, sampled with the weights found in
        the row of ``section``.

    Raises
    ------
    KeyError
        If ``section`` has no row in the table.
    """
    table = probs if transition_probs is None else transition_probs

    # Outgoing probabilities live in the ROW of the current section; the
    # row's own index (the table's columns) supplies the matching labels,
    # so weights and candidates cannot get out of order.
    outgoing = table.loc[section]
    return random.choices(list(outgoing.index), weights=list(outgoing))[0]

# Path of visited sections, starting with the sampled first section.
result = [section]

# Always take at least one step, then keep stepping until the last visited
# section is the checkout. `section` is updated on every step.
while len(result) < 2 or result[-1] != "checkout":
    section = get_next_section(section)
    result.append(section)

# Cell output: the whole path as one comma-separated string.
", ".join(result)

'dairy, checkout'