In [342]:
import pandas as pd
import numpy as np
from random import randint
import seaborn as sns
import matplotlib.pyplot as plt


In [2]:
#Operational Constants
INIT_CUSTOMER_STATES = ['dairy', 'drinks', 'fruit','spices']
CUSTOMER_STATE_KEY = ['checkout','dairy', 'drinks', 'fruit','spices'] # add entrance maybe - model this wrt number of ppl allowed in supermarket
CUST_STATE_VAL = [0,1,2,3,4]
CUST_STATE_DICT = dict(zip(CUSTOMER_STATE_KEY, CUST_STATE_VAL))

prob_first_loc = pd.read_csv('../data/first_loc_prob.csv', index_col = 0)
first_proba = prob_first_loc['first_loc_prob'].to_numpy()
P = pd.read_csv('../data/transition_prob_matrix.csv', index_col = 0)

In [3]:
P

Unnamed: 0,checkout,dairy,drinks,fruit,spices
checkout,0.0,0.0,0.0,0.0,0.0
dairy,0.390041,0.001282,0.222697,0.190425,0.195555
drinks,0.536748,0.027145,0.0,0.219206,0.216901
fruit,0.498829,0.237993,0.136861,0.000586,0.125732
spices,0.250733,0.323741,0.272848,0.152411,0.000266


In [333]:
def rand_initial_state_w_given_prob(init_state_space, proba):
    """
    Draws a random initial state with a given p derived from the data for the customer except checkout
    Returns the initial customer state key for one customer
    """
    rand_starting_state = np.random.choice(init_state_space, p = proba)
    return rand_starting_state

In [335]:
rand_initial_state_w_given_prob(INIT_CUSTOMER_STATES, first_proba)

'dairy'

In [376]:
def initial_state_vectos(init_state, state_space):
    """
    Takes the initial state key
    Returns an initial state matrix including checkout state
    """
    if init_state == 'checkout': # raise Exception
        return None
    shape_of_matrix = len(state_space)
    zero_vec = np.zeros(shape_of_matrix)
    init_val = CUST_STATE_DICT[init_state] 
    zero_vec[init_val] = 1
    return zero_vec

In [337]:
#initial_state_vector('checkout', CUSTOMER_STATE_KEY)
zero_vec = initial_state_vector('dairy', CUSTOMER_STATE_KEY)

In [338]:
zero_vec

array([0., 1., 0., 0., 0.])

In [339]:
def next_cust_state_vec(zero_vec, transition_prob_matrix):
    """Takes the initial state vector
    Converts it to value and makes an initial state vector
    From this vector, generates the next_state_prob_vec 
    with given transition_prob_matrix 
    """
    next_state_prob_vec = zero_vec.dot(transition_prob_matrix)
    return next_state_prob_vec


In [342]:
following = next_cust_state_vec(zero_vec, P)

In [9]:
#In util
def get_key(val, dictionary): 
    """Find the key associated with a value in a dictionary"""
    for key, value in dictionary.items(): 
        if val == value: 
            return key 

In [341]:
def next_state(next_state_vec, state_space, dictionary):
    """Return the next state key by drawing a random state with underlying probability of next_state_prob_vec"""
    next_state_val = np.random.choice(range(len(state_space)), p=next_state_vec)
    next_state_key = get_key(next_state_val, dictionary)
    return next_state_key

In [343]:
next_state(following, CUSTOMER_STATE_KEY, CUST_STATE_DICT)

'checkout'

In [345]:
init = rand_initial_state_w_given_prob(INIT_CUSTOMER_STATES, first_proba)

In [346]:
init

'drinks'

In [353]:
traj = simulate(init)

In [354]:
traj

['drinks', 'fruit', 'checkout']

In [19]:
class SupermarketCustomer:
    """A class that can create a shopping trajectory for a single supermarket customer
    It takes the possible initial and all state spaces, transition prob matrix, possible initial loc array"""
    
    
    def __init__(self, state_space, initial_state_space, transition_prob_matrix, inital_location_probabilities):
        self.state_space = state_space
        self.initial_state_space = initial_state_space
        self.transition_prob_matrix = transition_prob_matrix
        self.initial_location_probabilities = inital_location_probabilities
        self.state = np.random.choice(self.initial_state_space, p = self.initial_location_probabilities)

        self.customer_state_dict = CUST_STATE_DICT
        
        """
        Operating parameters:
        state_space = list of possible states in a supermarket
        initial_state_space = list of possible states at the beginning of a simulation
        transition_prob_matrix = DataFrame derived from EDA
        first_location_probabilities = array of probabilities of first location
        """
    
    
    def initial_state_vector(self):
        """
        Takes the initial state key
        Returns an initial state vector including checkout state
        """
        if self.state == 'checkout': # raise Exception
            return None
        shape_of_matrix = len(self.state_space)
        zero_vec = np.zeros(shape_of_matrix)
        init_val = self.customer_state_dict[self.state] 
        zero_vec[init_val] = 1
        return zero_vec
    
    def move_next(self):
        """Takes the initial state vector
        Converts it to value and makes an initial state vector
        From this vector, generates the next_state_prob_vec 
        with given transition_prob_matrix 
        """
        zero_vec = self.initial_state_vector()
        next_state_prob_vec = zero_vec.dot(self.transition_prob_matrix)
        next_state_val = np.random.choice(range(len(self.state_space)), p=next_state_prob_vec)
        self.state = get_key(next_state_val, self.customer_state_dict)
        return self.state


    

In [309]:
irem = SupermarketCustomer(CUSTOMER_STATE_KEY, INIT_CUSTOMER_STATES, P, first_proba)

In [310]:
simulate(irem)

['drinks', 'checkout']

In [33]:
def simulate(customer):
    """Simulate the customer behavior n times
    Exit when customer ends up in checkout"""
    customer_trajectory = [customer.state]
    while True: 
        customer_trajectory.append(customer.move_next())
        if customer.state == 'checkout':
            break
    return customer_trajectory

In [22]:
first_proba

array([0.28757555, 0.15352586, 0.37743452, 0.18146407])

In [25]:
simulate(irem)

ValueError: probabilities do not sum to 1

In [405]:
cust_per_min = pd.read_csv('../data/cust_freq_corona.csv', index_col = 0)

In [406]:
cust_per_min

Unnamed: 0_level_0,mean
hour,Unnamed: 1_level_1
07:00:00,1
07:01:00,0
07:02:00,0
07:03:00,1
07:04:00,1
07:05:00,0
07:06:00,1
07:07:00,1
07:08:00,0
07:09:00,0


In [392]:
cust_per_min_corona = pd.DataFrame([cust_per_min['mean']/3]).transpose()
cust_per_min_corona['mean']=cust_per_min_corona['mean'].round().astype(int)

In [394]:
cust_per_min_corona.to_csv('../data/cust_freq_corona.csv', columns = ['mean'], index = True, header = True)

In [395]:
cust_per_min_corona

Unnamed: 0_level_0,mean
hour,Unnamed: 1_level_1
07:00:00,1
07:01:00,1
07:02:00,1
07:03:00,0
07:04:00,0
07:05:00,0
07:06:00,1
07:07:00,0
07:08:00,1
07:09:00,1


In [354]:
def create_customer(cust_per_min):
    customer_number = pd.DataFrame(columns = ['time','customer_no', 'locations'])
    cust_no = 0
    for i, row in cust_per_min.iterrows():
        nr_ppl = row['mean']
        if nr_ppl > 0:
            random_nr = np.random.choice(range(nr_ppl))
            for a in range(random_nr):
                customer = SupermarketCustomer(CUSTOMER_STATE_KEY, INIT_CUSTOMER_STATES, P, first_proba)
                traj = simulate(customer)
                cust_no = cust_no + 1
                customer_number = customer_number.append({'time': str(i), 'customer_no':cust_no, 'locations': traj}, ignore_index = True)
        else:
            pass
    return customer_number

In [362]:
short_cust = create_customer(cust_per_min_corona).head(5)

In [363]:
short_cust

Unnamed: 0,time,customer_no,locations
0,07:12:00,1,"[drinks, fruit, dairy, drinks, checkout]"
1,07:12:00,2,"[dairy, checkout]"
2,07:14:00,3,"[spices, dairy, checkout]"
3,07:20:00,4,"[spices, drinks, checkout]"
4,07:22:00,5,"[fruit, checkout]"


In [89]:
short_cust['locations'][1]

['spices', 'fruit', 'spices', 'checkout']

In [302]:
dairyd = pd.read_csv('../data/dairy_duration.csv')
fruitsd = pd.read_csv('../data/fruit_duration.csv')
spicesd = pd.read_csv('../data/spices_duration.csv')
drinksd = pd.read_csv('../data/drinks_duration.csv')
checkoutd = pd.read_csv('../data/checkout_duration.csv')
dairy_duration = dairyd['duration_min'].to_numpy()
dairy_duration = np.delete(dairy_duration, np.where(dairy_duration == dairy_duration.max()))
dairy_duration = np.delete(dairy_duration, np.where(dairy_duration == 0))
fruit_duration = fruitsd['duration_min'].to_numpy()
fruit_duration = np.delete(fruit_duration, np.where(fruit_duration == fruit_duration.max()))
fruit_duration = np.delete(fruit_duration, np.where(fruit_duration == 0))
spices_duration = spicesd['duration_min'].to_numpy()
spices_duration = np.delete(spices_duration, np.where(spices_duration == 0))
drinks_duration = drinksd['duration_min'].to_numpy()
drinks_duration = np.delete(drinks_duration, np.where(drinks_duration == 0))


16.0

['spices', 'dairy', 'spices', 'fruit', 'dairy', 'spices', 'drinks', 'checkout'] 0 days 00:00:00
['fruit', 'checkout'] 0 days 00:00:00
['drinks', 'spices', 'checkout'] 0 days 00:00:00
['fruit', 'dairy', 'fruit', 'spices', 'drinks', 'fruit', 'drinks', 'checkout'] 0 days 00:00:00
['dairy', 'drinks', 'dairy', 'checkout'] 0 days 00:00:00
['spices', 'checkout'] 0 days 00:00:00
['drinks', 'checkout'] 0 days 00:00:00
['fruit', 'drinks', 'checkout'] 0 days 00:00:00
['spices', 'fruit', 'checkout'] 0 days 00:00:00
['dairy', 'drinks', 'checkout'] 0 days 00:00:00


In [307]:
def shop(cust_df, d, f, s, dr):
    final_cust = pd.DataFrame(columns = ['date', 'time', 'customer_no', 'location', 'min_spent'])
    dd = 0
    for i, row in cust_df.iterrows():
        initial_time = row['time']
        cust_id = row['customer_no']
        location_list = row['locations']
        for element in location_list:
            if element == 'dairy':
                dd = int(np.random.choice(d))
            if element == 'fruits':
                dd = int(np.random.choice(f))
            if element == 'spices':
                dd = int(np.random.choice(s))
            if element == 'drinks':
                dd = int(np.random.choice(dr))
            if element == 'checkout':
                dd = int(np.random.choice(5, 1))
            final_cust = final_cust.append({'date': pd.to_datetime('today'),
                                            'time': initial_time, 'customer_no': cust_id,
                                            'location': element, 'min_spent': pd.Timedelta(np.timedelta64(dd, 'm'))}, ignore_index = True)
        final_cust['d'] = final_cust['date'].dt.date
        final_cust['time'] = pd.to_datetime(final_cust['time'])
        final_cust['min_number'] = final_cust['min_spent'].dt.seconds/60
        final_cust['min_spent_cumul'] = final_cust.groupby('customer_no')['min_number'].cumsum()
        final_cust['min_spent_cumul'] = pd.to_timedelta(final_cust['min_spent_cumul'], unit = 'm')
        final_cust['timestamp_new'] = final_cust['time']+ final_cust['min_spent_cumul']
        final_cust['timestamp_new'] = final_cust['timestamp_new'].dt.time

        clean_final = final_cust[['d','timestamp_new', 'customer_no', 'location']]
        clean_final = clean_final.set_axis(['date', 'time', 'customer_no', 'location'], axis=1, inplace=False)
    return clean_final

In [308]:
shop(short_cust, dairy_duration, fruit_duration, spices_duration, drinks_duration)

Unnamed: 0,date,time,customer_no,location
0,2020-04-08,07:00:00,1,fruit
1,2020-04-08,07:02:00,1,checkout
2,2020-04-08,07:05:00,2,fruit
3,2020-04-08,07:08:00,2,checkout
4,2020-04-08,07:06:00,3,spices
5,2020-04-08,07:09:00,3,dairy
6,2020-04-08,07:12:00,3,checkout
7,2020-04-08,07:10:00,4,drinks
8,2020-04-08,07:14:00,4,checkout
9,2020-04-08,07:16:00,5,fruit


In [None]:
clean_final

In [315]:
this_day = pd.to_datetime('today')

In [316]:
day = 1

In [318]:
pd.Timedelta(np.timedelta64(day, 'D'))

Timedelta('1 days 00:00:00')

In [319]:
this_day + pd.Timedelta(np.timedelta64(day, 'D'))

Timestamp('2020-04-09 20:59:13.724197')