In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime as dt
from faker import Faker
from random import choices

In [2]:
# os.listdir returns every directory entry in arbitrary order, so keep only
# the CSV files and sort them to make the run reproducible.
files = sorted(name for name in os.listdir(path='data') if name.endswith('.csv'))

In [3]:
files

['friday.csv', 'monday.csv', 'thursday.csv', 'tuesday.csv', 'wednesday.csv']

In [4]:
def _read_day(path, delimiter):
    """Read one CSV file and prefix every customer number with the weekday."""
    df = pd.read_csv(path, delimiter=delimiter, parse_dates=True, index_col='timestamp')
    # Prefixing with the abbreviated weekday of the first row (e.g. 'Mon')
    # keeps ids from different files distinct after concatenation.
    day_name = df.index.day_name()[0][:3]
    df['customer_no'] = day_name + '_' + df['customer_no'].astype(str)
    return df


def read_files(files, datapath='data/', delimiter=';'):
    """
    Load one or several CSV files into a single DataFrame indexed by timestamp.

    Parameters
    __________

    files: either a list/tuple of file names located in `datapath`, or a
        single path that is passed to pandas as-is (`datapath` is not applied
        to a single path).
    datapath: directory holding the files when `files` is a list/tuple; a
        trailing slash is optional.
    delimiter: column separator used in the CSV files.

    Returns
    _______

    pandas.DataFrame: rows of all files, with `customer_no` rewritten to
        '<Day>_<number>' where <Day> is the three-letter weekday of the first
        row of the file the row came from.
    """
    if isinstance(files, (list, tuple)):
        frames = [_read_day(os.path.join(datapath, name), delimiter) for name in files]
        return pd.concat(frames)
    return _read_day(files, delimiter)

In [5]:
# Load every file named in `files` through read_files into one DataFrame.
shop_data = read_files(files)

In [None]:
# Faker instance; fake.name() is called further down to name each customer.
fake = Faker()

# Insert per-minute time steps to show the minutes spent in each section

In [8]:
# Resample every (customer_no, location) group to one row per minute and
# forward-fill. 'min' is the minute alias; the older 'T' spelling is
# deprecated in recent pandas releases.
shop_df = shop_data.groupby(['customer_no', 'location']).resample('min').ffill()

In [9]:
# Remove the 'location' index level, then move index level 1 into a column.
shop_df = (
    shop_df
    .droplevel('location')
    .reset_index(level=1)
)

In [10]:
# Drop the customer_no column.
# NOTE(review): presumably it duplicates the customer_no index level that the
# next cell turns back into a column — TODO confirm.
shop_df = shop_df.drop(columns='customer_no')

In [11]:
# Move the remaining index into regular columns, leaving a default integer index.
shop_df = shop_df.reset_index()

In [12]:
shop_df

Unnamed: 0,customer_no,timestamp,location
0,Fri_1,2019-09-06 07:05:00,checkout
1,Fri_1,2019-09-06 07:00:00,dairy
2,Fri_1,2019-09-06 07:04:00,spices
3,Fri_10,2019-09-06 07:11:00,checkout
4,Fri_10,2019-09-06 07:06:00,fruit
...,...,...,...
43623,Wed_998,2019-09-04 16:59:00,checkout
43624,Wed_998,2019-09-04 16:53:00,dairy
43625,Wed_998,2019-09-04 16:57:00,fruit
43626,Wed_999,2019-09-04 16:54:00,checkout


# Locations recorded for each customer, in the row order of shop_data.
location_list = shop_data.groupby(['customer_no'])['location'].apply(list)

# NOTE(review): Store and Customer are defined in cells further down this
# notebook; those cells have to be run before this one.
food_store = Store()

# Customer takes (name, cust_id, state) and Store exposes add_customer, so one
# Customer is registered per id. The previous calls to send_info and
# store_customer targeted methods that neither class defines, and the list of
# locations was passed where Customer expects a single state.
for customer_no in location_list.index:
    food_store.add_customer(Customer(fake.name(), customer_no))

food_store.customers[0]

# Create entrance times and fill checkout times

In [13]:
def fill_enter(dataframe):
    """
    Derive a customer's entrance time from one row of the pivoted flow table.

    Parameters
    __________

    dataframe: pandas.Series with one timestamp per location for a single
        customer (what DataFrame.apply(..., axis=1) passes in); missing
        values are ignored.

    Returns
    _______

    The earliest non-null timestamp of the row minus one minute.
    """
    # Leave out an 'entrance' value that is already present, so that running
    # the calling cell again does not push the entrance back one more minute.
    visits = dataframe.drop(labels='entrance', errors='ignore').dropna()
    return visits.min() - dt.timedelta(minutes=1)

In [14]:
def fill_exit(dataframe):
    """
    Return a customer's checkout time, filling it in when it is missing.

    Parameters
    __________

    dataframe: pandas.Series with one timestamp per location for a single
        customer (what DataFrame.apply(..., axis=1) passes in).

    Returns
    _______

    The existing 'checkout' timestamp when there is one; otherwise the latest
    non-null timestamp of the row rounded up to the next full hour.
    """
    checkout = dataframe.get('checkout')
    if pd.isna(checkout):
        # Round up, not to the nearest hour: rounding to the nearest hour can
        # yield a time earlier than the customer's last recorded timestamp.
        return dataframe.dropna().max().ceil('h')
    return checkout

In [15]:
# Pivot to one row per customer and one column per location so that entrance
# and checkout times can be added as columns. With the default aggregation a
# cell holds the mean timestamp when a customer has several rows for a location.
customer_flow = shop_df.pivot_table(
    index='customer_no',
    columns='location',
    values='timestamp',
)

In [16]:
# Fill entrance and checkout times, one value per customer row.
# 'entrance' is added first, so the rows passed to fill_exit already contain it.
customer_flow['entrance'] = customer_flow.apply(fill_enter, axis=1)
# Replaces the 'checkout' column with the value fill_exit returns for each row.
customer_flow['checkout'] = customer_flow.apply(fill_exit, axis=1)

# Table from wide to long

In [17]:
# Turn index level 0 into a regular column; the melt in the next cell uses
# 'customer_no' as its id column.
customer_flow = customer_flow.reset_index(level=0)

In [18]:
# Unpivot: one row per customer and location, with the value in 'timestamp'
customer_flow = pd.melt(customer_flow, id_vars='customer_no', value_name='timestamp')

In [19]:
# Drop every row that contains a missing value
customer_flow = customer_flow.dropna()

In [20]:
# Order rows by customer and then by time so each flow reads top to bottom
customer_flow = customer_flow.sort_values(['customer_no', 'timestamp'])

In [21]:
customer_flow

Unnamed: 0,customer_no,location,timestamp
37225,Fri_1,entrance,2019-09-06 06:59:00
7445,Fri_1,dairy,2019-09-06 07:00:00
29780,Fri_1,spices,2019-09-06 07:04:00
0,Fri_1,checkout,2019-09-06 07:05:00
37226,Fri_10,entrance,2019-09-06 07:05:00
...,...,...,...
29778,Wed_998,fruit,2019-09-04 16:57:00
7443,Wed_998,checkout,2019-09-04 16:59:00
44669,Wed_999,entrance,2019-09-04 16:52:00
29779,Wed_999,fruit,2019-09-04 16:53:00


# Shift location one step

In [None]:
# sort_values returns a new frame ordered by customer and time; customer_flow
# itself is left untouched.
customer_transitions = customer_flow.sort_values(by=['customer_no', 'timestamp'])

In [None]:
# Take an explicit copy of the two columns: a later cell assigns a new column
# to this frame, which on a bare column selection can trigger pandas'
# SettingWithCopyWarning.
customer_transitions = customer_transitions[['customer_no', 'location']].copy()

In [None]:
# For every row, the location found in the same customer's next row. The last
# row of each customer gets NaN because there is no following row to shift in.
customer_transitions['location+1'] = customer_transitions.groupby('customer_no')['location'].shift(-1)

In [None]:
customer_transitions

# Create Probabilities table

In [None]:
# Row-normalised transition table: each row is a current location and its
# values over the next locations sum to 1.
# NOTE(review): the variable name is misspelled ("probabilites"); it is kept
# because the following cell displays it under this name.
probabilites = pd.crosstab(
    index=customer_transitions['location'],
    columns=customer_transitions['location+1'],
    normalize='index',
)

In [None]:
probabilites

# Pass the table under the name it was actually assigned to ("probabilites");
# the correctly spelled name is never defined and raised a NameError.
# NOTE(review): Store, as defined in this notebook, has no store_states
# method, so the class still needs one — TODO confirm intent.
food_store.store_states(probabilites)

# Predict states

In [None]:
from random import choices

In [None]:
# Distinct locations, in order of first appearance
location_states = list(pd.unique(customer_transitions['location']))
location_states

# Create the Store and Customer classes

In [6]:
class Store():
    """
    A store that holds customers and moves them through its sections.

    Parameters
    __________

    sections: sections (states) of the store. When omitted, the module-level
        STATES is looked up at the moment the store is created.
    times: value kept on the instance as `times`. When omitted, the
        module-level TIMES is looked up at the moment the store is created.
    """

    def __init__(self, sections=None, times=None):
        # The fallbacks are resolved here rather than in the signature:
        # signature defaults are evaluated when the class cell runs, which
        # raises NameError if STATES / TIMES are only defined in a later cell.
        self.customers = []
        self.sections = STATES if sections is None else sections
        self.times = TIMES if times is None else times
        # Number of simulate_flow steps taken so far.
        self.minute = 0

    @property
    def customer_count(self):
        """Current number of customers held by the store."""
        # Computed on access; a value stored in __init__ would stay at 0.
        return len(self.customers)

    def add_customer(self, data):
        """
        Adds customer class instance to list of customers in the store.

        Returns
        _______

        None
        """
        self.customers.append(data)

    def show_sections(self):
        """
        Show all sections of store

        Returns
        _______

        list: list of sections in store
        """
        return self.sections

    def simulate_flow(self):
        """
        Simulates one minute of customers moving through the store.

        Increments `minute` by one and calls next_state() on every customer
        whose `is_active` is true (customers without that attribute are
        always advanced).

        Returns
        _______

        list: the state of every customer after the step, in the order of
            `customers`
        """
        self.minute += 1
        for customer in self.customers:
            if getattr(customer, 'is_active', True):
                customer.next_state()
        return [customer.state for customer in self.customers]

    def show_busiest(self):
        """
        Placeholder for the busiest time in the store; not implemented yet.

        Returns
        _______

        None
        """
        # TODO: presumably to be derived from self.times — confirm.
        return None

    def show_slowest(self):
        """Placeholder for the slowest time in the store."""
        raise NotImplementedError('show_slowest is not implemented yet')

    def most_visited(self):
        """Placeholder for the most visited section."""
        raise NotImplementedError('most_visited is not implemented yet')

    def least_visited(self):
        """Placeholder for the least visited section."""
        raise NotImplementedError('least_visited is not implemented yet')

In [7]:
class Customer():
    """
    A single customer moving through the store.

    Parameters
    __________

    name: display name of the customer
    cust_id: identifier of the customer
    state: section the customer is currently in; starts at 'entrance'
    """

    def __init__(self, name, cust_id, state='entrance'):
        self.name = name
        self.cust_id = cust_id
        self.state = state

    def next_state(self, probabilities=None, states=None):
        """
        Move the customer to a randomly chosen next state.

        Parameters
        __________

        probabilities: object subscriptable by the current state that yields
            one weight per entry of `states`. Defaults to the module-level
            PROBABILITIES.
            NOTE(review): if this is the row-normalised crosstab DataFrame,
            subscripting selects a column, not a row — TODO confirm the
            expected type.
        states: candidate next states, in the same order as the weights.
            Defaults to the module-level STATES.

        Returns
        _______

        str: the new state
        """
        if probabilities is None:
            probabilities = PROBABILITIES
        if states is None:
            states = STATES
        probs = probabilities[self.state]
        # The keyword is `weights`; the former `weighs` raised a TypeError.
        self.state = choices(states, weights=probs)[0]
        return self.state

    def __repr__(self):
        return f'{self.name} (Customer: {self.cust_id}) at {self.state}'

    @property
    def is_active(self):
        """True until the customer has reached 'checkout'."""
        return self.state != 'checkout'
        
    

In [None]:
if __name__ == '__main__':
    # Three customers that start in the default 'entrance' state. The former
    # placeholders were empty strings, which have no is_active attribute.
    customers = [Customer(f'Customer {number}', number) for number in range(1, 4)]

    # Advance every active customer one step at a time until none is active.
    # NOTE(review): Customer.next_state reads the module-level PROBABILITIES
    # and STATES, which must be defined before this cell runs.
    while any(customer.is_active for customer in customers):
        for customer in customers:
            if customer.is_active:
                customer.next_state()