In [42]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime as dt
from faker import Faker
import random
from random import choices
from collections import Counter
import time

In [2]:
files = os.listdir(path='data')

In [3]:
files

['friday.csv', 'monday.csv', 'thursday.csv', 'tuesday.csv', 'wednesday.csv']

In [4]:
def read_files(files, datapath='data/', delimiter = ';'):
        df_temp = []
        days = {'mo':1000,
                'tu':2000
                'we':3000
                'th':4000
                'fr':5000}

        if type(files) == list:
            for i in files:
                df = pd.read_csv(datapath+i, delimiter=delimiter, parse_dates=True, index_col='timestamp')
                day_name = df.index.day_name()[0][:2]
                
                df['customer_no'] = df.customer_no.apply(lambda x: f'{day_name}_{int(x)+days[day_name]}')
                df_temp.append(df)
            df = pd.concat(df_temp)
        else:
            df = pd.read_csv(files, delimiter=delimiter, parse_dates=True, index_col='timestamp')
            day_name = df.index.day_name()[0][:3]
            df['customer_no'] = df.customer_no.apply(lambda x: f'{day_name}_{x}')

        return df

In [5]:
shop_data = read_files(files)

# Insert timesteps to show minutes spent in section

In [6]:
shop_df = shop_data.groupby(['customer_no', 'location']).resample('T').ffill()

In [7]:
shop_df = shop_df.droplevel('location').reset_index(level=1)

In [8]:
shop_df = shop_df.drop('customer_no', axis=1)

In [9]:
shop_df = shop_df.reset_index()

In [10]:
shop_df

Unnamed: 0,customer_no,timestamp,location
0,Fri_1,2019-09-06 07:05:00,checkout
1,Fri_1,2019-09-06 07:00:00,dairy
2,Fri_1,2019-09-06 07:04:00,spices
3,Fri_10,2019-09-06 07:11:00,checkout
4,Fri_10,2019-09-06 07:06:00,fruit
...,...,...,...
43623,Wed_998,2019-09-04 16:59:00,checkout
43624,Wed_998,2019-09-04 16:53:00,dairy
43625,Wed_998,2019-09-04 16:57:00,fruit
43626,Wed_999,2019-09-04 16:54:00,checkout


location_list = shop_data.groupby(['customer_no'])['location'].apply(list)

food_store = Store()

for i in location_list.index:
    print(i, location_list[i])
    cust = Customer(fake.name(), i, location_list[i])
    data = cust.send_info()
    food_store.store_customer(data)

food_store.customers[0]['Fri_1'][1]

# Create entrance times and fill checkout times

In [11]:
def fill_enter(dataframe):
    
    entry_time = dataframe.loc[dataframe.notnull()].min() - dt.timedelta(minutes=1)
    return entry_time
    #if dataframe.entrance != dataframe.entrance:
    #    entry_time = dataframe.loc[dataframe.notnull()].min() - timedelta(minuntes=1)
    #    return entry_time
    #elif dataframe.entrance:
    #    return dataframe.entrance

In [12]:
def fill_checkout(dataframe):
    if dataframe.checkout != dataframe.checkout:
        closing_time = dataframe.loc[dataframe.notnull()].max().round('H')
        return closing_time
    elif dataframe.checkout:
        return dataframe.checkout

In [13]:
def fill_exit(dataframe):
    exit_time = dataframe.loc[dataframe.notnull()].max() + dt.timedelta(minutes=1)
    return exit_time

    #if dataframe.checkout != dataframe.checkout:
    #    closing_time = dataframe.loc[dataframe.notnull()].max().round('H')
    #    return closing_time
    #elif dataframe.checkout:
    #    return dataframe.checkout

In [14]:
#Create customer flow pivoted table for addition of entrance and checkout times
customer_flow = pd.pivot_table(shop_df, index='customer_no', columns='location', values='timestamp' )

In [15]:
#Fill Entrance and checkout times
customer_flow['entrance'] = customer_flow.apply(fill_enter, axis=1)
customer_flow['checkout'] = customer_flow.apply(fill_checkout, axis=1)
customer_flow['exit'] = customer_flow.apply(fill_exit, axis=1)

# Table from wide to long

In [16]:
customer_flow = customer_flow.reset_index(level=0)

In [17]:
#Unpivot table
customer_flow = customer_flow.melt(id_vars='customer_no', value_name='timestamp')

In [18]:
#Drop na values
customer_flow = customer_flow.dropna()

In [19]:
#Sort values to show flow in order
customer_flow = customer_flow.sort_values(by=['customer_no', 'timestamp'])

In [20]:
customer_flow

Unnamed: 0,customer_no,location,timestamp
37225,Fri_1,entrance,2019-09-06 06:59:00
7445,Fri_1,dairy,2019-09-06 07:00:00
29780,Fri_1,spices,2019-09-06 07:04:00
0,Fri_1,checkout,2019-09-06 07:05:00
44670,Fri_1,exit,2019-09-06 07:06:00
...,...,...,...
52113,Wed_998,exit,2019-09-04 17:00:00
44669,Wed_999,entrance,2019-09-04 16:52:00
29779,Wed_999,fruit,2019-09-04 16:53:00
7444,Wed_999,checkout,2019-09-04 16:54:00


# Shift location one step

In [21]:
customer_transitions = customer_flow.sort_values(by=['customer_no', 'timestamp'])

In [22]:
customer_transitions = customer_transitions[['customer_no', 'location']]

In [23]:
customer_transitions['location+1'] = customer_transitions.groupby('customer_no')['location'].shift(-1)

In [24]:
customer_transitions

Unnamed: 0,customer_no,location,location+1
37225,Fri_1,entrance,dairy
7445,Fri_1,dairy,spices
29780,Fri_1,spices,checkout
0,Fri_1,checkout,exit
44670,Fri_1,exit,
...,...,...,...
52113,Wed_998,exit,
44669,Wed_999,entrance,fruit
29779,Wed_999,fruit,checkout
7444,Wed_999,checkout,exit


# Create Probabilities table

In [25]:
customer_transitions['location']

37225    entrance
7445        dairy
29780      spices
0        checkout
44670        exit
           ...   
52113        exit
44669    entrance
29779       fruit
7444     checkout
52114        exit
Name: location, Length: 36589, dtype: object

In [26]:
probabilites = pd.crosstab(customer_transitions['location'], customer_transitions['location+1'], normalize=0)

In [27]:
STATES = probabilites.columns.tolist()

In [28]:
probabilites

location+1,checkout,dairy,drinks,exit,fruit,spices
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
checkout,0.0,0.0,0.0,1.0,0.0,0.0
dairy,0.452069,0.0,0.2022,0.0,0.174175,0.171556
drinks,0.625078,0.054449,0.0,0.0,0.165215,0.155258
entrance,0.0,0.30544,0.16454,0.0,0.357018,0.173002
fruit,0.587302,0.173669,0.123249,0.0,0.0,0.11578
spices,0.406399,0.21273,0.234513,0.0,0.146358,0.0


# Predict states

In [29]:
location_states = list(customer_transitions.location.unique())
location_states

['entrance', 'dairy', 'spices', 'checkout', 'exit', 'fruit', 'drinks']

# Global Variables

#### Store traffic by hour

In [30]:
TIMESTAMPS = customer_flow.timestamp.dt.hour.values.tolist()
TIMESTAMPS_totals = Counter(TIMESTAMPS)

#### Store traffic by section

In [31]:
visited = customer_flow.location.values.tolist()
visited_totals = Counter(visited)

#### States

In [32]:
STATES = probabilites.columns.tolist()

In [33]:
STATES

['checkout', 'dairy', 'drinks', 'exit', 'fruit', 'spices']

#### Probabilities

In [34]:
PROBABILITIES = probabilites.T.to_dict(orient='list')

In [35]:
PROBABILITIES

{'checkout': [0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
 'dairy': [0.45206914614981664,
  0.0,
  0.20220010476689365,
  0.0,
  0.17417496071241487,
  0.1715557883708748],
 'drinks': [0.6250777846919726,
  0.05444928438083385,
  0.0,
  0.0,
  0.16521468574984444,
  0.1552582451773491],
 'entrance': [0.0,
  0.30543989254533244,
  0.16453995970449967,
  0.0,
  0.3570181329751511,
  0.17300201477501678],
 'fruit': [0.5873015873015873,
  0.17366946778711484,
  0.12324929971988796,
  0.0,
  0.0,
  0.1157796451914099],
 'spices': [0.40639891082368956,
  0.2127297481279782,
  0.2345132743362832,
  0.0,
  0.146358066712049,
  0.0]}

#### Current Time

In [36]:
now = dt.datetime.now()
current_time = now.strftime("%H:%M:%S")

# Create Store and Customer Class

In [73]:
class Store():
    def __init__(self, name, sections=STATES, times=TIMESTAMPS_totals):
        self.name = name
        self.sections = sections
        
        #customer class for active customers and inactive customers
        self.active_customers = []
        self.visited_customers = []
        
        #Simulation visitation metrics
        self.times = times
        self.time_totals = []
        self.section_data = []
        
        #To output information
        self.to_print = []
        
    def add_customer(self, customer):
        """
        Adds customer class instance to list of customers in the store.
        
        Returns
        _______
        
        string telling name, state and time 
        """
        
        now = dt.datetime.now()
        current_time = now.strftime("%H:%M:%S")
                
        self.active_customers.append(customer)
        print(f'{customer.name} is at {self.name.title()} {customer.state} at {current_time}')
        
    def show_sections(self):
        """
        Show all sections of store
        
        Returns
        _______
        
        list: list of sections in store
        """
        return self.sections
   

    def simulate_flow(self):
        """
        Simulates customer moving through store.
        
        Returns
        _______
        
        str: customers next state 
        """
        now = dt.datetime.now()
        current_time = now.strftime("%H:%M:%S")
        
        
        for customer in self.active_customers:
            temp_time = []
            print(f'\n####### Moving {customer.name} [customer_id: {customer.cust_id}] through {self.name} #######')
            while customer.is_active:
                customer.next_state()
                if customer.state != 'checkout':
                    self.section_data.append(customer.state)

                print(f'{customer.cust_id} is now at {customer.state}: Timestamp({current_time})')
                
                #Append data for output
                record = {'customer':customer.cust_id, 'location': customer.state,  'timestamp': current_time}
                self.to_print.append(record)
                
                
                timestep = random.randint(1,5)
                
                now+= dt.timedelta(minutes=timestep)
                current_time = now.strftime("%H:%M:%S")
                temp_time.append(now)
                
            self.time_totals.append(temp_time)
            
            # Cutomer has reached check out, remove from active customers and add to visited list
            self.active_customers.remove(customer)
            self.visited_customers.append(customer)
            print(f'{customer.name} [customer_id: {customer.cust_id}] has just left {self.name}')
            
    
    def show_busiest(self):
        """
        Shows busiest store times
        """
        busiest = self.times.most_common()[0] 
        return f'{busiest[0]} o\'clock is the buisiest time of the day with {busiest[1]} total visits'
        
    def show_slowest(self):
        """
        Shows slowest store times
        """
        slowest = self.times.most_common()[-1]
        return f'{slowest[0]} o\'clock is the buisiest time of the day with {slowest[1]} total visits'
        
    def most_visited(self):
        #show section most visited
        self.data = Counter(self.section_data) 
        self.data = self.data.most_common()[0]
        return f'Most Visited Section: {self.data[0].title()} with {self.data[1]} vists'
        
    def least_visited(self):
        #show section least visited
        self.data = Counter(self.section_data) 
        self.data = self.data.most_common()[-1]
        return f'Least Visited Section: {self.data[0].title()} with {self.data[1]} vists'
    
    def customer_time_data(self):
        self.total_time = 0
        self.total_timesteps = 0
        for i in self.time_totals:
            time_diff = i[-1] - i[0]
            self.total = time_diff.seconds / 60
            
            self.total_time = self.total_time + self.total
            self.total_timesteps = self.total_timesteps + len(i)
            
        self.avg_time = self.total_time/self.total_timesteps
            
        print(f'Customers total time in store: {self.total_time} minutes \
        Customers average time in store: {round(self.avg_time, 1)} minutes')
        
    def save_data(self):
        now = dt.datetime.now()
        #current_time = now.strftime()
        current_time = now.strftime("%H_%M_%S")
        filename = f'customers_data_{current_time}'
        df = pd.DataFrame(self.to_print)
        df.to_csv(filename)
        print('CSV Generated')
        
    

In [38]:
class Customer():
    
    def __init__(self, cust_id,name, state='entrance'):
        self.name = name
        self.cust_id = cust_id
        self.state = state
        
    def next_state(self):
        probs = PROBABILITIES[self.state]
        self.state = choices(STATES, weights=probs)[0]
    
    def __repr__(self):
        return f'{self.name} (customer_id: {self.cust_id}) at {self.state}'
    
    @property
    def is_active(self):
        return self.state != 'checkout'
        
    

In [39]:
fake = Faker()

In [40]:
def customer_generator():
    random.seed(10)
    customer_ids = random.sample(range(10, 5000), 8)
    customer_names = [fake.name() for i in range(len(customer_ids))]
    
    return customer_ids, customer_names

In [74]:
if __name__ == '__main__':
    
    edeka = Store('Edeka')
    
    customer_ids, customer_names = customer_generator()
    for i in range(len(customer_ids)):
        cust_id = customer_ids[i]
        cust_name = customer_names[i]
    
    
        customer = Customer(cust_id, cust_name)
        edeka.add_customer(customer)
        
    

    for i in range(len(edeka.active_customers)):
        edeka.simulate_flow()
        time.sleep(2)
        
    print(f'\nThere are no more customers currently in {edeka.name}')
    #customer3 = ''
    

Hannah Casey is at Edeka entrance at 15:46:46
Austin Baker is at Edeka entrance at 15:46:46
Diana Cummings is at Edeka entrance at 15:46:46
William Perez is at Edeka entrance at 15:46:46
Amanda Hunt is at Edeka entrance at 15:46:46
Thomas Melendez is at Edeka entrance at 15:46:46
Holly Duarte is at Edeka entrance at 15:46:46
Amanda Price is at Edeka entrance at 15:46:46

####### Moving Hannah Casey [customer_id: 4690] through Edeka #######
4690 is now at fruit: Timestamp(15:46:46)
4690 is now at dairy: Timestamp(15:49:46)
4690 is now at checkout: Timestamp(15:51:46)
Hannah Casey [customer_id: 4690] has just left Edeka

####### Moving Diana Cummings [customer_id: 3523] through Edeka #######
3523 is now at drinks: Timestamp(15:55:46)
3523 is now at spices: Timestamp(15:57:46)
3523 is now at checkout: Timestamp(16:00:46)
Diana Cummings [customer_id: 3523] has just left Edeka

####### Moving Amanda Hunt [customer_id: 4745] through Edeka #######
4745 is now at fruit: Timestamp(16:02:46)
474

***Note that the follow block shows busiest times based off data
imported from the data used to get the MCMC probabilites data***

In [None]:
shop.show_busiest()

In [69]:
edeka.most_visited()

'Most Visited Section: Drinks with 5 vists'

In [76]:
edeka.least_visited()

'Least Visited Section: Spices with 2 vists'

In [75]:
edeka.customer_time_data()

Customers total time in store: 43.0 minutes         Customers average time in store: 2.0 minutes


In [77]:
edeka.save_data()

CSV Generated
