In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime as dt
from faker import Faker
import random
from random import choices
from collections import Counter
import time

In [2]:
files = os.listdir(path='data')

In [3]:
files

['friday.csv', 'monday.csv', 'thursday.csv', 'tuesday.csv', 'wednesday.csv']

In [4]:
def read_files(files, datapath='data/', delimiter = ';'):
        df_temp = []
        days = {'mo':1000,
                'tu':2000,
                'we':3000,
                'th':4000,
                'fr':5000}

        if type(files) == list:
            for i in files:
                df = pd.read_csv(datapath+i, delimiter=delimiter, parse_dates=True, index_col='timestamp')
                day_name = df.index.day_name()[0][:2].lower()
                
                df['customer_no'] = df.customer_no.apply(lambda x: f'{int(x)+days[day_name]}')
                df_temp.append(df)
            df = pd.concat(df_temp)
        else:
            df = pd.read_csv(files, delimiter=delimiter, parse_dates=True, index_col='timestamp')
            day_name = df.index.day_name()[0][:3]
            df['customer_no'] = df.customer_no.apply(lambda x: f'{day_name}_{x}')

        return df

In [5]:
shop_data = read_files(files)

In [6]:
shop_data

Unnamed: 0_level_0,customer_no,location
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-09-06 07:00:00,5001,dairy
2019-09-06 07:00:00,5002,drinks
2019-09-06 07:00:00,5003,fruit
2019-09-06 07:01:00,5002,checkout
2019-09-06 07:01:00,5004,drinks
...,...,...
2019-09-04 21:48:00,4528,spices
2019-09-04 21:49:00,4520,dairy
2019-09-04 21:49:00,4529,dairy
2019-09-04 21:49:00,4530,fruit


# Insert timesteps to show minutes spent in section

In [7]:
shop_df = shop_data.groupby(['customer_no', 'location']).resample('T').ffill()

In [8]:
shop_df = shop_df.droplevel('location').reset_index(level=1)

In [9]:
shop_df = shop_df.drop('customer_no', axis=1)

In [10]:
shop_df = shop_df.reset_index()

In [11]:
shop_df

Unnamed: 0,customer_no,timestamp,location
0,1001,2019-09-02 07:05:00,checkout
1,1001,2019-09-02 07:03:00,dairy
2,1002,2019-09-02 07:06:00,checkout
3,1002,2019-09-02 07:03:00,dairy
4,1003,2019-09-02 07:06:00,checkout
...,...,...,...
3109564,6507,2019-09-06 21:48:00,dairy
3109565,6508,2019-09-06 21:50:00,checkout
3109566,6508,2019-09-06 21:48:00,dairy
3109567,6509,2019-09-06 21:50:00,drinks


location_list = shop_data.groupby(['customer_no'])['location'].apply(list)

food_store = Store()

for i in location_list.index:
    print(i, location_list[i])
    cust = Customer(fake.name(), i, location_list[i])
    data = cust.send_info()
    food_store.store_customer(data)

food_store.customers[0]['Fri_1'][1]

# Create entrance times and fill checkout times

In [12]:
def fill_enter(dataframe):
    
    entry_time = dataframe.loc[dataframe.notnull()].min() - dt.timedelta(minutes=1)
    return entry_time
    #if dataframe.entrance != dataframe.entrance:
    #    entry_time = dataframe.loc[dataframe.notnull()].min() - timedelta(minuntes=1)
    #    return entry_time
    #elif dataframe.entrance:
    #    return dataframe.entrance

In [13]:
def fill_checkout(dataframe):
    if dataframe.checkout != dataframe.checkout:
        closing_time = dataframe.loc[dataframe.notnull()].max().round('H')
        return closing_time
    elif dataframe.checkout:
        return dataframe.checkout

In [14]:
def fill_exit(dataframe):
    exit_time = dataframe.loc[dataframe.notnull()].max() + dt.timedelta(minutes=1)
    return exit_time

    #if dataframe.checkout != dataframe.checkout:
    #    closing_time = dataframe.loc[dataframe.notnull()].max().round('H')
    #    return closing_time
    #elif dataframe.checkout:
    #    return dataframe.checkout

In [15]:
#Create customer flow pivoted table for addition of entrance and checkout times
customer_flow = pd.pivot_table(shop_df, index='customer_no', columns='location', values='timestamp' )

In [16]:
#Fill Entrance and checkout times
customer_flow['entrance'] = customer_flow.apply(fill_enter, axis=1)
customer_flow['checkout'] = customer_flow.apply(fill_checkout, axis=1)
customer_flow['exit'] = customer_flow.apply(fill_exit, axis=1)

# Table from wide to long

In [17]:
customer_flow = customer_flow.reset_index(level=0)

In [18]:
#Unpivot table
customer_flow = customer_flow.melt(id_vars='customer_no', value_name='timestamp')

In [19]:
#Drop na values
customer_flow = customer_flow.dropna()

In [27]:
#Sort values to show flow in order
customer_flow = customer_flow.sort_values(by=['timestamp'])

In [28]:
customer_flow

Unnamed: 0,customer_no,location,timestamp
27550,1001,entrance,2019-09-02 07:02:00
27551,1002,entrance,2019-09-02 07:02:00
27552,1003,entrance,2019-09-02 07:03:00
5511,1002,dairy,2019-09-02 07:03:00
5510,1001,dairy,2019-09-02 07:03:00
...,...,...,...
38568,6509,exit,2019-09-06 22:01:00
38555,6496,exit,2019-09-06 22:01:00
38553,6494,exit,2019-09-06 22:01:00
38562,6503,exit,2019-09-06 22:01:00


# Shift location one step

In [None]:
#customer_transitions = customer_flow.sort_values(by=['customer_no', 'timestamp'])

In [29]:
customer_transitions1 = customer_flow

In [30]:
customer_transitions = customer_transitions1[['customer_no', 'location']]

In [31]:
customer_transitions['location+1'] = customer_transitions.groupby('customer_no')['location'].shift(-1)

In [32]:
customer_transitions

Unnamed: 0,customer_no,location,location+1
27550,1001,entrance,dairy
27551,1002,entrance,dairy
27552,1003,entrance,dairy
5511,1002,dairy,checkout
5510,1001,dairy,checkout
...,...,...,...
38568,6509,exit,
38555,6496,exit,
38553,6494,exit,
38562,6503,exit,


In [None]:
customer_transitions

# Create Probabilities table

In [24]:
customer_transitions['location']

0        checkout
1        checkout
2        checkout
3        checkout
4        checkout
           ...   
38565        exit
38566        exit
38567        exit
38568        exit
38569        exit
Name: location, Length: 28989, dtype: object

In [33]:
probabilites = pd.crosstab(customer_transitions['location'], customer_transitions['location+1'], normalize=0)

In [None]:
STATES = probabilites.columns.tolist()

In [34]:
probabilites

location+1,checkout,dairy,drinks,exit,fruit,spices
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
checkout,0.0,0.069328,0.050272,0.762976,0.062069,0.055354
dairy,0.397112,0.0,0.17509,0.10349,0.167569,0.156739
drinks,0.498068,0.081489,0.0,0.121531,0.153846,0.145065
entrance,0.0,0.289292,0.177677,0.0,0.347913,0.185118
fruit,0.531541,0.157433,0.115469,0.09051,0.0,0.105047
spices,0.31567,0.205148,0.222559,0.108251,0.148372,0.0


# Predict states

In [None]:
location_states = list(customer_transitions.location.unique())
location_states

# Global Variables

#### Store traffic by hour

In [35]:
TIMESTAMPS = customer_flow.timestamp.dt.hour.values.tolist()
TIMESTAMPS_totals = Counter(TIMESTAMPS)

#### Store traffic by section

In [36]:
visited = customer_flow.location.values.tolist()
visited_totals = Counter(visited)

#### States

In [45]:
STATES = probabilites.columns.tolist()

In [46]:
STATES

['checkout', 'dairy', 'drinks', 'exit', 'fruit', 'spices']

#### Probabilities

In [38]:
PROBABILITIES = probabilites.T.to_dict(orient='list')

In [40]:
PROBABILITIES

{'checkout': [0.0,
  0.06932849364791288,
  0.05027223230490018,
  0.7629764065335753,
  0.06206896551724138,
  0.05535390199637023],
 'dairy': [0.3971119133574007,
  0.0,
  0.17509025270758122,
  0.10348977135980746,
  0.16756919374247894,
  0.15673886883273164],
 'drinks': [0.49806814190375837,
  0.08148928696873903,
  0.0,
  0.12153143659992975,
  0.15384615384615385,
  0.14506498068141904],
 'entrance': [0.0,
  0.28929219600725953,
  0.17767695099818512,
  0.0,
  0.34791288566243195,
  0.1851179673321234],
 'fruit': [0.5315414152495886,
  0.15743280307185958,
  0.11546900713110257,
  0.09051014810751508,
  0.0,
  0.10504662643993418],
 'spices': [0.315669947009841,
  0.20514761544284632,
  0.22255866767600302,
  0.10825132475397427,
  0.14837244511733536,
  0.0]}

#### Current Time

In [None]:
now = dt.datetime.now()
current_time = now.strftime("%H:%M:%S")

# Create Store and Customer Class

In [49]:
class Store():
    def __init__(self, name, sections=STATES, times=TIMESTAMPS_totals):
        self.name = name
        self.sections = sections
        
        #customer class for active customers and inactive customers
        self.active_customers = []
        self.visited_customers = []
        
        #Simulation visitation metrics
        self.times = times
        self.time_totals = []
        self.section_data = []
        
        #To output information
        self.to_print = []
        
    def add_customer(self, customer):
        """
        Adds customer class instance to list of customers in the store.
        
        Returns
        _______
        
        string telling name, state and time 
        """
        
        now = dt.datetime.now()
        current_time = now.strftime("%H:%M:%S")
                
        self.active_customers.append(customer)
        print(f'{customer.name} is at {self.name.title()} {customer.state} at {current_time}')
        
    def show_sections(self):
        """
        Show all sections of store
        
        Returns
        _______
        
        list: list of sections in store
        """
        return self.sections
   

    def simulate_flow(self):
        """
        Simulates customer moving through store.
        
        Returns
        _______
        
        str: customers next state 
        """
        now = dt.datetime.now()
        current_time = now.strftime("%H:%M:%S")
        
        
        for customer in self.active_customers:
            temp_time = []
            print(f'\n####### Moving {customer.name} [customer_id: {customer.cust_id}] through {self.name} #######')
            while customer.is_active:
                customer.next_state()
                if customer.state != 'checkout' or customer.state != 'exit':
                    self.section_data.append(customer.state)

                print(f'{customer.cust_id} is now at {customer.state}: Timestamp({current_time})')
                
                #Append data for output
                record = {'customer':customer.cust_id, 'location': customer.state,  'timestamp': current_time}
                self.to_print.append(record)
                
                
                timestep = random.randint(1,5)
                
                now+= dt.timedelta(minutes=timestep)
                current_time = now.strftime("%H:%M:%S")
                temp_time.append(now)
                
            self.time_totals.append(temp_time)
            
            # Cutomer has reached check out, remove from active customers and add to visited list
            self.active_customers.remove(customer)
            self.visited_customers.append(customer)
            print(f'{customer.name} [customer_id: {customer.cust_id}] has just left {self.name}')
            
    
    def show_busiest(self):
        """
        Shows busiest store times
        """
        busiest = self.times.most_common()[0] 
        return f'{busiest[0]} o\'clock is the buisiest time of the day with {busiest[1]} total visits'
        
    def show_slowest(self):
        """
        Shows slowest store times
        """
        slowest = self.times.most_common()[-1]
        return f'{slowest[0]} o\'clock is the buisiest time of the day with {slowest[1]} total visits'
        
    def most_visited(self):
        #show section most visited
        self.data = Counter(self.section_data) 
        self.data = self.data.most_common()[0]
        return f'Most Visited Section: {self.data[0].title()} with {self.data[1]} vists'
        
    def least_visited(self):
        #show section least visited
        self.data = Counter(self.section_data) 
        self.data = self.data.most_common()[-1]
        return f'Least Visited Section: {self.data[0].title()} with {self.data[1]} vists'
    
    def customer_time_data(self):
        self.total_time = 0
        self.total_timesteps = 0
        for i in self.time_totals:
            time_diff = i[-1] - i[0]
            self.total = time_diff.seconds / 60
            
            self.total_time = self.total_time + self.total
            self.total_timesteps = self.total_timesteps + len(i)
            
        self.avg_time = self.total_time/self.total_timesteps
            
        print(f'Customers total time in store: {self.total_time} minutes \
        Customers average time in store: {round(self.avg_time, 1)} minutes')
        
    def save_data(self):
        now = dt.datetime.now()
        #current_time = now.strftime()
        current_time = now.strftime("%H_%M_%S")
        filename = f'customers_data_{current_time}'
        df = pd.DataFrame(self.to_print)
        df.to_csv(filename)
        print('CSV Generated')
        
    

In [52]:
class Customer():
    
    def __init__(self, cust_id,name, state='entrance'):
        self.name = name
        self.cust_id = cust_id
        self.state = state
        
    def next_state(self):
        probs = PROBABILITIES[self.state]
        self.state = choices(STATES, weights=probs)[0]
    
    def __repr__(self):
        return f'{self.name} (customer_id: {self.cust_id}) at {self.state}'
    
    @property
    def is_active(self):
        return self.state != 'exit'
        
    

In [42]:
fake = Faker()

In [43]:
def customer_generator():
    random.seed(10)
    customer_ids = random.sample(range(10, 5000), 8)
    customer_names = [fake.name() for i in range(len(customer_ids))]
    
    return customer_ids, customer_names

In [53]:
if __name__ == '__main__':
    
    edeka = Store('Edeka')
    
    customer_ids, customer_names = customer_generator()
    for i in range(len(customer_ids)):
        cust_id = customer_ids[i]
        cust_name = customer_names[i]
    
    
        customer = Customer(cust_id, cust_name)
        edeka.add_customer(customer)
        
    

    for i in range(len(edeka.active_customers)):
        edeka.simulate_flow()
        time.sleep(2)
        
    print(f'\nThere are no more customers currently in {edeka.name}')
    #customer3 = ''
    

Thomas Norton is at Edeka entrance at 16:56:09
Christine Calderon is at Edeka entrance at 16:56:09
Dr. Bobby Perez is at Edeka entrance at 16:56:09
Jamie Phillips is at Edeka entrance at 16:56:09
Michael Odom is at Edeka entrance at 16:56:09
Jeff Kelly is at Edeka entrance at 16:56:09
Courtney Fernandez is at Edeka entrance at 16:56:09
Anna Willis is at Edeka entrance at 16:56:09

####### Moving Thomas Norton [customer_id: 4690] through Edeka #######
4690 is now at fruit: Timestamp(16:56:09)
4690 is now at dairy: Timestamp(16:59:09)
4690 is now at checkout: Timestamp(17:01:09)
4690 is now at exit: Timestamp(17:05:09)
Thomas Norton [customer_id: 4690] has just left Edeka

####### Moving Dr. Bobby Perez [customer_id: 3523] through Edeka #######
3523 is now at spices: Timestamp(17:07:09)
3523 is now at checkout: Timestamp(17:10:09)
3523 is now at exit: Timestamp(17:12:09)
Dr. Bobby Perez [customer_id: 3523] has just left Edeka

####### Moving Michael Odom [customer_id: 4745] through Edeka

***Note that the follow block shows busiest times based off data
imported from the data used to get the MCMC probabilites data***

In [None]:
shop.show_busiest()

In [54]:
edeka.most_visited()

'Most Visited Section: Checkout with 9 vists'

In [55]:
edeka.least_visited()

'Least Visited Section: Drinks with 2 vists'

In [None]:
edeka.customer_time_data()

In [None]:
edeka.save_data()