In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime as dt

In [2]:
files = os.listdir(path='data')

In [3]:
files

['friday.csv', 'monday.csv', 'thursday.csv', 'tuesday.csv', 'wednesday.csv']

In [4]:
def read_files(files, datapath='data/', delimiter = ';'):
        df_temp = []

        if type(files) == list:
            for i in files:
                df = pd.read_csv(datapath+i, delimiter=delimiter, parse_dates=True, index_col='timestamp')
                day_name = df.index.day_name()[0][:3]
                df['customer_no'] = df.customer_no.apply(lambda x: f'{day_name}_{x}')
                df_temp.append(df)
            df = pd.concat(df_temp)
        else:
            df = pd.read_csv(files, delimiter=delimiter, parse_dates=True, index_col='timestamp')
            day_name = df.index.day_name()[0][:3]
            df['customer_no'] = df.customer_no.apply(lambda x: f'{day_name}_{x}')

        return df

In [5]:
shop_data = read_files(files)

# Part 1: Data Exploration

In [6]:
shop_data = shop_data.sort_values(by='timestamp')

In [7]:
#shop_data = shop_data.reset_index()

In [143]:
shop_data

Unnamed: 0_level_0,customer_no,location
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-09-02 07:03:00,Mon_1,dairy
2019-09-02 07:03:00,Mon_2,dairy
2019-09-02 07:04:00,Mon_3,dairy
2019-09-02 07:04:00,Mon_4,dairy
2019-09-02 07:04:00,Mon_5,spices
...,...,...
2019-09-06 21:50:00,Fri_1509,drinks
2019-09-06 21:50:00,Fri_1507,checkout
2019-09-06 21:50:00,Fri_1508,checkout
2019-09-06 21:50:00,Fri_1496,fruit


In [46]:
df_timefill = shop_data.groupby(['customer_no', 'location']).resample('T').ffill()

In [48]:
df_timefill = df_timefill.droplevel('location').reset_index(level=1)

In [60]:
df_timefill = df_timefill.drop('customer_no', axis=1)

KeyError: "['customer_no'] not found in axis"

In [62]:
df_timefill = df_timefill.reset_index()

In [65]:
customer_flow = pd.pivot_table(df_timefill, index='customer_no', columns='location', values='timestamp' )

In [None]:
customer_flow.apply(np.sort, axis=1)

# Create entrance times and fill checkout times

In [110]:
def fill_enter(dataframe):
    
    entry_time = dataframe.loc[dataframe.notnull()].min() - dt.timedelta(minutes=1)
    return entry_time
    #if dataframe.entrance != dataframe.entrance:
    #    entry_time = dataframe.loc[dataframe.notnull()].min() - timedelta(minuntes=1)
    #    return entry_time
    #elif dataframe.entrance:
    #    return dataframe.entrance

In [104]:
def fill_exit(dataframe):
    if dataframe.checkout != dataframe.checkout:
        closing_time = dataframe.loc[dataframe.notnull()].max().round('H')
        return closing_time
    elif dataframe.checkout:
        return dataframe.checkout

In [111]:
customer_flow['entrance'] = customer_flow.apply(fill_enter, axis=1)
customer_flow['checkout'] = customer_flow.apply(fill_exit, axis=1)

In [183]:
customer_flow = customer_flow.reset_index(level=0)

In [184]:
customer_flow = customer_flow.melt(id_vars='customer_no', value_name='timestamp')

In [185]:
customer_flow = customer_flow.dropna()

# Shift location one step

In [186]:
customer_transitions = customer_flow.sort_values(by=['customer_no', 'timestamp'])

In [187]:
customer_transitions = customer_transitions[['customer_no', 'location']]

In [188]:
customer_transitions['location+1'] = customer_transitions.groupby('customer_no')['location'].shift(-1)

In [189]:
customer_transitions

Unnamed: 0,customer_no,location,location+1
37225,Fri_1,entrance,dairy
7445,Fri_1,dairy,spices
29780,Fri_1,spices,checkout
0,Fri_1,checkout,
37226,Fri_10,entrance,fruit
...,...,...,...
29778,Wed_998,fruit,checkout
7443,Wed_998,checkout,
44669,Wed_999,entrance,fruit
29779,Wed_999,fruit,checkout


# Create Probabilities table

In [190]:
probabilites = pd.crosstab(customer_transitions['location'], customer_transitions['location+1'], normalize=0)

In [191]:
probabilites

location+1,checkout,dairy,drinks,fruit,spices
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
dairy,0.452069,0.0,0.2022,0.174175,0.171556
drinks,0.625078,0.054449,0.0,0.165215,0.155258
entrance,0.0,0.30544,0.16454,0.357018,0.173002
fruit,0.587302,0.173669,0.123249,0.0,0.11578
spices,0.406399,0.21273,0.234513,0.146358,0.0


# Predict states

In [192]:
from random import choices

In [197]:
location_states = list(customer_transitions.location.unique())
location_states

['entrance', 'dairy', 'spices', 'checkout', 'fruit', 'drinks']