In [1]:
!pip install names









In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import logging  
import names

In [45]:
def collect_data():
    """ 
         1. Read CSV file for each working day
           rearrange indices to have unique identifier 
           for each customer, independently of the shopping day.
         2. timestamp column is converted into datetime format
         3. A single dataframe is returned, concatenation of all intermediate dataframes
         """
    monday = pd.read_csv("data/monday.csv", sep=";",   parse_dates=True)
    tuesday = pd.read_csv("data/tuesday.csv", sep=";",   parse_dates=True)
    wednesday = pd.read_csv("data/wednesday.csv", sep=";",   parse_dates=True)
    thursday = pd.read_csv("data/thursday.csv", sep=";",   parse_dates=True)
    friday = pd.read_csv("data/friday.csv", sep=";",   parse_dates=True)
    
    tuesday["customer_no"] =   tuesday["customer_no"] + monday["customer_no"].max()
    wednesday["customer_no"] =   wednesday["customer_no"] + tuesday["customer_no"].max()
    thursday["customer_no"] =   thursday["customer_no"] + wednesday["customer_no"].max()
    friday["customer_no"] =   friday["customer_no"] + thursday["customer_no"].max()

    week_days= [monday, tuesday, wednesday, thursday, friday]
    dfd = pd.concat(week_days)
    dfd["timestamp"] = pd.to_datetime(dfd["timestamp"])
    ## below part added to have entrance time  for each customer.
    entries = dfd.groupby("customer_no").first()[["timestamp"]] - timedelta(minutes=3)
    entries.reset_index(inplace=True)
    entries["customer_no"] = list(range(1,len(entries)+1))
    entries["location"] = "entrance"
    df = pd.concat([dfd, entries]).sort_values(by=["timestamp"])
    df.index = range(df.shape[0])
    return df

In [46]:
df = collect_data()

In [47]:
df

Unnamed: 0,timestamp,customer_no,location
0,2019-09-02 07:00:00,1,entrance
1,2019-09-02 07:00:00,2,entrance
2,2019-09-02 07:01:00,3,entrance
3,2019-09-02 07:01:00,4,entrance
4,2019-09-02 07:01:00,5,entrance
...,...,...,...
32317,2019-09-06 21:50:00,7431,fruit
32318,2019-09-06 21:50:00,7435,dairy
32319,2019-09-06 21:50:00,7442,checkout
32320,2019-09-06 21:50:00,7443,checkout


In [48]:
class Customer:
    """ a single customer that moves through the supermarket in a MMC simulation
    """
    def __init__(self, name, tprobmatrix ):
        self.name = name
        self.location = "entrance"
        self.tprobability = tprobmatrix
        self.budget =  0
        self.all_locations =  ["checkout", "dairy", "drinks", "fruit","spices" ]
        #user_os = getpass.getuser()
        #logging.warning(f"Hi  {user_os.capitalize()}!")

    def __repr__(self):
        return f"Customer {self.name} is in the {self.location} section."
    def next_location(self):
        """ propagates the customer to the next step
        based on the probability transition matrix.
        Returns nothing."""
        #locations = np.array(["entrance", "checkout", "dairy", "drinks", "fruit","spices" ])

        self.location = np.random.choice(self.all_locations,  p=self.tprobability.loc[self.location])
        return self.location
    def has_left(self):
        """ checl if customer is still inside the Supermarket"""
        #churned_customers = []
        if self.location =="checkout":
            logging.debug(f" Customer {self.name} has left the supermarket ")


            return True
        else:
            return False

In [49]:
def transistion_prob(df): 
    dfs = df.copy()
    dfs["next_location"] = dfs.groupby('customer_no')['location'].shift(-1)
    logging.warning(dfs.isna().value_counts())
    logging.warning(dfs.sample(10))
    dfs.fillna("checkout", inplace=True)
    pmatrix = pd.crosstab(dfs['location'], dfs['next_location'], normalize=0)
    logging.warning(f"The Probability Transistion Matrix:\n {pmatrix}")
    return pmatrix    #.to_numpy()

In [50]:
pmatrix = transistion_prob(df)

False      False        False     False            24877
                                  True              7445
dtype: int64
20562 2019-09-05 10:09:00         4767     fruit      checkout
31235 2019-09-06 19:18:00         7193  checkout           NaN
2563  2019-09-02 13:35:00          576  checkout           NaN
18824 2019-09-04 20:58:00         4342     dairy        spices
11282 2019-09-03 19:05:00         2588     dairy        spices
13237 2019-09-04 08:36:00         3057  checkout           NaN
5596  2019-09-02 19:54:00         1260  checkout           NaN
23163 2019-09-05 17:09:00         5365     fruit      checkout
1565  2019-09-02 10:44:00          350     fruit      checkout
7346  2019-09-03 08:59:00         1662     dairy        drinks
 next_location  checkout     dairy    drinks     fruit    spices
location                                                       
checkout       1.000000  0.000000  0.000000  0.000000  0.000000
dairy          0.393033  0.000000  0.222483  0.189

In [60]:
timenow = datetime.now()
nextime  = timenow #.strftime("%H:%M")
str_nextime = nextime.strftime("%H:%M")
with open("simulation.csv", "w") as file1:
      
    for i in range(100):
        cstm = Customer(names.get_full_name(),  tprobmatrix=pmatrix)
        #timenowstr = (timenow + timedelta(minutes=2)).strftime("%H:%M")
        
        file1.write(f"{cstm.name}, {str_nextime},  {cstm.location}\n")
        while not cstm.has_left():
            
            file1.write(
            f"{cstm.name}, {str_nextime},  {cstm.next_location()}\n")
            nextime += timedelta(minutes=1)
            str_nextime = nextime.strftime("%H:%M")
new_df = pd.read_csv("simulation.csv")
new_df.columns = ["name", "Timestamp", "location"]
new_df

Unnamed: 0,name,Timestamp,location
0,Kay Johnson,15:01,dairy
1,Kay Johnson,15:02,drinks
2,Kay Johnson,15:03,checkout
3,Michael Omalley,15:04,entrance
4,Michael Omalley,15:04,spices
...,...,...,...
427,Florence Maple,20:30,spices
428,Florence Maple,20:31,checkout
429,Rachel Bussey,20:32,entrance
430,Rachel Bussey,20:32,drinks
