In [1]:
# setting the random seed for reproducibility
import random
random.seed(493)

import pandas as pd
import numpy as np

# for working with timestamps
from datetime import datetime
from dateutil.parser import parse

# to print out all the outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

In [None]:
# Function to add multiple rows with random "Item Number" and "Quantity"
def add_random_rows(df, date):
    num_rows = np.random.randint(1, 6)  # Random number of rows from 1 to 5
    
    # Generate a random hour between 4 AM (4) and 10 PM (22)
    hour = np.random.randint(4, 23)
    # # Generate a random minute between 0 and 59
    # minute = np.random.randint(0, 60)
    # Create the fixed datetime with the specified date and random time
    fixed_datetime = date.replace(hour=hour)
    
    for _ in range(num_rows):
        new_row = {
            'Date': fixed_datetime,
            'Item Number': np.random.randint(1001, 1018),
            'Quantity': np.random.randint(1, 5),  # Quantity from 1 to 4
            'Store Number': np.random.randint(1001, 1008)  # Random store number from 1001 to 1007
        }
        df = df._append(new_row, ignore_index=True)
    return df

# Create an initial empty DataFrame with the specified columns
dfx = pd.DataFrame(columns=["Date", "Item Number", "Quantity", "Store Number"])

# Add new rows with random values for the initial date
dfx = add_random_rows(dfx, pd.to_datetime('2024-07-01'))

# Generating 19 random datetimes
random_datetimes = [pd.to_datetime('2024-07-01') + pd.DateOffset(days=random.randint(0, 30)) for _ in range(19)]

# Adding rows for each store number from 1001 to 1007
for store_number in range(1001, 1008):
    df = pd.DataFrame(columns=['Date', 'Item Number', 'Quantity'])  # Initialize df for each store number
    for datetime in random_datetimes:
        df = add_random_rows(df, datetime)
    df['Store Number'] = store_number  # Set the store number for this iteration
    dfx = pd.concat([dfx, df], ignore_index=True)  # Append df to dfx

# Print the updated DataFrame
print("Updated DataFrame with added rows:")
print(dfx)

In [None]:
#  dfx.to_csv("../data/in/raw.csv", index=False)

In [None]:
dfx

In [None]:
# Creating an empty DataFrame with the specified columns
df_labor = pd.DataFrame(columns=["Date", "Hour", "Number of Personnel"])


for store_num in range(1001, 1021):
    
    # Generating data for each hour of each day in July 2024
    for july_day in range(1, 32):
        july_date = pd.to_datetime('2024-07-' + str(july_day))
        for hour in range(0, 24):
            num_personnel = random.randint(2, 9)
            df_labor = df_labor._append({"Store Number": store_num, "Date": july_date, "Hour": hour, "Number of Personnel": num_personnel}, ignore_index=True)

# Converting 'Date' column to datetime format
df_labor['Date'] = pd.to_datetime(df_labor['Date'])

# Displaying the first few rows of the DataFrame
df_labor.head()

In [None]:
print(july_dates)

In [None]:
df_labor.info()

In [None]:
df_labor.head()

In [None]:
# df_labor.to_csv("../data/in/labor.csv", index=False)

In [None]:
# Creating an empty DataFrame with the specified columns
df_labor = pd.DataFrame(columns=["Store Number", "Date", "Hour", "Number of Personnel"])

for store_num in range(1001, 1021):
    # Generating data for each hour of each day in July 2024
    for july_day in range(1, 32):
        july_date = pd.to_datetime(f'2024-07-{july_day}')
        for hour in range(0, 24):
            num_personnel = random.randint(2, 9)
            df_labor = df_labor._append({"Store Number": store_num, "Date": july_date, "Hour": hour, "Number of Personnel": num_personnel}, ignore_index=True)

# Converting 'Date' and 'Hour' columns to a single 'Datetime' column
df_labor['Datetime'] = pd.to_datetime(df_labor['Date']) + pd.to_timedelta(df_labor['Hour'], unit='h')

In [None]:
dfl = df_labor.drop(columns=["Date", "Hour"])

In [None]:
dfl.head()

In [None]:
dfl.to_csv("../data/in/labor.csv", index=False)

In [9]:
# Create empty lists to store the data
date_list = []
item_number_list = []
quantity_list = []
store_number_list = []

# Define the date range (assuming a particular month and year)
start_date = datetime(2024, 7, 1)
end_date = datetime(2024, 7, 31)
date_range = pd.date_range(start_date, end_date, freq='h')

# Generate the data
for store_number in range(1001, 1041):
    for single_date in date_range:
        for item_number in range(1001, 1018):
            quantity = random.randint(0, 50)
            date_list.append(single_date)
            item_number_list.append(item_number)
            quantity_list.append(quantity)
            store_number_list.append(store_number)

# Create the DataFrame
data = {
    'Date': date_list,
    'Item Number': item_number_list,
    'Quantity': quantity_list,
    'Store Number': store_number_list
}

df = pd.DataFrame(data)


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 490280 entries, 0 to 490279
Data columns (total 4 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   Date          490280 non-null  datetime64[ns]
 1   Item Number   490280 non-null  int64         
 2   Quantity      490280 non-null  int64         
 3   Store Number  490280 non-null  int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 15.0 MB


In [11]:
df.head()

Unnamed: 0,Date,Item Number,Quantity,Store Number
0,2024-07-01,1001,16,1001
1,2024-07-01,1002,43,1001
2,2024-07-01,1003,19,1001
3,2024-07-01,1004,37,1001
4,2024-07-01,1005,16,1001


In [12]:
df.tail()

Unnamed: 0,Date,Item Number,Quantity,Store Number
490275,2024-07-31,1013,2,1040
490276,2024-07-31,1014,18,1040
490277,2024-07-31,1015,17,1040
490278,2024-07-31,1016,40,1040
490279,2024-07-31,1017,30,1040


In [13]:
df.to_csv("../data/in/raw.csv", index=False)

In [15]:
# Create an empty DataFrame
df_labor = pd.DataFrame(columns=["Store Number", "DateTime", "Number of Personnel"])

# Generate data for each store, each day in July 2024, and each hour of the day
for store_num in range(1001, 1041):
    for july_day in range(1, 32):
        for hour in range(0, 24):
            july_datetime = pd.to_datetime(f'2024-07-{july_day} {hour}:00:00')
            num_personnel = random.randint(2, 9)
            df_labor = df_labor._append({"Store Number": store_num, "DateTime": july_datetime, "Number of Personnel": num_personnel}, ignore_index=True)

  df_labor = df_labor._append({"Store Number": store_num, "DateTime": july_datetime, "Number of Personnel": num_personnel}, ignore_index=True)


In [16]:
df_labor.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29760 entries, 0 to 29759
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Store Number         29760 non-null  object        
 1   DateTime             29760 non-null  datetime64[ns]
 2   Number of Personnel  29760 non-null  object        
dtypes: datetime64[ns](1), object(2)
memory usage: 697.6+ KB


In [17]:
df_labor.head()

Unnamed: 0,Store Number,DateTime,Number of Personnel
0,1001,2024-07-01 00:00:00,6
1,1001,2024-07-01 01:00:00,9
2,1001,2024-07-01 02:00:00,7
3,1001,2024-07-01 03:00:00,7
4,1001,2024-07-01 04:00:00,7


In [18]:
df_labor.tail()

Unnamed: 0,Store Number,DateTime,Number of Personnel
29755,1040,2024-07-31 19:00:00,5
29756,1040,2024-07-31 20:00:00,4
29757,1040,2024-07-31 21:00:00,5
29758,1040,2024-07-31 22:00:00,8
29759,1040,2024-07-31 23:00:00,2


In [19]:
df_labor.to_csv("../data/in/labor.csv", index=False)

In [6]:
# Assuming df is your existing dataframe with a 'DateTime' column of datetime objects
# Example dataframe creation
start_date = '2024-07-01 00:00:00'
end_date = '2024-07-31 23:00:00'
date_range = pd.date_range(start=start_date, end=end_date, freq='h')
dft = pd.DataFrame(date_range, columns=['DateTime'])

# Convert datetime to text (string format)
dft['DateTime'] = dft['DateTime'].dt.strftime('%Y-%m-%d %H:%M:%S')


In [7]:
dft.head()

Unnamed: 0,DateTime
0,2024-07-01 00:00:00
1,2024-07-01 01:00:00
2,2024-07-01 02:00:00
3,2024-07-01 03:00:00
4,2024-07-01 04:00:00


In [8]:
df_time.to_csv("../data/in/calendar.csv", index=False)