In [None]:
import os
import pandas as pd
from tqdm import tqdm

Loading Data

In [None]:
def load_data(file_name):
    """Read the sales feather file and return one row per store/product/day.

    The ``Date`` column is converted to datetime. Where several rows share the
    same ``Store_ID2``/``Product_ID``/``Date`` combination, only the first one
    is kept; the surviving rows keep their original index labels.
    """
    key_columns = ['Store_ID2', 'Product_ID', 'Date']
    raw_sales = pd.read_feather(file_name)
    parsed_sales = raw_sales.assign(Date=pd.to_datetime(raw_sales['Date']))
    return parsed_sales.drop_duplicates(subset=key_columns)

Generating Stocks

In [None]:
def generate_stocks(sls):
    """Build one row per store/product/day over each pair's sales window.

    For every ``Store_ID2``/``Product_ID`` pair the window runs from the day
    before its earliest ``Date`` in ``sls`` through its latest ``Date``,
    inclusive. Pairs whose earliest and latest dates coincide are dropped.

    Parameters
    ----------
    sls : pandas.DataFrame
        Sales rows with ``Store_ID2``, ``Product_ID`` and a datetime ``Date``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Store_ID2``, ``Product_ID``, ``Date_min`` (earliest sale
        date minus one day) and ``Date`` (one calendar day per row). All rows
        of a pair share the same index label. The frame is empty, with the
        same columns, when no pair has sales on more than one date.
    """
    keys = ['Store_ID2', 'Product_ID']
    stocks = sls.groupby(by=keys, as_index=False).agg(
        Date_min=('Date', 'min'),
        Date_max=('Date', 'max'),
    )
    stocks = stocks.loc[stocks['Date_min'] != stocks['Date_max']].copy()

    # Start each window one day before the first recorded sale.
    stocks['Date_min'] = stocks['Date_min'] - pd.Timedelta(days=1)

    if stocks.empty:
        # Nothing to expand: return the expected columns without going through
        # the list/explode path. Reusing Date_min gives Date a matching dtype.
        return stocks.drop(columns=['Date_max']).assign(Date=stocks['Date_min'])

    # One list of daily timestamps per pair; an explicit object Series keeps
    # pandas from interpreting the list of lists as 2-D data.
    daily_ranges = [
        pd.date_range(start=window_start, end=window_end).tolist()
        for window_start, window_end in zip(stocks['Date_min'], stocks['Date_max'])
    ]
    stocks['Date'] = pd.Series(daily_ranges, index=stocks.index, dtype=object)

    stocks = stocks.explode('Date').drop(columns=['Date_max'])
    # explode() leaves an object column; restore the datetime dtype.
    stocks['Date'] = pd.to_datetime(stocks['Date'])
    return stocks

Calculating Restock Dates

In [None]:
def calculate_restock_days(group):
    """Attach the next weekly restock date to every row of ``group``.

    Restock dates fall every 7 days counted from the earliest ``Date`` in the
    group. Each row receives the first such date that lies more than its own
    whole-day offset from that earliest date, so a row exactly 7 days in gets
    the restock date 14 days in.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows with a ``Date`` column (datetime, or values ``pd.to_datetime``
        can parse).

    Returns
    -------
    pandas.DataFrame
        A new frame equal to ``group`` plus a ``Next_Restock_Day`` column.
        ``group`` itself is left untouched, because mutating the object
        handed over by ``groupby.apply`` is not supported by pandas.
    """
    dates = pd.to_datetime(group['Date'])
    first_date = dates.min()
    # Whole weeks elapsed since the first date, plus one, selects the upcoming
    # restock boundary.
    weeks_ahead = (dates - first_date).dt.days // 7 + 1
    next_restock = first_date + pd.to_timedelta(weeks_ahead * 7, unit='D')
    return group.assign(Next_Restock_Day=next_restock)

Calculating Stocks

In [None]:
def calculate_stock(group):
    """Fill in ``Current_Stock`` day by day for one store/product group.

    Rows are processed in ``Date`` order. The first row keeps whatever
    ``Current_Stock`` it arrives with. For every later row:

    * if ``Date == Date_min`` the row's existing value is kept;
    * otherwise the stock is the previous row's stock minus this row's
      ``Quantity``;
    * additionally, if ``Date`` is exactly 7 days before the row's
      ``Next_Restock_Day``, the previous row's ``Quantity_to_arrive`` is
      added.

    Parameters
    ----------
    group : pandas.DataFrame
        Needs the columns ``Date``, ``Date_min``, ``Next_Restock_Day``,
        ``Quantity``, ``Quantity_to_arrive`` and ``Current_Stock``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` sorted by ``Date`` with ``Current_Stock`` updated;
        the input frame is not modified.
    """
    group = group.sort_values('Date')
    if len(group) < 2:
        return group

    # Pull each column out once. Indexing whole rows with group.iloc[i] inside
    # the loop builds a new Series per access and dominates the runtime.
    keeps_seed = (group['Date'] == group['Date_min']).tolist()
    starts_cycle = (
        group['Date'] == group['Next_Restock_Day'] - pd.Timedelta(days=7)
    ).tolist()
    sold = group['Quantity'].tolist()
    arriving = group['Quantity_to_arrive'].tolist()
    stock = group['Current_Stock'].tolist()

    # Each value depends on the previous one, so this stays a sequential pass.
    for i in range(1, len(stock)):
        if keeps_seed[i]:
            continue
        level = stock[i - 1] - sold[i]
        if starts_cycle[i]:
            level += arriving[i - 1]
        stock[i] = level

    group['Current_Stock'] = stock
    return group

In [None]:
def calculate_quantity_to_arrive(group):
    """Add the total ``Quantity`` of each restock cycle to its rows.

    Rows sharing the same ``Next_Restock_Day`` form one cycle; every row gets
    the sum of ``Quantity`` over its cycle in a ``Quantity_to_arrive`` column.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows with ``Next_Restock_Day`` and ``Quantity`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame equal to ``group`` plus ``Quantity_to_arrive``. ``group``
        itself is left untouched, because mutating the object handed over by
        ``groupby.apply`` is not supported by pandas.
    """
    cycle_totals = group.groupby('Next_Restock_Day')['Quantity'].transform('sum')
    return group.assign(Quantity_to_arrive=cycle_totals)

In [None]:
# Locate the input dataset relative to the directory the notebook runs from
current_directory = os.getcwd()
file_name = current_directory + '/../datasets/cigarettes_treated.feather'

In [None]:
# Load the sales data; load_data parses Date and drops rows that repeat the
# same store/product/date combination
sls = load_data(file_name)

In [None]:
# Generate the stocks dataframe: one row per store/product/day, from the day
# before each pair's first sale date through its last sale date
stocks = generate_stocks(sls)

In [None]:
# Calculate the restock days, one store/product group at a time.
# The groups are processed explicitly and concatenated instead of going
# through groupby.apply: since pandas 2.0, apply() defaults to group_keys=True
# and would add Store_ID2/Product_ID to the index while they are still
# columns, which makes the later merge on those names ambiguous.
stocks = pd.concat(
    [
        # Copy so the helper can add a column without touching a slice of `stocks`.
        calculate_restock_days(group.copy())
        for _, group in stocks.groupby(['Store_ID2', 'Product_ID'])
    ]
)


In [None]:
# Left-join the sales onto the daily stock grid; days without a matching sale
# row get Quantity 0, and Date is normalised to datetime
operation = (
    stocks
    .merge(sls, how='left', on=['Store_ID2', 'Product_ID', 'Date'])
    .assign(
        Quantity=lambda merged: merged['Quantity'].fillna(0),
        Date=lambda merged: pd.to_datetime(merged['Date']),
    )
)

In [None]:
# Calculate the restock days again on the merged frame.
# NOTE(review): Next_Restock_Day already comes through the merge from `stocks`,
# so this pass looks redundant — confirm before removing.
# Groups are processed explicitly and concatenated instead of going through
# groupby.apply: since pandas 2.0, apply() defaults to group_keys=True and
# would add Store_ID2/Product_ID to the index while they are still columns,
# which breaks the next groupby on those names.
operation = pd.concat(
    [
        # Copy so the helper can add a column without touching a slice of `operation`.
        calculate_restock_days(group.copy())
        for _, group in operation.groupby(['Store_ID2', 'Product_ID'])
    ]
)


In [None]:
# Add each restock cycle's total Quantity to its rows, per store/product group.
# Groups are processed explicitly and concatenated instead of going through
# groupby.apply: since pandas 2.0, apply() defaults to group_keys=True and
# would add Store_ID2/Product_ID to the index while they are still columns.
operation = pd.concat(
    [
        # Copy so the helper can add a column without touching a slice of `operation`.
        calculate_quantity_to_arrive(group.copy())
        for _, group in operation.groupby(['Store_ID2', 'Product_ID'])
    ]
)

In [None]:
# Seed the opening stock: rows where Date == Date_min start with that row's
# Quantity_to_arrive plus a fixed buffer; every other row starts at 0.
# Computed column-wise instead of with a row-wise apply, which is slow and
# returns a DataFrame (not a Series) when `operation` has no rows.
OPENING_STOCK_BUFFER = 10  # NOTE(review): unexplained constant carried over from the original cell — confirm its meaning
is_opening_day = operation['Date'] == operation['Date_min']
operation['Current_Stock'] = (
    operation['Quantity_to_arrive'] + OPENING_STOCK_BUFFER
).where(is_opening_day, 0)


In [None]:
# Run the day-by-day stock calculation on every store/product group,
# showing progress over the groups
store_product_groups = operation.groupby(['Store_ID2', 'Product_ID'])
groups = [calculate_stock(frame) for _key, frame in tqdm(store_product_groups)]


In [None]:
# Concatenate the per-group results back into a single dataframe
operation = pd.concat(groups)

In [None]:
# Save the result in the current working directory, as CSV (without the
# index) and as feather
operation.to_csv(f'{current_directory}/cigarettes_treated_w_stocks.csv', index=False)
operation.to_feather(f'{current_directory}/cigarettes_treated_w_stocks.feather')