In [33]:
import warnings
import pandas as pd
from data_analysis.DataProcessor import DataProcessor

warnings.simplefilter(action='ignore', category=FutureWarning)

# read in combined stock data
preprocessor = DataProcessor()
# create df to current date
df = preprocessor.process_nrw_data("06_22")
# remove bike sale since it is not relevant
df = preprocessor.remove_fahrrad_sale(df)
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['price'] = pd.to_numeric(df['price'], errors='coerce')

File already exists: C:\Users\FelixNeubauer\UniRepos\webscraper\data_analysis\processed_nrw_data\06_22_nrw_stock_data.csv. Loading DataFrame from file.


In [34]:
import pandas as pd
import numpy as np
from IPython.display import display

# Define price ranges and labels
def define_price_ranges(df, price_bins):
    price_labels = [f'{price_bins[i]}-{price_bins[i+1]}' for i in range(len(price_bins)-1)]
    price_labels.append(f'{price_bins[-1]}+')
    df['price_range'] = pd.cut(df['price'], bins=price_bins + [np.inf], labels=price_labels, include_lowest=True)
    return df

# Filter DataFrame to include only product variants that were in stock at least once
def filter_in_stock_variants(df):
    in_stock_variants = df[df['store_quantity'] > 0][['id', 'store_storeId']].drop_duplicates()
    df_filtered = df.merge(in_stock_variants, on=['id', 'store_storeId'])
    return df_filtered

# Function to identify restocks and calculate restock amounts
def identify_and_calculate_restocks(df):
    df = df.sort_values(by=['id', 'store_storeId', 'timestamp'])
    df['quantity_diff'] = df.groupby(['id', 'store_storeId'])['store_quantity'].diff()
    df['restock'] = df['quantity_diff'] > 0
    df['restock_amount'] = df['quantity_diff'].where(df['restock'], 0)
    df['quantity_before_restock'] = df['store_quantity'] - df['restock_amount']
    return df

# Calculate average stock quantity per sub-category, price range, and store
def calculate_average_stock(df, price_bins):
    df = df[df['store_quantity'] >= 0]
    df = filter_in_stock_variants(df)
    df = define_price_ranges(df, price_bins)
    
    avg_stock_quantity_per_store = df.groupby(['store_storeId', 'sub_category', 'price_range'])['store_quantity'].mean().reset_index()
    avg_stock_quantity_per_store.rename(columns={'store_quantity': 'avg_stock_quantity'}, inplace=True)
    
    avg_stock_quantity_combined = df.groupby(['sub_category', 'price_range'])['store_quantity'].mean().reset_index()
    avg_stock_quantity_combined.rename(columns={'store_quantity': 'avg_stock_quantity'}, inplace=True)
    avg_stock_quantity_combined['store_storeId'] = 'All Stores'
    
    combined_avg_stock_quantity = pd.concat([avg_stock_quantity_per_store, avg_stock_quantity_combined], ignore_index=True)
    
    return combined_avg_stock_quantity

# Calculate average restock amount and typical restock points
def calculate_average_restock(df, price_bins):
    df = filter_in_stock_variants(df)
    df = define_price_ranges(df, price_bins)
    df = identify_and_calculate_restocks(df)
    
    avg_restock_amount = df[df['restock']].groupby(['sub_category', 'price_range'])['restock_amount'].mean().reset_index()
    avg_restock_amount.rename(columns={'restock_amount': 'avg_restock_amount'}, inplace=True)
    
    avg_quantity_before_restock = df[df['restock']].groupby(['sub_category', 'price_range'])['quantity_before_restock'].mean().reset_index()
    avg_quantity_before_restock.rename(columns={'quantity_before_restock': 'avg_quantity_before_restock'}, inplace=True)
    
    combined_restock_info = pd.merge(avg_restock_amount, avg_quantity_before_restock, on=['sub_category', 'price_range'])
    
    return combined_restock_info

# Calculate reorder stock level and time to restock
def calculate_reorder_stock_and_time(df, price_bins):
    df = filter_in_stock_variants(df)
    df = define_price_ranges(df, price_bins)
    df = identify_and_calculate_restocks(df)
    
    # Calculate reorder stock level
    reorder_stock_level = df[df['restock']].groupby(['sub_category', 'price_range'])['quantity_before_restock'].mean().reset_index()
    reorder_stock_level.rename(columns={'quantity_before_restock': 'reorder_stock_level'}, inplace=True)
    
    # Calculate time to restock
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['time_to_restock'] = df.groupby(['id', 'store_storeId'])['timestamp'].diff().shift(-1)
    
    avg_time_to_restock = df[df['restock']].groupby(['sub_category', 'price_range'])['time_to_restock'].mean().reset_index()
    avg_time_to_restock.rename(columns={'time_to_restock': 'avg_time_to_restock'}, inplace=True)
    
    # Combine reorder stock level and time to restock
    combined_reorder_info = pd.merge(reorder_stock_level, avg_time_to_restock, on=['sub_category', 'price_range'])
    
    return combined_reorder_info

# Example price bins (can be adjusted based on your data)
price_bins = [0, 10, 20, 50, 100, 200, 500, 1000]

# Ensure rows with sub_category "fahrrad_sale" are removed
df = df[df['sub_category'] != 'fahrrad_sale']

# Calculate the average stock quantity
avg_stock_quantity_df = calculate_average_stock(df, price_bins)

# Display the average stock quantity results
display(avg_stock_quantity_df)

# Save average stock quantity DataFrame to CSV
avg_stock_quantity_df.to_csv('data/avg_stock_quantity.csv', index=False)

# Calculate the average restock amount and quantity before restock
restock_info_df = calculate_average_restock(df, price_bins)

# Display the restock information results
display(restock_info_df)

# Save restock information DataFrame to CSV
restock_info_df.to_csv('data/restock_info.csv', index=False)

# Calculate reorder stock levels and time to restock
reorder_info_df = calculate_reorder_stock_and_time(df, price_bins)

# Display the reorder information results
display(reorder_info_df)

# Save reorder information DataFrame to CSV
reorder_info_df.to_csv('data/reorder_info.csv', index=False)


Unnamed: 0,store_storeId,sub_category,price_range,avg_stock_quantity
0,0070008500085,campingmobel,0-10,13.898990
1,0070008500085,campingmobel,10-20,24.983471
2,0070008500085,campingmobel,20-50,14.550676
3,0070008500085,campingmobel,50-100,5.129870
4,0070008500085,campingmobel,100-200,2.787879
...,...,...,...,...
723,All Stores,zelte,50-100,7.913687
724,All Stores,zelte,100-200,11.311209
725,All Stores,zelte,200-500,3.267702
726,All Stores,zelte,500-1000,2.337790


Unnamed: 0,sub_category,price_range,avg_restock_amount,avg_quantity_before_restock
0,campingmobel,0-10,6.742424,11.787879
1,campingmobel,10-20,32.225256,7.996587
2,campingmobel,20-50,25.009121,1.800651
3,campingmobel,50-100,3.484536,5.572165
4,campingmobel,100-200,2.387755,4.346939
5,campingmobel,200-500,,
6,campingmobel,500-1000,,
7,campingmobel,1000+,,
8,fahrrad_sale,0-10,,
9,fahrrad_sale,10-20,,


Unnamed: 0,sub_category,price_range,reorder_stock_level,avg_time_to_restock
0,campingmobel,0-10,11.787879,0 days 18:06:04.412096282
1,campingmobel,10-20,7.996587,0 days 05:50:24.242666389
2,campingmobel,20-50,1.800651,0 days 03:16:23.960533168
3,campingmobel,50-100,5.572165,1 days 06:22:26.574803348
4,campingmobel,100-200,4.346939,1 days 02:54:58.521141822
5,campingmobel,200-500,,NaT
6,campingmobel,500-1000,,NaT
7,campingmobel,1000+,,NaT
8,fahrrad_sale,0-10,,NaT
9,fahrrad_sale,10-20,,NaT
