<a href="https://colab.research.google.com/github/jblcky/retail-pharmacy-inventory-optimization/blob/main/notebooks/01_inventory_optimize.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

ChatGPT-generated synthetic retail pharmacy sales data.

In [None]:
# Mount Google Drive at /content/drive so that later cells can write files
# beneath that directory (the generated CSV is saved under it).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global RNG so every run of this cell reproduces the
# same synthetic dataset.
np.random.seed(42)

# Simulation size
n_weeks = 104  # 2 years of weekly data
n_skus = 100   # Number of SKUs

# SKU metadata: one identifier and one randomly assigned category per SKU.
sku_ids = [f'SKU_{i+1}' for i in range(n_skus)]
category_names = ['OTC', 'Prescription', 'Cold-chain']
category_dist = [0.5, 0.4, 0.1]  # Sampling probability of each category name
# `categories` is aligned with `sku_ids`: categories[i] belongs to sku_ids[i].
categories = np.random.choice(category_names, size=n_skus, p=category_dist)

# One timestamp per simulated week, anchored on Mondays ('W-MON').
dates = pd.date_range(start='2023-01-01', periods=n_weeks, freq='W-MON')
sales_data = []

# Build one weekly frame per SKU. The order of the random draws inside the
# loop determines the generated values, so keep it stable when editing.
# Pairing each SKU with its category via zip avoids a linear
# `sku_ids.index(sku)` search on every iteration.
for sku, category in zip(sku_ids, categories):
    units_sold = np.random.poisson(lam=200, size=n_weeks)  # Weekly demand, Poisson with mean 200
    unit_cost = np.random.uniform(10, 50)  # One cost per SKU, between $10 and $50
    unit_price = unit_cost * np.random.uniform(1.5, 2.5)  # Price is 1.5 to 2.5 times cost
    # NOTE(review): lead time and shelf life are redrawn for every week, so
    # they vary within a single SKU -- confirm this is intended rather than
    # one value per SKU.
    lead_time_days = np.random.choice([7, 14, 21], size=n_weeks)
    shelf_life_days = np.random.choice([30, 60, 90], size=n_weeks)
    promotion_flag = np.random.choice([0, 1], size=n_weeks, p=[0.8, 0.2])  # 20% chance of promotion
    holiday_flag = np.random.choice([0, 1], size=n_weeks, p=[0.9, 0.1])  # 10% chance of holiday

    # Scalars (sku, category, cost, price) are broadcast across all weeks.
    df = pd.DataFrame({
        'date': dates,
        'sku_id': sku,
        'category': category,
        'units_sold': units_sold,
        'unit_cost': unit_cost,
        'unit_price': unit_price,
        'lead_time_days': lead_time_days,
        'shelf_life_days': shelf_life_days,
        'promotion_flag': promotion_flag,
        'holiday_flag': holiday_flag
    })
    sales_data.append(df)

# Combine all SKU data into a single DataFrame (one row per SKU per week).
sales_df = pd.concat(sales_data, ignore_index=True)

# Sanity check: every SKU must contribute exactly one row per week.
assert len(sales_df) == n_weeks * n_skus, "unexpected row count in sales_df"

# Persist the combined dataset as a CSV under the mounted Drive directory.
# index=False keeps the DataFrame's row index out of the file.
output_csv_path = '/content/drive/My Drive/pharmacy_sales_data.csv'
sales_df.to_csv(output_csv_path, index=False)

# Preview: the cell's last expression renders the first five rows.
sales_df.head()


Unnamed: 0,date,sku_id,category,units_sold,unit_cost,unit_price,lead_time_days,shelf_life_days,promotion_flag,holiday_flag
0,2023-01-02,SKU_1,OTC,192,25.940189,54.885552,7,90,0,0
1,2023-01-09,SKU_1,OTC,222,25.940189,54.885552,14,30,1,0
2,2023-01-16,SKU_1,OTC,196,25.940189,54.885552,21,90,0,0
3,2023-01-23,SKU_1,OTC,188,25.940189,54.885552,14,30,0,0
4,2023-01-30,SKU_1,OTC,191,25.940189,54.885552,14,30,1,0
