<a href="https://colab.research.google.com/github/aayush-jain-dtu/inventory-stock-prediction/blob/main/dataset_creation_with_pattern.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from datetime import date, timedelta
import random

In [None]:
# Product catalogue, keyed by product id.
# Each entry carries the display title, category, base price (INR), the
# (low, high) stock range, the starting stock level, the date of the last
# restock and a free-text label naming the product's seasonal demand pattern.
_INITIAL_RESTOCK = date(2018, 9, 1)  # every product starts with the same restock date

# One row per product:
# (id, title, category, price_inr, stock_range, current_stock, demand_pattern)
_PRODUCT_ROWS = [
    ('P001', 'iPhone 15', 'Electronics', 89900, (50, 200), 150, 'monthly_start_and_festive_spike'),
    ('P002', 'Smart Watch', 'Electronics', 15000, (100, 500), 400, 'new_year_and_festive_spike'),
    ('P003', 'Bluetooth Speakers', 'Electronics', 9500, (150, 600), 500, 'summer_and_festive_spike'),
    ('P004', "Men's Winter Jacket", 'Fashion', 3500, (500, 2000), 1800, 'winter_spike'),
    ('P005', 'Summer Cotton T-Shirts', 'Fashion', 1500, (800, 3000), 2500, 'summer_spike'),
    ('P006', 'Festive Sarees', 'Fashion', 6000, (500, 2500), 2000, 'festive_wedding_spike'),
    ('P007', 'Treadmill', 'Gym Products', 42000, (5, 50), 40, 'jan_june_spike'),
    ('P008', 'Dumbbell Set', 'Gym Products', 2800, (200, 1000), 800, 'jan_jun_spike'),
    ('P009', 'Yoga Mats', 'Gym Products', 1500, (800, 3000), 2500, 'jan_may_spike'),
    ('P010', 'Soft Drink Cases', 'Groceries & Beverages', 2200, (400, 1500), 1200, 'summer_heat_spike'),
    ('P011', 'Packaged Snacks', 'Groceries & Beverages', 500, (300, 1200), 900, 'festive_holidays_spike'),
    ('P012', 'Tea & Coffee Packs', 'Groceries & Beverages', 400, (200, 1000), 700, 'winter_drinks_spike'),
    ('P013', 'Notebooks (Bundle)', 'Stationery & School Products', 600, (300, 1200), 1000, 'school_spike'),
    ('P014', 'Pens & Markers Set', 'Stationery & School Products', 300, (100, 500), 400, 'school_spike'),
    ('P015', 'School Backpacks', 'Stationery & School Products', 800, (100, 400), 350, 'school_spike'),
]

products = {
    pid: {
        'title': title,
        'category': category,
        'price_inr': price_inr,
        'stock_range': stock_range,
        'current_stock': current_stock,
        'last_restock': _INITIAL_RESTOCK,
        'demand_pattern': demand_pattern,
    }
    for pid, title, category, price_inr, stock_range, current_stock, demand_pattern in _PRODUCT_ROWS
}

In [None]:
# Client ids C001 .. C012 (three-digit, zero-padded).
clients = ['C{:03d}'.format(client_number) for client_number in range(1, 13)]


In [None]:
# Simulation window: every calendar day from start_date to end_date, inclusive.
start_date = date(2018, 9, 1)
end_date = date(2025, 9, 30)
date_list = [
    date.fromordinal(day_ordinal)
    for day_ordinal in range(start_date.toordinal(), end_date.toordinal() + 1)
]

In [None]:
# Define restock intervals
# Seed the generator before the first random draw of the notebook so that a
# fresh "Restart & Run All" regenerates exactly the same dataset. Change
# RANDOM_SEED to obtain a different (but still repeatable) dataset.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Each product gets its own fixed number of days (30-60, inclusive) that must
# elapse between two regular restocks.
restock_intervals = {pid: random.randint(30, 60) for pid in products}

In [None]:
# Accumulators for the order generator: the generated rows, the next order
# number to hand out, and how many rows have been produced so far.
data, order_id_counter, rows_generated = [], 1, 0

In [None]:
# --- Order simulation --------------------------------------------------------
# Appends order rows to `data` until `rows_generated` reaches TARGET_ROWS and
# updates each product's 'current_stock' / 'last_restock' in `products`.
#
# Orders are replayed oldest-first. Stock levels and restock dates are running
# state, so they only make sense when the orders that change them are processed
# in date order: with randomly ordered dates the "days since last restock"
# delta is frequently negative, and the recorded stock column no longer evolves
# consistently once the rows are sorted by date.
TARGET_ROWS = 6000

# Seasonal demand table: (months, product ids, weight multiplier).
# Multipliers stack when a product matches several rows for the same month.
SEASONAL_BOOSTS = [
    # Electronics
    ({10, 11, 12}, ('P001', 'P002', 'P003'), 2),   # festive spike
    ({1}, ('P002',), 2),                           # New Year's spike for Smart Watch
    ({5, 6, 7}, ('P003',), 2),                     # summer vacation for Speakers
    # Fashion
    ({10, 11, 12}, ('P004', 'P006'), 3),           # winter jackets, festive sarees
    ({3, 4, 5, 6, 7}, ('P005',), 3),               # summer T-shirts
    ({2, 3}, ('P006',), 3),                        # wedding season for sarees
    # Gym products
    ({1}, ('P007', 'P008', 'P009'), 3),            # New Year's resolutions
    ({6, 7}, ('P007', 'P008', 'P009'), 2),         # fitness before monsoon
    # Groceries & beverages
    ({4, 5, 6, 7}, ('P010',), 3),                  # summer heat for soft drinks
    ({10, 11, 12}, ('P011',), 2),                  # festivals for snacks
    ({11, 12, 1, 2}, ('P012',), 3),                # winter for tea & coffee
    # Stationery
    ({4, 5, 6}, ('P013', 'P014', 'P015'), 3),
]


def _seasonal_weights(month):
    """Return {product_id: selection weight} for the given calendar month (1-12).

    Every product starts at weight 1; each matching SEASONAL_BOOSTS row
    multiplies the weight of the products it lists.
    """
    weights = {pid: 1 for pid in products}
    for boosted_months, boosted_ids, factor in SEASONAL_BOOSTS:
        if month in boosted_months:
            for pid in boosted_ids:
                weights[pid] *= factor
    return weights


product_ids = list(products.keys())
# The weights depend only on the month, so compute them once instead of per order.
weights_by_month = {month: _seasonal_weights(month) for month in range(1, 13)}

# Draw every order date up front (uniformly, with replacement) and sort them so
# the stock simulation below runs in chronological order. Nothing is drawn when
# the target has already been reached, so re-running this cell is a no-op.
remaining_rows = max(0, TARGET_ROWS - rows_generated)
pending_order_dates = sorted(random.choices(date_list, k=remaining_rows))

for order_date in pending_order_dates:
    order_year = order_date.year
    order_month = order_date.month
    order_day = order_date.day

    # Select product based on the month's weighted probabilities
    product_weights = weights_by_month[order_month]
    selected_product_id = random.choices(
        product_ids,
        weights=[product_weights[pid] for pid in product_ids],
        k=1,
    )[0]
    product_info = products[selected_product_id]
    min_stock, max_stock = product_info['stock_range']

    # --- Aggressive Replenishment Logic ---
    # Emergency restock: at or below 20% of the range's lower bound, refill
    # straight to the upper bound.
    safety_stock = min_stock * 0.2
    if product_info['current_stock'] <= safety_stock:
        product_info['current_stock'] = max_stock

    # Regular timed restock once the product's interval has elapsed.
    if (order_date - product_info['last_restock']).days >= restock_intervals[selected_product_id]:
        restock_quantity = random.randint(min_stock // 2, min_stock)
        # Cap at the upper bound of the stock range so slow-moving products do
        # not accumulate stock without limit; never lower stock already held.
        product_info['current_stock'] = max(
            product_info['current_stock'],
            min(product_info['current_stock'] + restock_quantity, max_stock),
        )
        product_info['last_restock'] = order_date

    # Adjust quantity based on demand season: a weight above the base of 1
    # means the product is in season this month. Expensive items (> 20000 INR)
    # sell in smaller quantities.
    in_season = 'spike' in product_info['demand_pattern'] and product_weights[selected_product_id] > 1
    is_expensive = product_info['price_inr'] > 20000
    if in_season:
        quantity_ordered = random.randint(3, 10) if is_expensive else random.randint(10, 30)
    else:  # Off-season
        quantity_ordered = random.randint(1, 2) if is_expensive else random.randint(1, 5)

    # Simulate price increase: 2% compounded per calendar year since the start year
    price = product_info['price_inr']
    years_passed = order_year - start_date.year
    if years_passed > 0:
        price = int(price * (1.02 ** years_passed))

    # Ensure ordered quantity doesn't exceed stock
    quantity_ordered = min(quantity_ordered, product_info['current_stock'])

    # Defensive: an order against empty stock is dropped (no row, no order id).
    # With positive stock ranges the emergency restock above keeps stock above
    # zero, so this should not trigger for the current catalogue.
    if quantity_ordered == 0:
        continue

    # Append data to the list; 'current_stock' is the level *before* this order
    data.append([
        f'O{str(order_id_counter).zfill(4)}',
        order_year,
        order_month,
        order_day,
        selected_product_id,
        product_info['title'],
        product_info['category'],
        product_info['current_stock'],
        random.choice(clients),
        price,
        quantity_ordered
    ])

    # Update current stock for the next transaction
    product_info['current_stock'] -= quantity_ordered

    order_id_counter += 1
    rows_generated += 1

In [None]:
# Assemble the generated rows into a DataFrame; the column order mirrors the
# order in which each row's values were appended.
columns = [
    'order_id',
    'order_year',
    'order_month',
    'order_day',
    'product_id',
    'product_title',
    'product_category',
    'current_product_stock',
    'client_id',
    'price_inr',
    'quantity_ordered',
]
df = pd.DataFrame(data=data, columns=columns)

In [None]:
# Put the rows in chronological order (year, then month, then day), in place.
date_columns = ['order_year', 'order_month', 'order_day']
df.sort_values(by=date_columns, inplace=True)


In [None]:
# Write the dataset to disk without the DataFrame index, then confirm.
output_path = 'inventory_dataset.csv'
df.to_csv(output_path, index=False)
print("Dataset successfully generated and saved to inventory_dataset.csv")

Dataset successfully generated and saved to inventory_dataset.csv
