In [9]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta  # Import datetime and timedelta

def generate_sales_data(num_records):
    np.random.seed(42)

    end_date = datetime.now()
    start_date = end_date - timedelta(days=730)
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')

    products = ['Laptop', 'Smartphone', 'Tablet', 'Headphones', 'Monitor', 'Keyboard', 'Mouse', 'Printer', 'Camera', 'Smartwatch']

    categories = ['Electronics', 'Electronics', 'Electronics', 'Electronics', 'Audio', 'Electronics', 'Accessories', 'Accessories', 'Office', 'Electronics', 'Wearables']

    regions = ['North America', 'Europe', 'Asia', 'South America', 'Africa', 'Australia']

    data = {
        'order_id': range(1000, 1000 + num_records),
        'order_date': np.random.choice(date_range, num_records),
        'product': np.random.choice(products, num_records),
        'category': np.random.choice(categories, num_records),
        'quantity': np.random.randint(1, 5, num_records),
        'unit_price' : np.random.uniform(50, 2000, num_records),
        'region': np.random.choice(regions, num_records),
        'customer_age': np.random.randint(18, 70, num_records),
        'customer_gender': np.random.choice(['Male', 'Female'], num_records)
    }

    df = pd.DataFrame(data)

    df['total_sales'] = df['quantity'] * df['unit_price']

    nov_dec_mask = (df['order_date'].dt.month == 11) | (df['order_date'].dt.month == 12)
    df.loc[nov_dec_mask, 'total_sales'] = df.loc[nov_dec_mask, 'total_sales'] * 1.5

    return df


sales_df = generate_sales_data(1000)
print("Sample Sales Data")
print(sales_df.head())
print(f"\nDataset Shape : {sales_df.shape}")

Sample Sales Data
   order_id                 order_date   product     category  quantity  \
0      1000 2023-12-20 22:31:43.141544    Camera       Office         2   
1      1001 2024-11-17 22:31:43.141544    Laptop        Audio         3   
2      1002 2024-06-05 22:31:43.141544    Camera  Electronics         1   
3      1003 2023-12-24 22:31:43.141544   Printer        Audio         3   
4      1004 2023-11-19 22:31:43.141544  Keyboard  Electronics         1   

    unit_price         region  customer_age customer_gender  total_sales  
0  1434.764092           Asia            25          Female  4304.292276  
1  1877.887954  North America            66          Female  8450.495795  
2  1226.419021  South America            42          Female  1226.419021  
3   973.131719      Australia            67          Female  4379.092735  
4   166.447153         Africa            29            Male   249.670730  

Dataset Shape : (1000, 10)
