In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# set random seed for reproducibility
random.seed(42)
np.random.seed(42)

In [3]:
# Define product categories and details
products = [
    {"Product_ID": "MGT001", "Category": "Men's Graphic Tees", "Product_Name": "Sunset Vibes Tee", "Brand": "Riot Society", "Price": 24.99},
    {"Product_ID": "M2F001", "Category": "Men's 2 for $34 Tees", "Product_Name": "Skater Logo Tee", "Brand": "Riot Spciety", "Price": 17.99},
    {"Product_ID": "MBG001", "Category": "Men's BOGO RSQ Jeans", "Product_Name": "Slim Fit Denim", "Brand": "RSQ", "Price": 49.99},
    {"Product_ID": "MNT001", "Category": "Men's Nike Tees", "Product_Name": "Nike Sportswear Tee", "Brand": "Nike", "Price": 34.99},
    {"Product_ID": "W4F001", "Category": "Women's 4 for $25 Tops", "Product_Name": "Basic Tank Top", "Brand": "RSQ", "Price": 9.99},
    {"Product_ID": "WBJ001", "Category": "Women's BOGO RSQ Jeans", "Product_Name": "High Rise Skinny", "Brand": "RSQ", "Price": 49.99},
    {"Product_ID": "BP001", "Category": "Backpacks", "Product_Name": "Right Pack Backpack", "Brand": "JanSport", "Price": 49.99},
    {"Product_ID": "BP002", "Category": "Backpacks", "Product_Name": "Heritage Backpack", "Brand": "Herschel", "Price": 59.99},
    {"Product_ID": "SH001", "Category": "Shoes", "Product_Name": "Old Skool Sneakers", "Brand": "Vans", "Price": 59.99},
    {"Product_ID": "SH002", "Category": "Shoes", "Product_Name": "Nike SB Chron", "Brand": "Nike", "Price": 129.99},
]

#payment methods
payment_methods = ["Cash", "Credit Card", "Debit Card", "Gift Card", "Mobile Payment"]

In [4]:
# Function to apply promotional pricing
def apply_promotions(product, quantity):
    if product['Category'] == "Men's 2 for $34 Tees" and quantity >= 2:
        return 34.00 * (quantity // 2) + product['Price'] * (quantity % 2)
    elif product['Category'] in ["Men's BOGO RSQ Jeans", "Women's BOGO RSQ Jeans"] and quantity >= 2:
        full_price_items = quantity // 2 + quantity % 2
        half_price_items = quantity // 2
        return round((full_price_items * product['Price']) + (half_price_items * product['Price'] * 0.5), 2)
    elif product['Category'] == "Women's 4 for $25 Tops" and quantity >= 2:
        return 25.00 * (quantity // 4) + product['Price'] * (quantity % 4)
    else:
        return product['Price'] * quantity

In [5]:
# Generate mock data
def generate_mock_data(num_transactions):
    data = []
    base_date = datetime(2024, 7, 15)
    store_location = "San Diego, CA"
    
    for i in range (1, num_transactions + 1):
        transaction_id = f"T{i:05d}"
        date_offset = random.randint(0, 60) # Random date within 2 months
        transaction_date = base_date + timedelta(days=date_offset)
        transaction_time = (datetime.min + timedelta(minutes=random.randint(600, 1260))).time() # Store hours 10 am- 9 pm
        
        product = random.choice(products)
        quantity = random.randint(1, 5)
        total_spent = round(apply_promotions(product, quantity), 2)
        payment_method = random.choice(payment_methods)
        
        data.append({
            "Transaction_ID": transaction_id,
            "Date": transaction_date.strftime("%Y-%m-%d"),
            "Time": transaction_time.strftime("%H:%M"),
            "Product_ID": product["Product_ID"],
            "Category": product["Category"],
            "Product_Name": product["Product_Name"],
            "Brand": product["Brand"],
            "Price": product["Price"],
            "Quantity": quantity,
            "Total_Spent": total_spent,
            "Payment_Method": payment_method,
            "Store_Location": store_location
        })
    
    return pd.DataFrame(data)

# Generate 1000 mock transactions
mock_sales_data = generate_mock_data(1000)
mock_sales_data.head()

Unnamed: 0,Transaction_ID,Date,Time,Product_ID,Category,Product_Name,Brand,Price,Quantity,Total_Spent,Payment_Method,Store_Location
0,T00001,2024-08-24,11:54,MGT001,Men's Graphic Tees,Sunset Vibes Tee,Riot Society,24.99,3,74.97,Credit Card,"San Diego, CA"
1,T00002,2024-07-29,12:22,M2F001,Men's 2 for $34 Tees,Skater Logo Tee,Riot Spciety,17.99,5,85.99,Cash,"San Diego, CA"
2,T00003,2024-08-21,17:12,MGT001,Men's Graphic Tees,Sunset Vibes Tee,Riot Society,24.99,1,24.99,Cash,"San Diego, CA"
3,T00004,2024-07-28,13:58,SH001,Shoes,Old Skool Sneakers,Vans,59.99,5,299.95,Cash,"San Diego, CA"
4,T00005,2024-08-19,13:23,SH001,Shoes,Old Skool Sneakers,Vans,59.99,4,239.96,Credit Card,"San Diego, CA"


In [7]:
# Save the dataset to a CSV file
mock_sales_data.to_csv("../data/raw/tillys_mock_sales_data.csv", index=False)
print("Mock sales data save to 'data/raw/tillys_mock_sales_data.csv'")

Mock sales data save to 'data/raw/tillys_mock_sales_data.csv'
