#### Install and Import Dependencies

In [1]:
import pandas as pd
import numpy as np
from faker import Faker
import random

# Seed every random source this notebook draws from so that
# "Restart Kernel -> Run All" regenerates the same dataset.
SEED = 42
random.seed(SEED)      # stdlib `random`: events, products, stores, quantities, prices
np.random.seed(SEED)   # NumPy global state (used by pandas sampling when no random_state is given)
Faker.seed(SEED)       # Faker's shared generator: uuid4() and date_between()

# NOTE: generated dates are relative to 'today', so they still shift with the run date.
fake = Faker('en_GB')
print("Libraries imported successfully.")

Libraries imported successfully.


#### Configuration & Event Mapping

In [2]:
# --- Dataset size -----------------------------------------------------------
# Number of synthetic transactions produced by the generation loop.
NUM_RECORDS = 5000

# --- Product catalogue ------------------------------------------------------
# Products reflecting the 2026 Dubai luxury & tech market.
PRODUCTS = [
    # Electronics (high demand in tech-savvy Dubai)
    'Smart Watch Ultra',
    # Luxury fashion (competitor-benchmark heavy)
    'Designer Leather Tote',
    # Premium beauty (huge during Ramadan/Eid)
    'Oud Silk Mood Perfume',
    # Gen Z / tourist high-velocity stock
    'Limited Edition Sneaker',
    # Gourmet (high demand during Dubai Shopping Festival)
    'Organic Saffron Giftset',
]

# --- Event calendar ---------------------------------------------------------
# Maps an event label to the multiplier applied to the base sales quantity.
# NOTE(review): the literal label 'None' is among the strings pandas.read_csv
# treats as missing by default, so after the CSV export it may be read back as
# NaN -- confirm downstream loaders pass keep_default_na=False.
DUBAI_EVENTS = dict([
    ('None', 1.0),                      # no event: baseline demand
    ('Dubai Shopping Festival', 2.8),   # significant peak for tourists
    ('Ramadan', 2.0),                   # high gifting season
    ('Black Friday', 3.5),              # major tech/electronics peak
    ('Eid Al-Fitr', 2.5),               # fashion and beauty peak
])

print(f"Configured {len(PRODUCTS)} high-value products for the Dubai market.")

Configured 5 high-value products for the Dubai market.


#### The Data Generation Engine

In [3]:
# Build NUM_RECORDS synthetic retail transactions and load them into `df`.
#
# Each record draws an event uniformly from DUBAI_EVENTS and scales a random
# base quantity (10-50 units) by that event's multiplier. Date and Event are
# drawn independently, so an event label does not imply that the date falls
# inside that event's calendar window.

# Loop-invariant lookups: built once instead of on every iteration.
event_names = list(DUBAI_EVENTS)
STORE_LOCATIONS = ['Dubai Mall', 'Mall of the Emirates', 'Dubai Marina']

raw_data = []

for _ in range(NUM_RECORDS):
    event = random.choice(event_names)
    multiplier = DUBAI_EVENTS[event]

    # Simulate realistic sales based on event multiplier.
    # int() truncates toward zero, e.g. 11 * 2.5 -> 27.
    base_sales = random.randint(10, 50)
    sales_qty = int(base_sales * multiplier)

    record = {
        'Transaction_ID': fake.uuid4(),
        'Date': fake.date_between(start_date='-1y', end_date='today'),
        'Product_Name': random.choice(PRODUCTS),
        'Stock_On_Hand': random.randint(50, 500),
        'Sales_Qty': sales_qty,
        'Unit_Price_AED': random.uniform(100, 2000),
        'Event': event,
        'Store_Location': random.choice(STORE_LOCATIONS)
    }
    raw_data.append(record)

# Build the frame once from the list of dicts (not row-by-row concatenation).
df = pd.DataFrame(raw_data)
print("Initial clean dataset generated.")

Initial clean dataset generated.


#### "The Sabotage" (Injecting Irregularities)

In [4]:
# Deliberately corrupt the clean dataset so the cleaning steps have something
# to find: duplicate rows, negative stock, missing events, absurd quantities.

# Guard against hidden state: this cell rebinds `df`, so running it twice would
# stack a second round of corruption on top of the first.
assert len(df) == NUM_RECORDS, (
    "df has already been modified; re-run the data generation cell "
    "before injecting irregularities."
)

# One dedicated generator drives every sample below, so the injected rows are
# reproducible for a given input frame while the four samples stay independent
# of each other (re-using one integer seed per call would pick overlapping rows).
sabotage_rng = np.random.default_rng(42)

# Clamp fixed-size samples so a small NUM_RECORDS does not raise in sample().
n_duplicates = min(100, len(df))

# 1. Create Duplicates
df = pd.concat(
    [df, df.sample(n=n_duplicates, random_state=sabotage_rng)],
    ignore_index=True,
)

# 2. Inject Negative Stock (Impossible in real life)
df.loc[df.sample(frac=0.01, random_state=sabotage_rng).index, 'Stock_On_Hand'] = -99

# 3. Inject Missing Values (Data integrity check)
df.loc[df.sample(frac=0.02, random_state=sabotage_rng).index, 'Event'] = np.nan

# 4. Outlier: Impossible Sales Quantity
n_outliers = min(5, len(df))
df.loc[df.sample(n=n_outliers, random_state=sabotage_rng).index, 'Sales_Qty'] = 99999

print("Irregularities injected: Duplicates, Negative Stock, NaNs, and Outliers.")

Irregularities injected: Duplicates, Negative Stock, NaNs, and Outliers.


#### Initial Inspection

In [5]:
# Pre-cleaning assessment: report the frame's dimensions, the per-column count
# of missing values, and how many rows carry a negative stock level.
print(f"Dataset Shape: {df.shape}")

missing_per_column = df.isna().sum()
print("\nMissing Values Count:")
print(missing_per_column)

# Isolate the rows whose stock level is below zero.
negative_stock_mask = df['Stock_On_Hand'].lt(0)
irregular_stock = df.loc[negative_stock_mask]
print(f"\nFound {len(irregular_stock)} rows with negative stock.")

# Last expression: rich preview of the first five rows.
df.head()

Dataset Shape: (5100, 8)

Missing Values Count:
Transaction_ID      0
Date                0
Product_Name        0
Stock_On_Hand       0
Sales_Qty           0
Unit_Price_AED      0
Event             102
Store_Location      0
dtype: int64

Found 51 rows with negative stock.


Unnamed: 0,Transaction_ID,Date,Product_Name,Stock_On_Hand,Sales_Qty,Unit_Price_AED,Event,Store_Location
0,64dd9d72-7d38-4ff3-bee8-6bedc17534c3,2025-07-10,Limited Edition Sneaker,87,33,1842.712287,,Mall of the Emirates
1,5a94fe14-cfde-4250-ad41-83e5179fa39a,2025-12-05,Smart Watch Ultra,193,88,1540.522023,Ramadan,Mall of the Emirates
2,dcd00375-d025-43ee-8071-157ab4e73442,2025-12-03,Designer Leather Tote,134,95,1086.283552,Eid Al-Fitr,Mall of the Emirates
3,f44b724c-8e0f-4b5b-9d40-b292a52f4241,2025-08-13,Organic Saffron Giftset,276,112,205.307084,Eid Al-Fitr,Dubai Marina
4,48c63a56-7584-4e78-b10b-d6da8db7c4a9,2025-09-10,Oud Silk Mood Perfume,194,18,1687.874846,,Dubai Mall


#### Export Raw Data

In [6]:
# Persist the deliberately dirty dataset to CSV (row index omitted) in the
# current working directory, then confirm on stdout.
raw_export_path = 'raw_retail_data.csv'
df.to_csv(raw_export_path, index=False)

print(f"'{raw_export_path}' has been saved ")

'raw_retail_data.csv' has been saved 
