In [2]:
# -*- coding: utf-8 -*-
"""Colab_BI_Dashboard_Starter.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/... (your Colab notebook link)
"""

import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# --- Synthetic Data Generation ---

def generate_synthetic_data(num_rows=1000, seed=None):
    """Generate a synthetic sales dataset as a pandas DataFrame.

    Each row is one order with randomly drawn attributes. ``Category`` is
    derived from the row's own ``Product``: 'Headphones' and 'Monitor' are
    randomly filed under 'Electronics' or 'Accessories', every other product
    is always 'Electronics'.

    Args:
        num_rows: Number of orders (rows) to generate.
        seed: Optional seed for a private random generator. When given, the
            random draws are repeatable across calls. ``Date`` is still
            relative to the moment of the call, so absolute dates shift from
            day to day. When ``None`` (the default), the module-level
            ``random`` generator is used, so a prior ``random.seed(...)``
            call keeps working as before.

    Returns:
        pandas.DataFrame with columns OrderID, Date, ProductID, Product,
        Region, Category, Quantity, UnitPrice, PaymentMethod, CustomerRating
        and Sales (Quantity * UnitPrice). ``Date`` holds ``datetime.date``
        objects (object dtype).
    """
    # Fall back to the shared module-level generator unless a seed is supplied.
    rng = random if seed is None else random.Random(seed)

    products = ['Laptop', 'Smartphone', 'Tablet', 'Headphones', 'Monitor']
    regions = ['North', 'South', 'East', 'West']
    categories = ['Electronics', 'Accessories']
    payment_methods = ['Credit Card', 'Debit Card', 'PayPal', 'Bank Transfer']
    # Products that may land in either category; all others are 'Electronics'.
    mixed_category_products = ('Headphones', 'Monitor')

    # One reference timestamp for the whole batch, so every row is offset
    # from the same "now" instead of re-reading the clock per row.
    now = datetime.now()

    # Draw products first so Category can be derived from the product that is
    # actually stored in the row (not from an unrelated second random draw).
    product_column = [rng.choice(products) for _ in range(num_rows)]
    category_column = [
        rng.choice(categories) if product in mixed_category_products else 'Electronics'
        for product in product_column
    ]

    data = {
        'OrderID': range(1, num_rows + 1),
        'Date': [now - timedelta(days=rng.randint(1, 365)) for _ in range(num_rows)],
        'ProductID': [rng.randint(100, 500) for _ in range(num_rows)],
        'Product': product_column,
        'Region': [rng.choice(regions) for _ in range(num_rows)],
        'Category': category_column,
        'Quantity': [rng.randint(1, 10) for _ in range(num_rows)],
        'UnitPrice': [rng.uniform(50, 2000) for _ in range(num_rows)],
        'PaymentMethod': [rng.choice(payment_methods) for _ in range(num_rows)],
        'CustomerRating': [rng.randint(1, 5) for _ in range(num_rows)]
    }

    df = pd.DataFrame(data)
    df['Sales'] = df['Quantity'] * df['UnitPrice']
    # Keep only the calendar date; the time-of-day part is irrelevant here.
    df['Date'] = pd.to_datetime(df['Date']).dt.date
    return df

# Build the dataset; change the argument to generate a different number of rows.
sales_data = generate_synthetic_data(2000)

# --- Data Export for Tableau ---

# Write the CSV consumed by Tableau Public (row index omitted).
sales_data.to_csv('synthetic_sales_data.csv', index=False)
print("Synthetic data generated and saved to 'synthetic_sales_data.csv'")

# --- Optional Data Exploration in Colab ---
# Each call prints a heading followed by the corresponding summary on the next line.
print("\nSample Data:", sales_data.head(), sep="\n")
print("\nData Types:", sales_data.dtypes, sep="\n")
print("\nSummary Statistics:", sales_data.describe(), sep="\n")
print("\nUnique Regions:", sales_data['Region'].unique(), sep="\n")

Synthetic data generated and saved to 'synthetic_sales_data.csv'

Sample Data:
   OrderID        Date  ProductID     Product Region     Category  Quantity  \
0        1  2024-04-04        107  Smartphone  South  Electronics         6   
1        2  2024-11-08        179      Laptop  South  Electronics        10   
2        3  2024-12-04        219      Laptop  South  Electronics         1   
3        4  2024-10-17        135  Headphones  South  Accessories         3   
4        5  2024-12-30        239      Laptop   East  Electronics         9   

     UnitPrice  PaymentMethod  CustomerRating         Sales  
0  1792.421096     Debit Card               2  10754.526579  
1   349.331733     Debit Card               2   3493.317329  
2  1451.856235    Credit Card               5   1451.856235  
3   639.367008         PayPal               3   1918.101024  
4  1748.820114  Bank Transfer               2  15739.381028  

Data Types:
OrderID             int64
Date               object
ProductID