In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
# Helper functions
def random_date(start, end):
    return start + timedelta(seconds=np.random.randint(0, int((end - start).total_seconds())))

def generate_sample_dates(start_date, end_date, num_records):
    return [random_date(start_date, end_date).strftime('%Y-%m-%d') for _ in range(num_records)]

def generate_time():
    time = datetime.strptime('00:00', '%H:%M') + timedelta(minutes=np.random.randint(0, 1440))
    return time.strftime('%H:%M')

In [6]:
# Use context manager to write Excel file
with pd.ExcelWriter('../data/sample_data.xlsx', engine='openpyxl') as writer:

    # 1. Daily DFR
    pd.DataFrame({
        'REPORT_DATE': generate_sample_dates(datetime(2021, 4, 1), datetime(2025, 4, 30), 20),
        'START_TIME': [generate_time() for _ in range(20)],
        'FINISH_TIME': [generate_time() for _ in range(20)],
        'ELAPSED_HOURS': np.random.uniform(1, 12, 20).round(2)
    }).to_excel(writer, sheet_name='Daily DFR', index=False)

    # 2. Volume All
    pd.DataFrame({
        'Date': generate_sample_dates(datetime(2021, 4, 1), datetime(2025, 4, 30), 20),
        'Auth': np.random.randint(1000, 5000, 20),
        'Txn': np.random.randint(100, 1000, 20)
    }).to_excel(writer, sheet_name='Volume All', index=False)

    # 3. Volume (Subm)-Top Merchants
    pd.DataFrame({
        'Month': pd.date_range('2021-04-01', periods=20, freq='M').strftime('%Y-%m'),
        'Metropolitan (MTA)': np.random.randint(10000, 50000, 20)
    }).to_excel(writer, sheet_name='Volume (Subm)-Top Merchants', index=False)

    # For 3-year monthly data sheets (April 2021-April 2025)
    months = pd.date_range('2021-04-01', '2025-04-30', freq='M').strftime('%Y-%m').tolist()
    entities = ['Sales', 'Marketing', 'Finance']
    companies = ['Company A', 'Company B', 'Company C']
    reports = ['Monthly Sales', 'Quarterly Financial', 'Marketing Analysis']
    frequencies = ['Monthly', 'Weekly', 'Daily']

    common_data = {
        'COMPANY_ID': np.random.randint(1000, 9999, 20),
        'ENTITY': np.random.choice(entities, 20),
        'BREAKDOWN_LVL': np.random.choice(['High', 'Medium', 'Low'], 20),
        'ENTITY_ID': np.random.randint(100, 999, 20),
        'DB_KEY': np.random.randint(100000, 999999, 20),
        'COMPANY_NAME': np.random.choice(companies, 20),
        'REPORT_NAME': np.random.choice(reports, 20),
        'REPORT_DATE': np.random.choice(months, 20),
        'FREQUENCY': np.random.choice(frequencies, 20),
        'START_TIME': [generate_time() for _ in range(20)],
        'FINISH_TIME': [generate_time() for _ in range(20)],
        'ELAPSED_HOURS': np.random.uniform(1, 24, 20).round(2)
    }

    # Generate sheets
    sheet_names = [
        'Daily Top Web Reports', 'Daily Top GRPT DFR Reports', 
        'Top monthly web reports', 'Top monthly Grpt DFR reports (>10 hrs)'
    ]

    for name in sheet_names:
        data = common_data.copy()
        if 'BREAKDOWN_LVL' not in name:
            data.pop('BREAKDOWN_LVL')
        df = pd.DataFrame(data)
        if name == 'Top monthly Grpt DFR reports (>10 hrs)':
            df = df[df['ELAPSED_HOURS'] > 10]
        df.to_excel(writer, sheet_name=name, index=False)

    # Monthend Duration-Web, Grpt
    pd.DataFrame({
        'REPORT_DATE': months[:20],
        'START_TIME': [generate_time() for _ in range(20)],
        'FINISH_TIME': [generate_time() for _ in range(20)],
        'MAX_DURATION_HOURS': np.random.uniform(5, 20, 20).round(2)
    }).to_excel(writer, sheet_name='Monthend Duration Web GRPT', index=False)

    # Daily Web
    pd.DataFrame({
        'REPORT_DATE': generate_sample_dates(datetime(2021, 4, 1), datetime(2025, 4, 30), 20),
        'START_TIME': [generate_time() for _ in range(20)],
        'FINISH_TIME': [generate_time() for _ in range(20)],
        'ELAPSED_HOURS': np.random.uniform(1, 12, 20).round(2)
    }).to_excel(writer, sheet_name='Daily Web', index=False)

print("Excel file 'sample_data.xlsx' created successfully!")

Excel file 'sample_data.xlsx' created successfully!


  'Month': pd.date_range('2021-04-01', periods=20, freq='M').strftime('%Y-%m'),
  months = pd.date_range('2021-04-01', '2025-04-30', freq='M').strftime('%Y-%m').tolist()
