In [2]:
import pandas as pd
import numpy as np

In [3]:
# --- Configuration ---
# Target number of rows in each generated quarterly dataset.
NUM_ROWS = 50

# Prefixes randomly combined with a running number to build item names.
ITEMS_PREFIX = [
    'Product',
    'Service',
    'Client',
    'Region',
]

# Pools the categorical columns are sampled from.
DEPARTMENTS = [
    'Sales',
    'Marketing',
    'Operations',
    'Finance',
    'HR',
]
CATEGORIES = [
    'Electronics',
    'Software',
    'Consulting',
    'Logistics',
    'Supplies',
    'Others',
]

In [4]:
# --- Generate Data for Q1 (FY 2025-26) ---
np.random.seed(42)  # fixed seed so repeated runs produce the same data

# The columns are filled one at a time, in this exact order: every draw
# advances NumPy's global random state, so reordering the statements below
# would change the generated values.
data_q1 = {}
# The zero-padded running index keeps each item name distinct.
data_q1['Item'] = [
    f'{np.random.choice(ITEMS_PREFIX)} {i:03d}' for i in range(NUM_ROWS)
]
data_q1['Department'] = np.random.choice(DEPARTMENTS, NUM_ROWS)
data_q1['Category'] = np.random.choice(CATEGORIES, NUM_ROWS)
# Monetary columns are stored as floats; counts stay integers.
data_q1['Revenue'] = np.random.randint(10000, 100000, NUM_ROWS).astype(float)
data_q1['Expenses'] = np.random.randint(3000, 40000, NUM_ROWS).astype(float)
data_q1['Units Sold'] = np.random.randint(50, 1000, NUM_ROWS)
data_q1['Customer Count'] = np.random.randint(10, 500, NUM_ROWS)

# Profit is derived from the two monetary columns and appended as the last column.
df_q1 = pd.DataFrame(data_q1).assign(
    Profit=lambda frame: frame['Revenue'] - frame['Expenses']
)



In [5]:
# 'Item' must be unique per row: drop any repeated names and renumber the index.
df_q1 = df_q1.drop_duplicates(subset=['Item']).reset_index(drop=True)

# If de-duplication removed rows, top the frame back up to NUM_ROWS with
# freshly generated items. The new rows are collected in a list and appended
# with a single concat rather than re-copying the frame on every iteration.
q1_taken_items = set(df_q1['Item'])
q1_fill_rows = []
while len(df_q1) + len(q1_fill_rows) < NUM_ROWS:
    new_item = f'{np.random.choice(ITEMS_PREFIX)} {len(df_q1) + len(q1_fill_rows):03d}_new'
    if new_item in q1_taken_items:
        continue  # name already used: draw another prefix
    q1_taken_items.add(new_item)
    q1_fill_row = {
        'Item': new_item,
        'Department': np.random.choice(DEPARTMENTS),
        'Category': np.random.choice(CATEGORIES),
        # Cast to float so the padded rows match the float monetary columns.
        'Revenue': float(np.random.randint(10000, 100000)),
        'Expenses': float(np.random.randint(3000, 40000)),
        'Units Sold': np.random.randint(50, 1000),
        'Customer Count': np.random.randint(10, 500),
    }
    # Profit must come from this row's own Revenue and Expenses; drawing two
    # unrelated random numbers would break Profit == Revenue - Expenses.
    q1_fill_row['Profit'] = q1_fill_row['Revenue'] - q1_fill_row['Expenses']
    q1_fill_rows.append(q1_fill_row)

if q1_fill_rows:
    df_q1 = pd.concat([df_q1, pd.DataFrame(q1_fill_rows)], ignore_index=True)



In [6]:
# --- Generate Data for Q2 (FY 2025-26) ---
# Q2 is derived from Q1: each surviving item keeps its department and category
# while its numeric metrics drift by a random percentage.
data_q2 = []
for _, q1_row in df_q1.iterrows():
    # About 10% of the Q1 items are left out of Q2 entirely.
    if np.random.rand() < 0.1:
        continue

    # Growth factors are drawn in a fixed order (revenue, expenses, units,
    # customers) because every draw advances the global random state.
    revenue_factor = 1 + np.random.uniform(-0.15, 0.25)    # -15% to +25%
    expenses_factor = 1 + np.random.uniform(-0.10, 0.20)   # -10% to +20%
    units_factor = 1 + np.random.uniform(-0.15, 0.25)      # -15% to +25%
    customers_factor = 1 + np.random.uniform(-0.10, 0.20)  # -10% to +20%

    revenue_q2 = q1_row['Revenue'] * revenue_factor
    expenses_q2 = q1_row['Expenses'] * expenses_factor
    # Counts are truncated back to whole numbers.
    units_q2 = int(q1_row['Units Sold'] * units_factor)
    customers_q2 = int(q1_row['Customer Count'] * customers_factor)

    # Each metric is clamped to a small positive floor.
    q2_record = {
        'Item': q1_row['Item'],
        'Department': q1_row['Department'],
        'Category': q1_row['Category'],
        'Revenue': max(100, revenue_q2),
        'Expenses': max(50, expenses_q2),
        'Units Sold': max(1, units_q2),
        'Customer Count': max(1, customers_q2),
    }
    data_q2.append(q2_record)

df_q2 = pd.DataFrame(data_q2)
# Profit is recomputed from the drifted Q2 figures.
df_q2['Profit'] = df_q2['Revenue'] - df_q2['Expenses']

In [8]:
# Add brand-new items to Q2 that have no counterpart in Q1.
num_new_items_q2 = int(NUM_ROWS * 0.1)  # 10% of the target row count

# Collect the new rows first and append them with one concat; growing the
# frame row by row copies the whole frame on every iteration.
new_q2_rows = []
for i in range(num_new_items_q2):
    new_item_name = f'New Item Q2 {i+1}'
    new_q2_row = {
        'Item': new_item_name,
        'Department': np.random.choice(DEPARTMENTS),
        'Category': np.random.choice(CATEGORIES),
        # Floats, to match the monetary columns already present in df_q2.
        'Revenue': float(np.random.randint(5000, 80000)),
        'Expenses': float(np.random.randint(2000, 30000)),
        'Units Sold': np.random.randint(30, 800),
        'Customer Count': np.random.randint(5, 400),
    }
    # Profit is derived from this row's own Revenue and Expenses; drawing it
    # from two fresh random numbers would make it inconsistent with them.
    new_q2_row['Profit'] = new_q2_row['Revenue'] - new_q2_row['Expenses']
    new_q2_rows.append(new_q2_row)

if new_q2_rows:
    df_q2 = pd.concat([df_q2, pd.DataFrame(new_q2_rows)], ignore_index=True)



In [9]:
# Top Q2 up to NUM_ROWS when skipped Q1 items left it short.
# A filler name must not collide with any item in either quarter.
q2_taken_items = set(df_q2['Item']) | set(df_q1['Item'])
q2_fill_rows = []
while len(df_q2) + len(q2_fill_rows) < NUM_ROWS:
    new_item = f'{np.random.choice(ITEMS_PREFIX)} {len(df_q2) + len(q2_fill_rows):03d}_fill'
    if new_item in q2_taken_items:
        continue  # name already used: draw another prefix
    q2_taken_items.add(new_item)
    q2_fill_row = {
        'Item': new_item,
        'Department': np.random.choice(DEPARTMENTS),
        'Category': np.random.choice(CATEGORIES),
        # Floats, to match the monetary columns already present in df_q2.
        'Revenue': float(np.random.randint(10000, 100000)),
        'Expenses': float(np.random.randint(3000, 40000)),
        'Units Sold': np.random.randint(50, 1000),
        'Customer Count': np.random.randint(10, 500),
    }
    # Profit is derived from this row's own Revenue and Expenses; drawing it
    # from two fresh random numbers would make it inconsistent with them.
    q2_fill_row['Profit'] = q2_fill_row['Revenue'] - q2_fill_row['Expenses']
    q2_fill_rows.append(q2_fill_row)

# One concat for all filler rows instead of re-copying the frame per row.
if q2_fill_rows:
    df_q2 = pd.concat([df_q2, pd.DataFrame(q2_fill_rows)], ignore_index=True)



In [10]:
# Cap both quarters at NUM_ROWS by keeping only the leading rows.
# This step only trims; a frame that is already short is left untouched.
if len(df_q1) > NUM_ROWS:
    df_q1 = df_q1.head(NUM_ROWS)
if len(df_q2) > NUM_ROWS:
    df_q2 = df_q2.head(NUM_ROWS)


In [14]:
# --- Save to Excel ---
# Destination workbooks, one per quarter, written relative to the working directory.
output_q1_filename, output_q2_filename = (
    'quarter1_FY25-26.xlsx',
    'quarter2_FY25-26.xlsx',
)

In [15]:
# Write each quarter to its own workbook; the row index is not exported.
for quarter_frame, workbook_path, sheet_label in (
    (df_q1, output_q1_filename, 'Q1_Data'),
    (df_q2, output_q2_filename, 'Q2_Data'),
):
    quarter_frame.to_excel(workbook_path, index=False, sheet_name=sheet_label)

In [16]:
# Report what was written: one line per workbook with its row count.
for written_path, written_frame in (
    (output_q1_filename, df_q1),
    (output_q2_filename, df_q2),
):
    print(f"Generated '{written_path}' with {len(written_frame)} rows.")

# Then show the first rows of each quarter under its own heading.
for sample_heading, sample_frame in (
    ("\nSample of Q1 Data:", df_q1),
    ("\nSample of Q2 Data:", df_q2),
):
    print(sample_heading)
    print(sample_frame.head())

Generated 'quarter1_FY25-26.xlsx' with 50 rows.
Generated 'quarter2_FY25-26.xlsx' with 50 rows.

Sample of Q1 Data:
          Item  Department     Category  Revenue  Expenses  Units Sold  \
0   Client 000  Operations     Supplies  64268.0   20675.0         312   
1   Region 001     Finance  Electronics  97939.0   26938.0         813   
2  Product 002     Finance     Supplies  28141.0   26664.0         193   
3   Client 003       Sales     Supplies  90356.0    4636.0         395   
4   Client 004  Operations  Electronics  81910.0   23080.0         673   

   Customer Count   Profit  
0             217  43593.0  
1             454  71001.0  
2             178   1477.0  
3             493  85720.0  
4             170  58830.0  

Sample of Q2 Data:
          Item  Department     Category       Revenue      Expenses  \
0   Client 000  Operations     Supplies  59180.819658  24440.694369   
1   Region 001     Finance  Electronics  83853.672226  31746.313632   
2  Product 002     Finance     S