In [1]:
import random
from pathlib import Path

import pandas as pd
from faker import Faker
# Shared Faker instance used by the data-generation cells below.
fake = Faker()

# Seed both random sources so a fresh Restart & Run All draws the same
# pseudo-random sequence on every run. Dates requested relative to "today"
# are still resolved against the day the notebook is executed.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

In [2]:
# Row counts and id ranges for the synthetic tables.
NUM_EXPENSES = 30

# The three transaction-derived tables share one size.
# NOTE(review): NUM_PROFITS and NUM_SALES are not read by the visible cells;
# the generation loop is driven by NUM_TRANSACTIONS alone.
NUM_PROFITS = NUM_SALES = NUM_TRANSACTIONS = 5_000

# Upper bounds for the randomly drawn foreign-key style ids (lower bound is 1).
NUM_CLIENTS = 3_000
NUM_EMPLOYEES = 50
NUM_HOUSES = 1_000
NUM_OFFICES = 3

# Helper that draws a random monetary amount for a transaction
def generate_amount(transaction_type):
    """Return a random, unrounded amount for the given transaction type.

    Args:
        transaction_type: "Sale" selects the sale range; every other value
            is handled as a lease.

    Returns:
        A float drawn uniformly from 1,000,000-20,000,000 for a sale, or
        from 800-10,000 otherwise.
    """
    is_sale = transaction_type == "Sale"
    low, high = (1000000, 20000000) if is_sale else (800, 10000)
    return random.uniform(low, high)

In [3]:
# Generate expenses data
# Categories an expense row can be labelled with.
expense_descriptions = [
    "development expense",
    "marketing expense",
    "other expense",
    "infrastructure expense",
    "operating expense",
]

# One record per expense id, starting at 1. The dict values are evaluated top
# to bottom, so the random draws happen in a fixed order for every row.
expenses_data = [
    {
        "expense_id": expense_id,
        "description": random.choice(expense_descriptions),
        "amount": round(random.uniform(1000, 50000), 2),
        "expense_date": fake.date_between(start_date="-1y", end_date="today"),
        "office_id": random.randint(1, NUM_OFFICES),
    }
    for expense_id in range(1, NUM_EXPENSES + 1)
]

In [4]:
# Generate transactions, profits, and sales data
# Fraction of each transaction amount recorded as net profit.
PROFIT_RATE = 0.015

transactions_data = []
profits_data = []
sales_data = []

# Each iteration emits one transaction row plus one profit row and one sales
# row that reuse the transaction's id and date.
for transaction_id in range(1, NUM_TRANSACTIONS + 1):
    transaction_type = random.choice(["Sale", "Lease"])
    amount = round(generate_amount(transaction_type), 2)
    transaction_date = fake.date_between(start_date="-2y", end_date="today")

    # Create transaction entry; employee, house and client ids are drawn
    # independently within their configured ranges.
    transactions_data.append({
        "transaction_id": transaction_id,
        "employee_id": random.randint(1, NUM_EMPLOYEES),
        "transaction_type": transaction_type,
        "transaction_date": transaction_date,
        "amount": amount,
        "house_id": random.randint(1, NUM_HOUSES),
        "client_id": random.randint(1, NUM_CLIENTS),
    })

    # Create profit entry with amount less than transaction amount.
    # Rounded to 2 decimals so it matches the precision of the other
    # monetary columns instead of exporting raw float noise.
    net_profit = round(amount * PROFIT_RATE, 2)
    profits_data.append({
        "profit_id": transaction_id,
        "transaction_id": transaction_id,
        "net_profit": net_profit,
        "profit_date": transaction_date,
    })

    # Create sales entry; the listing price is the sale price marked up by
    # a random factor between 1.0 and 1.3, so it is never below the sale price.
    # NOTE(review): a sales row is written for "Lease" transactions too -
    # confirm that is intended.
    sale_price = amount
    listing_price = round(sale_price * random.uniform(1.0, 1.3), 2)
    sales_data.append({
        "sale_id": transaction_id,
        "transaction_id": transaction_id,
        "listing_price": listing_price,
        "sale_price": sale_price,
        "sale_date": transaction_date,
    })

In [5]:
# Create DataFrames
# Turn each list of row dicts into its own frame, in the order
# expenses, transactions, profits, sales.
expenses_df, transactions_df, profits_df, sales_df = map(
    pd.DataFrame,
    (expenses_data, transactions_data, profits_data, sales_data),
)

In [6]:
# Display initial rows of each dataframe
# A single print call with a newline separator emits the same text as printing
# each heading and each five-row preview one after another.
print(
    "\nTransactions Data:",
    transactions_df.head(),
    "Expenses Data:",
    expenses_df.head(),
    "\nProfits Data:",
    profits_df.head(),
    "\nSales Data:",
    sales_df.head(),
    sep="\n",
)


Transactions Data:
   transaction_id  employee_id transaction_type transaction_date       amount  \
0               1            7             Sale       2023-03-23  12249127.61   
1               2            2             Sale       2022-09-23  14219287.38   
2               3           39            Lease       2023-02-28      5760.21   
3               4           30             Sale       2022-10-22  12593858.48   
4               5           33            Lease       2024-02-17      4473.08   

   house_id  client_id  
0       695        979  
1       442       1711  
2       847       1949  
3       720       2768  
4       554       1418  
Expenses Data:
   expense_id        description    amount expense_date  office_id
0           1  operating expense  28353.30   2024-04-18          3
1           2  marketing expense  39044.75   2023-09-07          2
2           3      other expense   2138.94   2024-07-16          3
3           4  marketing expense  38103.73   2024-01-01     

In [7]:
# Save to CSV if needed
# Resolve the target folder from the current user's home directory instead of
# a machine-specific absolute path, so the cell runs for any user.
OUTPUT_DIR = Path.home() / "Downloads"
# Create the folder when it does not exist yet; a missing directory would
# otherwise make to_csv fail.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# index=False keeps the DataFrame's row index out of the files.
transactions_df.to_csv(OUTPUT_DIR / "transactions.csv", index=False)
expenses_df.to_csv(OUTPUT_DIR / "expenses.csv", index=False)
profits_df.to_csv(OUTPUT_DIR / "profits.csv", index=False)
sales_df.to_csv(OUTPUT_DIR / "sales.csv", index=False)