In [1]:
# import hopsworks
# import synth_transactions as st
# from datetime import datetime, timedelta


# num_merchants=5000
# num_banks=5000
# num_accounts=50000
# num_cards=70000
# num_transactions=2000000
# current_date = datetime(2025, 10, 5)  # one day after most recent data
# transactions_start_date = current_date - timedelta(days=30)
# issue_date = current_date - timedelta(days=365*3)
# expiry_date = current_date + timedelta(days=365*3)
# account_creation_start_date = current_date - timedelta(days=365*5)
# account_last_modified_start_date = current_date - timedelta(days=365)
# bank_last_modified_start_date = current_date - timedelta(days=365)
# merchant_last_modified_start_date = current_date - timedelta(days=365)

# # Connect to Hopsworks
# project = hopsworks.login()
# fs = project.get_feature_store()


# # Generate all DataFrames
# print("Starting data generation process...")

# # 1. Generate merchant details
# merchant_df = st.generate_merchant_details(start_date=merchant_last_modified_start_date, end_date=current_date)
# merchant_fg = st.create_feature_group_with_descriptions(
#     fs,
#     merchant_df, 
#     "merchant_details", 
#     "Details about merchants that execute transactions",
#     ["merchant_id"],
#     "last_modified"
# )

# # 2. Generate bank details  
# bank_df = st.generate_bank_details(start_date=bank_last_modified_start_date, end_date=current_date)
# bank_fg = st.create_feature_group_with_descriptions(
#     fs,
#     bank_df,
#     "bank_details",
#     "Details about banks that issue credit cards", 
#     ["bank_id"],
#     "last_modified"
# )

# # 3. Generate account details
# account_df = st.generate_account_details(account_creation_start_date=account_creation_start_date, 
#                                          current_date=current_date, 
#                                          account_last_modified_start_date=account_last_modified_start_date)
# account_fg = st.create_feature_group_with_descriptions(
#     fs,
#     account_df,
#     "account_details",
#     "Information about the account and card",
#     ["account_id"], 
#     "last_modified"
# )

# # 4. Generate card details
# card_df = st.generate_card_details(current_date=current_date, issue_date=issue_date, expiry_date=expiry_date)
# card_fg = st.create_feature_group_with_descriptions(
#     fs,
#     card_df,
#     "card_details", 
#     "Information about the account and card",
#     ["cc_num"],
#     "last_modified",
#     topic_name=f"{project.name}_card_details"
# )

# # 5. Generate credit card transactions
# # transactions_df = st.generate_credit_card_transactions(start_date=transactions_start_date, end_date=current_date)
# # transactions_fg = st.create_feature_group_with_descriptions(
# #     fs,
# #     transactions_df,
# #     "credit_card_transactions",
# #     "Details about credit card transactions",
# #     ["t_id"],
# #     "ts",
# #     topic_name=f"{project.name}_credit_card_transactions"
# # )

# transactions_df = st.generate_credit_card_transactions_from_existing(
#     card_df=card_df,
#     merchant_df=merchant_df,
#     start_date=transactions_start_date,
#     end_date=current_date,
#     rows=num_transactions,
#     tid_offset=0,
#     seed=42,
# )
# transactions_fg = st.create_feature_group_with_descriptions(
#     fs,
#     transactions_df,
#     "credit_card_transactions",
#     "Details about credit card transactions",
#     ["t_id"],
#     "ts",
#     topic_name=f"{project.name}_credit_card_transactions"
# )


# print("All feature groups created successfully!")
# # Print summary statistics
# print("\nSummary Statistics:")
# print(f"Merchant Details: {len(merchant_df)} rows")
# print(f"Bank Details: {len(bank_df)} rows") 
# print(f"Account Details: {len(account_df)} rows")
# print(f"Card Details: {len(card_df)} rows")
# print(f"Credit Card Transactions: {len(transactions_df)} rows")



import hopsworks
from datetime import datetime, timedelta
import synth_transactions as st  # if you keep module separation, else import functions directly

# Row counts requested from each generator; tune these as needed.
num_merchants = 5_000
num_banks = 5_000
num_accounts = 50_000
num_cards = 70_000
num_transactions = 2_000_000  # lower this for a quicker test run

# Anchor date; every other date below is a fixed offset from it.
current_date = datetime(year=2025, month=10, day=5)

# Start of the transaction window: 30 days before the anchor.
transactions_start_date = current_date - timedelta(days=30)

# Dates passed to the card generator: three 365-day years either side
# of the anchor (leap days are not accounted for).
issue_date = current_date - timedelta(days=3 * 365)
expiry_date = current_date + timedelta(days=3 * 365)

# Lower bound for account creation: five 365-day years before the anchor.
account_creation_start_date = current_date - timedelta(days=5 * 365)

# Lower bounds for the "last modified" ranges: 365 days before the anchor.
merchant_last_modified_start_date = current_date - timedelta(days=365)
bank_last_modified_start_date = current_date - timedelta(days=365)
account_last_modified_start_date = current_date - timedelta(days=365)

# Log in to Hopsworks and fetch the project's feature store handle.
project = hopsworks.login()
fs = project.get_feature_store()

print("Starting data generation process...")

# Step 1 -- merchants: build the frame, then create its feature group.
merchant_df = st.generate_merchant_details(
    rows=num_merchants,
    start_date=merchant_last_modified_start_date,
    end_date=current_date,
)
merchant_fg = st.create_feature_group_with_descriptions(
    fs, merchant_df,
    "merchant_details",  # feature group name
    "Details about merchants that execute transactions",
    ["merchant_id"],     # presumably the primary-key columns -- confirm in synth_transactions
    "last_modified",     # presumably the event-time column -- confirm in synth_transactions
)

# Step 2 -- banks: build the frame, then create its feature group.
bank_df = st.generate_bank_details(
    rows=num_banks,
    start_date=bank_last_modified_start_date,
    end_date=current_date,
)
bank_fg = st.create_feature_group_with_descriptions(
    fs, bank_df,
    "bank_details",   # feature group name
    "Details about banks that issue credit cards",
    ["bank_id"],      # presumably the primary-key columns -- confirm in synth_transactions
    "last_modified",  # presumably the event-time column -- confirm in synth_transactions
)

# Step 3 -- accounts: build the frame, then create its feature group.
account_df = st.generate_account_details(
    rows=num_accounts,
    account_creation_start_date=account_creation_start_date,
    current_date=current_date,
    account_last_modified_start_date=account_last_modified_start_date,
)
# NOTE(review): the description string below is identical to the one passed
# for "card_details" -- confirm that wording is intended for accounts too.
account_fg = st.create_feature_group_with_descriptions(
    fs, account_df,
    "account_details",  # feature group name
    "Information about the account and card",
    ["account_id"],     # presumably the primary-key columns -- confirm in synth_transactions
    "last_modified",    # presumably the event-time column -- confirm in synth_transactions
)

# Step 4 -- cards: build the frame, then create its feature group.
# The generator is given the account and bank counts as well as the card
# count (presumably so generated ids fall within those ranges -- confirm).
card_df = st.generate_card_details(
    rows=num_cards,
    num_accounts=num_accounts,
    num_banks=num_banks,
    current_date=current_date,
    issue_date=issue_date,
    expiry_date=expiry_date,
)
# NOTE(review): the description string below is identical to the one passed
# for "account_details" -- confirm that is intended.
card_fg = st.create_feature_group_with_descriptions(
    fs, card_df,
    "card_details",   # feature group name
    "Information about the account and card",
    ["cc_num"],       # presumably the primary-key columns -- confirm in synth_transactions
    "last_modified",  # presumably the event-time column -- confirm in synth_transactions
    topic_name=f"{project.name}_card_details",
)

# Step 5 -- transactions: generated from the card and merchant frames that
# were built earlier in this script, then stored as a feature group.
transactions_df = st.generate_credit_card_transactions_from_existing(
    card_df=card_df,
    merchant_df=merchant_df,
    start_date=transactions_start_date,
    end_date=current_date,
    rows=num_transactions,
    tid_offset=0,
    seed=42,  # fixed seed, presumably for reproducible output -- confirm
)
transactions_fg = st.create_feature_group_with_descriptions(
    fs, transactions_df,
    "credit_card_transactions",  # feature group name
    "Details about credit card transactions",
    ["t_id"],  # presumably the primary-key columns -- confirm in synth_transactions
    "ts",      # presumably the event-time column -- confirm in synth_transactions
    topic_name=f"{project.name}_credit_card_transactions",
)

# Final report: a completion banner followed by the row count of every
# generated frame, emitted one item per line through a single print call.
print(
    "All feature groups created successfully!",
    "\nSummary Statistics:",
    f"Merchant Details: {len(merchant_df)} rows",
    f"Bank Details: {len(bank_df)} rows",
    f"Account Details: {len(account_df)} rows",
    f"Card Details: {len(card_df)} rows",
    f"Credit Card Transactions: {len(transactions_df)} rows",
    sep="\n",
)


2025-10-06 12:21:21,584 INFO: Initializing external client
2025-10-06 12:21:21,585 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-10-06 12:21:23,044 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/398
Starting data generation process...
Generating merchant details...
Creating feature group: merchant_details
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/398/fs/335/fg/1532498


Uploading Dataframe: 100.00% |███████████████████████████████████████████████████████████████████████████████████████| Rows 5000/5000 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: merchant_details_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/398/jobs/named/merchant_details_1_offline_fg_materialization/executions
  Added description for: merchant_id
  Added description for: category
  Added description for: country
  Added description for: cnt_chrgeback_prev_day
  Added description for: last_modified
Generating bank details...
Creating feature group: bank_details
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/398/fs/335/fg/1532499


Uploading Dataframe: 100.00% |███████████████████████████████████████████████████████████████████████████████████████| Rows 5000/5000 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: bank_details_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/398/jobs/named/bank_details_1_offline_fg_materialization/executions
  Added description for: bank_id
  Added description for: country
  Added description for: credit_rating
  Added description for: last_modified
Generating account details...
Creating feature group: account_details
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/398/fs/335/fg/1532500


Uploading Dataframe: 100.00% |█████████████████████████████████████████████████████████████████████████████████████| Rows 50000/50000 | Elapsed Time: 00:05 | Remaining Time: 00:00


Launching job: account_details_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/398/jobs/named/account_details_1_offline_fg_materialization/executions
  Added description for: account_id
  Added description for: name
  Added description for: address
  Added description for: debt_end_prev_month
  Added description for: last_modified
  Added description for: creation_date
  Added description for: end_date
Generating card details...
Creating feature group: card_details
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/398/fs/335/fg/1532501


Uploading Dataframe: 100.00% |█████████████████████████████████████████████████████████████████████████████████████| Rows 70000/70000 | Elapsed Time: 00:05 | Remaining Time: 00:00


Launching job: card_details_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/398/jobs/named/card_details_1_offline_fg_materialization/executions
  Added description for: cc_num
  Added description for: cc_expiry_date
  Added description for: account_id
  Added description for: bank_id
  Added description for: issue_date
  Added description for: card_type
  Added description for: status
  Added description for: last_modified
Generating credit card transactions from existing card + merchant tables...
Creating feature group: credit_card_transactions
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/398/fs/335/fg/1532502


Uploading Dataframe: 100.00% |█████████████████████████████████████████████████████████████████████████████████| Rows 2000000/2000000 | Elapsed Time: 00:54 | Remaining Time: 00:00


Launching job: credit_card_transactions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/398/jobs/named/credit_card_transactions_1_offline_fg_materialization/executions
  Added description for: t_id
  Added description for: cc_num
  Added description for: account_id
  Added description for: merchant_id
  Added description for: amount
  Added description for: ip_address
  Added description for: card_present
  Added description for: ts
All feature groups created successfully!

Summary Statistics:
Merchant Details: 5000 rows
Bank Details: 5000 rows
Account Details: 50000 rows
Card Details: 70000 rows
Credit Card Transactions: 2000000 rows
