In [None]:
# pip install faker

In [2]:
import pandas as pd
from faker import Faker
import random
from datetime import datetime, timedelta

# Create the Faker instance used by the generator functions below;
# it is configured with the 'en_US' locale.
fake = Faker('en_US')

# Name of the Excel workbook that save_to_excel() writes.
OUTPUT_FILE = "schema_data.xlsx"

# CATEGORY
def generate_categories():
    """Build the CATEGORY table.

    Returns:
        A DataFrame with columns ``c_id`` (1-based, sequential) and
        ``c_name`` holding the fixed list of event categories.
    """
    names = ["Music", "Sports", "Theater", "Art", "Comedy", "Dance"]
    rows = []
    # IDs start at 1 and follow the order of the names above.
    for c_id, c_name in enumerate(names, start=1):
        rows.append({"c_id": c_id, "c_name": c_name})
    return pd.DataFrame(rows)

# ORGANIZER
def generate_organizers(num=5):
    """Build the ORGANIZER table with ``num`` fake organizers.

    Args:
        num: Number of rows to generate.

    Returns:
        A DataFrame with columns ``o_id`` (1..num), ``o_name`` (a fake
        company name) and ``contact_info`` (a fake phone number).
    """
    rows = [
        {
            "o_id": o_id,
            "o_name": fake.company(),
            "contact_info": fake.phone_number(),  # phone number from the Faker instance
        }
        for o_id in range(1, num + 1)
    ]
    return pd.DataFrame(rows)

def generate_customers(num=10):
    """Build the CUSTOMER table.

    The table always starts with two hand-written accounts (an admin with
    ``cu_id`` 1 and a test user with ``cu_id`` 2), followed by ``num``
    randomly generated customers numbered from 3 upwards.

    Args:
        num: Number of random customers to append after the two fixed rows.

    Returns:
        A DataFrame with columns ``cu_id``, ``cu_name``, ``email``,
        ``phone_number``, ``address``, ``pwd`` and ``role``.
    """
    providers = ["gmail.com", "yahoo.com", "outlook.com", "hotmail.com", "icloud.com"]
    role_names = ["User", "Admin"]
    role_probs = [0.99, 0.01]  # 99% chance of "User", 1% chance of "Admin"

    def random_customer(cu_id):
        # Build the e-mail from a fake user name plus a common provider
        # so the address looks more realistic.
        username = fake.user_name()
        provider = random.choice(providers)
        return {
            "cu_id": cu_id,
            "cu_name": fake.name(),
            "email": f"{username}@{provider}",
            "phone_number": fake.phone_number(),
            "address": fake.address(),
            "pwd": fake.password(),
            # Pick the role according to the weights above.
            "role": random.choices(role_names, weights=role_probs, k=1)[0],
        }

    # The first two rows are fixed, well-known accounts.
    admin_account = {
        "cu_id": 1,
        "cu_name": "admin",
        "email": "admin@google.com",
        "phone_number": "123-456-7890",
        "address": "123 Custom Street, City, Country",
        "pwd": "1234",
        "role": "Admin",
    }
    test_account = {
        "cu_id": 2,
        "cu_name": "test user",
        "email": "test_user@google.com",
        "phone_number": "098-765-4321",
        "address": "456 Custom Avenue, City, Country",
        "pwd": "1234",
        "role": "User",
    }
    rows = [admin_account, test_account]

    # Random customers start at ID 3 so they never clash with the fixed rows.
    first_random_id = 3
    for cu_id in range(first_random_id, num + first_random_id):
        rows.append(random_customer(cu_id))

    return pd.DataFrame(rows)

# VENUE
def generate_venues(num=5):
    """Build the VENUE table with ``num`` fake venues.

    Args:
        num: Number of rows to generate.

    Returns:
        A DataFrame with columns ``v_id`` (1..num), ``v_name`` (a fake city
        name), ``address``, ``capacity`` (random integer in 50..500) and
        ``contact_info`` (a fake phone number).
    """
    rows = [
        {
            "v_id": v_id,
            "v_name": fake.city(),
            "address": fake.address(),
            "capacity": random.randint(50, 500),
            "contact_info": fake.phone_number(),
        }
        for v_id in range(1, num + 1)
    ]
    return pd.DataFrame(rows)

# EVENT
def generate_events(categories, organizers, num=10):
    """Build the EVENT table with ``num`` fake events.

    Args:
        categories: DataFrame providing a ``c_id`` column to draw from.
        organizers: DataFrame providing an ``o_id`` column to draw from.
        num: Number of rows to generate.

    Returns:
        A DataFrame with columns ``e_id`` (1..num), ``e_name``, ``c_id``,
        ``o_id``, ``e_datetime`` (1 to 365 days after the time of the call),
        ``e_location`` and ``description``.

    Raises:
        IndexError: If ``num`` > 0 and either ID column is empty.
    """
    # Draw from plain lists rather than from the Series themselves:
    # random.choice() indexes with seq[i], which on a Series is a label
    # lookup and therefore breaks when the index is not 0..n-1.
    category_ids = categories["c_id"].tolist()
    organizer_ids = organizers["o_id"].tolist()
    # One reference timestamp for the whole batch.
    now = datetime.now()

    data = []
    for i in range(1, num + 1):
        data.append({
            "e_id": i,
            "e_name": fake.sentence(nb_words=3),
            "c_id": random.choice(category_ids),
            "o_id": random.choice(organizer_ids),
            "e_datetime": now + timedelta(days=random.randint(1, 365)),
            "e_location": fake.city(),
            "description": fake.text()
        })
    return pd.DataFrame(data)

# EVENT_VENUE
def generate_event_venues(events, venues):
    """Build the EVENT_VENUE table, assigning one random venue per event.

    Args:
        events: DataFrame providing an ``e_id`` column; one output row is
            produced for each value.
        venues: DataFrame providing a ``v_id`` column to draw from.

    Returns:
        A DataFrame with columns ``e_id``, ``v_id`` and ``arrangement``
        (fake text of at most 50 characters).

    Raises:
        IndexError: If ``events`` is non-empty and ``venues`` has no rows.
    """
    # Draw from a plain list: random.choice() on a Series performs a
    # label-based lookup, which fails when the index is not 0..n-1.
    venue_ids = venues["v_id"].tolist()

    data = []
    for e_id in events["e_id"].tolist():
        v_id = random.choice(venue_ids)
        data.append({
            "e_id": e_id,
            "v_id": v_id,
            "arrangement": fake.text(max_nb_chars=50)
        })
    return pd.DataFrame(data)

# TICKET
def generate_tickets(events, num=20):
    """Build the TICKET table with ``num`` random ticket types.

    Args:
        events: DataFrame providing an ``e_id`` column to draw from.
        num: Number of rows to generate.

    Returns:
        A DataFrame with columns ``t_id`` (1..num), ``e_id``, ``t_type``
        ("Standard" or "VIP"), ``price``, ``total_quantity`` (50..500) and
        ``remain_quantity`` (0..total_quantity).

    Raises:
        IndexError: If ``num`` > 0 and ``events`` has no rows.
    """
    # Draw from a plain list: random.choice() on a Series performs a
    # label-based lookup, which fails when the index is not 0..n-1.
    event_ids = events["e_id"].tolist()

    data = []
    for i in range(1, num + 1):
        total_quantity = random.randint(50, 500)
        # Remaining stock can never exceed the total stock.
        remain_quantity = random.randint(0, total_quantity)
        t_type = random.choice(["Standard", "VIP"])

        # VIP prices come from the upper range, Standard prices from the
        # lower one (the two ranges share the boundary value 100).
        if t_type == "VIP":
            price = random.randint(100, 200)
        else:
            price = random.randint(20, 100)

        data.append({
            "t_id": i,
            "e_id": random.choice(event_ids),
            "t_type": t_type,
            "price": price,
            "total_quantity": total_quantity,
            "remain_quantity": remain_quantity
        })
    return pd.DataFrame(data)


# ORDER
def generate_orders(customers, num=100000):
    """Build the ORDER table with ``num`` random orders.

    Args:
        customers: DataFrame providing a ``cu_id`` column to draw from.
        num: Number of rows to generate.

    Returns:
        A DataFrame with columns ``or_id`` (1..num), ``cu_id``, ``or_date``
        (1 to 365 days before the time of the call), ``total_amount``
        (50..500, two decimals), ``payment_status`` and ``is_canceled``.

    Raises:
        IndexError: If ``num`` > 0 and ``customers`` has no rows.
    """
    # Draw from a plain list, built once: random.choice() on a Series does
    # a label-based lookup per call, which is slow and fails when the
    # index is not 0..n-1.
    customer_ids = customers["cu_id"].tolist()
    # One reference timestamp for the whole batch.
    now = datetime.now()

    data = []
    for i in range(1, num + 1):
        data.append({
            "or_id": i,
            "cu_id": random.choice(customer_ids),
            "or_date": now - timedelta(days=random.randint(1, 365)),
            "total_amount": round(random.uniform(50, 500), 2),
            "payment_status": random.choice(["Pending", "Paid", "Failed"]),
            "is_canceled": random.choice([True, False])
        })
    return pd.DataFrame(data)

# ORDER_DETAIL
def generate_order_details(orders, tickets):
    """Build the ORDER_DETAIL table, splitting each order into 1-3 lines.

    Every order gets between one and three detail rows (fewer if fewer
    than three tickets exist), each referring to a distinct ticket. The
    order's ``total_amount`` is divided among the rows in proportion to
    their quantities, and the last row absorbs the rounding remainder so
    that the subtotals add up to the order total.

    Args:
        orders: DataFrame with ``or_id`` and ``total_amount`` columns.
        tickets: DataFrame with a ``t_id`` column to sample from.

    Returns:
        A DataFrame with columns ``or_id``, ``t_id``, ``quantity`` (1..5)
        and ``subtotal`` (rounded to two decimals).

    Raises:
        ValueError: If ``orders`` has rows but ``tickets`` has none.
    """
    # Convert once; the ticket pool is the same for every order.
    ticket_ids = tickets["t_id"].tolist()
    # Never ask for more distinct tickets than exist.
    max_details = min(3, len(ticket_ids))

    data = []
    # Walk both columns in lockstep instead of re-filtering the whole
    # frame for each order, which was quadratic in the number of orders.
    for or_id, total_amount in zip(orders["or_id"].tolist(),
                                   orders["total_amount"].tolist()):
        if max_details == 0:
            raise ValueError("Cannot generate order details: no tickets available")

        num_details = random.randint(1, max_details)
        chosen_tickets = random.sample(ticket_ids, num_details)
        quantities = [random.randint(1, 5) for _ in range(num_details)]

        # Split total_amount across the rows in proportion to quantity.
        total_quantity = sum(quantities)
        subtotals = [round(total_amount * (q / total_quantity), 2) for q in quantities]

        # Recompute the last subtotal as "whatever is left" and round it,
        # so the rows sum to the order total without float noise.
        subtotals[-1] = round(total_amount - sum(subtotals[:-1]), 2)

        for t_id, quantity, subtotal in zip(chosen_tickets, quantities, subtotals):
            data.append({
                "or_id": or_id,
                "t_id": t_id,
                "quantity": quantity,
                "subtotal": subtotal
            })
    return pd.DataFrame(data)

# PAYMENT
def generate_payments(orders, num=5):
    """Build the PAYMENT table with ``num`` payments for random orders.

    Args:
        orders: DataFrame with ``or_id`` and ``total_amount`` columns.
        num: Number of rows to generate.

    Returns:
        A DataFrame with columns ``p_id`` (1..num), ``or_id``,
        ``payment_method``, ``payment_datetime`` (1 to 365 days before the
        time of the call) and ``amount`` (the chosen order's
        ``total_amount``).

    Raises:
        IndexError: If ``num`` > 0 and ``orders`` has no rows.
    """
    # Pair each order ID with its amount once, so picking an order is a
    # single list draw instead of a Series lookup plus a full-frame scan.
    order_rows = list(zip(orders["or_id"].tolist(), orders["total_amount"].tolist()))
    payment_methods = [
        "Credit Card", "PayPal", "Bank Transfer", "Apple Pay", "Google Pay",
        "Samsung Pay"
    ]
    # One reference timestamp for the whole batch.
    now = datetime.now()

    data = []
    for i in range(1, num + 1):
        # Pick a random order together with its total amount.
        or_id, total_amount = random.choice(order_rows)
        data.append({
            "p_id": i,
            "or_id": or_id,
            "payment_method": random.choice(payment_methods),
            "payment_datetime": now - timedelta(days=random.randint(1, 365)),
            "amount": total_amount  # the payment covers the full order total
        })
    return pd.DataFrame(data)


# Save the generated data to Excel
def save_to_excel(tables):
    """Write every table to its own sheet of the ``OUTPUT_FILE`` workbook.

    Args:
        tables: Mapping of sheet name to DataFrame. Sheets are written in
            the mapping's iteration order, without the DataFrame index.
    """
    excel_writer = pd.ExcelWriter(OUTPUT_FILE, engine="openpyxl")
    # Leaving the context manager saves and closes the workbook.
    with excel_writer:
        for name in tables:
            frame = tables[name]
            frame.to_excel(excel_writer, sheet_name=name, index=False)
    print(f"Data has been saved to {OUTPUT_FILE}")

# Entry point
def main():
    """Generate every table and save them all to the Excel workbook.

    Tables are generated parents-first, so each table that refers to
    another one can draw its foreign keys from rows that already exist.
    The sheets are written in that same order.
    """
    tables = {}

    # Stand-alone tables.
    tables["CATEGORY"] = generate_categories()
    tables["ORGANIZER"] = generate_organizers()
    tables["CUSTOMER"] = generate_customers()
    tables["VENUE"] = generate_venues()

    # Tables that draw IDs from the ones generated above.
    tables["EVENT"] = generate_events(tables["CATEGORY"], tables["ORGANIZER"])
    tables["EVENT_VENUE"] = generate_event_venues(tables["EVENT"], tables["VENUE"])
    tables["TICKET"] = generate_tickets(tables["EVENT"])
    tables["ORDER"] = generate_orders(tables["CUSTOMER"])
    tables["ORDER_DETAIL"] = generate_order_details(tables["ORDER"], tables["TICKET"])
    tables["PAYMENT"] = generate_payments(tables["ORDER"])

    save_to_excel(tables)

if __name__ == "__main__":
    main()


Data has been saved to schema_data.xlsx


In [4]:
import pandas as pd
import psycopg2
from psycopg2.extras import execute_values

# Connect to the PostgreSQL database
def connect_to_postgresql():
    """Open and return a new psycopg2 connection.

    The connection settings are hard-coded below; replace them with the
    values for your own database.
    """
    settings = {
        "dbname": "ticketsystem",  # replace with your database name
        "user": "postgres",        # replace with your user name
        "password": "1234",        # replace with your password
        "host": "localhost",       # or another host name
        "port": "5432",            # PostgreSQL default port
    }
    return psycopg2.connect(**settings)

# Insert a DataFrame into a PostgreSQL table
def insert_dataframe_to_table(conn, df, table_name):
    """Bulk-insert all rows of ``df`` into ``table_name`` and commit.

    Column names of ``df`` are used as the target column names. On any
    failure the transaction is rolled back, the error is printed and the
    exception is re-raised.

    Args:
        conn: An open psycopg2 connection.
        df: DataFrame whose rows are inserted.
        table_name: Target table. Names listed as reserved words below are
            wrapped in double quotes; all other names are used as given.

    Raises:
        Exception: Whatever the conversion or the database raised.
    """
    try:
        # Table names that are reserved words or contain special
        # characters must be double-quoted; add further names as needed.
        if table_name.upper() in ["ORDER"]:
            table_name = f'"{table_name}"'

        columns = list(df.columns)

        # Convert every cell to a plain Python object, column by column,
        # and turn missing values into None so they are stored as NULL.
        # df.to_numpy() would instead coerce mixed numeric columns to a
        # common float type and can yield numpy scalars that psycopg2
        # cannot adapt.
        records = df.astype(object).where(df.notna(), None)
        values = list(records.itertuples(index=False, name=None))

        # execute_values expands the single %s into the row tuples.
        insert_query = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES %s"

        with conn.cursor() as cur:
            execute_values(cur, insert_query, values)
        conn.commit()
        print(f"Data inserted into {table_name} successfully.")
    except Exception as e:
        # Leave the connection usable: a failed statement otherwise keeps
        # the transaction in an aborted state.
        conn.rollback()
        print(f"Error inserting data into table {table_name}: {e}")
        raise

# Entry point
def main():
    """Load every sheet of ``schema_data.xlsx`` into PostgreSQL.

    Each sheet name is used as the target table name, and sheets are
    inserted in workbook order. An error aborts the remaining inserts and
    is printed rather than propagated; the connection is always closed.
    """
    # sheet_name=None loads all sheets as a {name: DataFrame} mapping.
    workbook = pd.read_excel("schema_data.xlsx", sheet_name=None)

    connection = connect_to_postgresql()

    try:
        for sheet, frame in workbook.items():
            print(f"Inserting data into table: {sheet}...")
            insert_dataframe_to_table(connection, frame, sheet)
    except Exception as err:
        print("An error occurred during the process:", err, sep="\n")
    finally:
        connection.close()

if __name__ == "__main__":
    main()


Inserting data into table: CATEGORY...
Data inserted into CATEGORY successfully.
Inserting data into table: ORGANIZER...
Data inserted into ORGANIZER successfully.
Inserting data into table: CUSTOMER...
Data inserted into CUSTOMER successfully.
Inserting data into table: VENUE...
Data inserted into VENUE successfully.
Inserting data into table: EVENT...
Data inserted into EVENT successfully.
Inserting data into table: EVENT_VENUE...
Data inserted into EVENT_VENUE successfully.
Inserting data into table: TICKET...
Data inserted into TICKET successfully.
Inserting data into table: ORDER...
Data inserted into "ORDER" successfully.
Inserting data into table: ORDER_DETAIL...
Data inserted into ORDER_DETAIL successfully.
Inserting data into table: PAYMENT...
Data inserted into PAYMENT successfully.
