In [2]:
from faker import Faker
import random
import yfinance as yf
from datetime import datetime, timedelta
import json
import os
import pandas as pd
import numpy as np

In [3]:
class settings:
    """Notebook-wide configuration constants."""
    # Relative directory path for raw data files.
    # NOTE(review): presumably the `data_dir` handed to save_to_json - its use is not visible here.
    DATA_DIR = 'data/raw'
    # Seed value. NOTE(review): presumably passed as FakeDataGenerator(seed=...) - confirm at the call site.
    SEED = 42

In [4]:
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder for the pandas / numpy / datetime values found in generated data.

    Conversions applied by ``default``:
      * ``pd.NaT``                      -> ``None`` (JSON ``null``)
      * ``pd.Timestamp`` / ``datetime`` -> ``'YYYY-MM-DD HH:MM:SS'`` string
      * numpy bool / integer / floating -> Python ``bool`` / ``int`` / ``float``
      * ``np.ndarray``                  -> nested Python list
      * ``pd.Series``                   -> dict (index -> value)

    Anything else is delegated to ``json.JSONEncoder.default``, which raises
    ``TypeError`` for unsupported objects.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``."""
        # NaT must be handled before the datetime branch: it has no usable
        # strftime, so formatting it would fail instead of producing a value.
        if obj is pd.NaT:
            return None
        # pandas / datetime
        if isinstance(obj, (pd.Timestamp, datetime)):
            return obj.strftime('%Y-%m-%d %H:%M:%S')
        # numpy scalars (np.bool_ is not a subclass of np.integer)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # numpy arrays -> (nested) lists of Python scalars
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # pandas Series -> dict
        if isinstance(obj, pd.Series):
            return obj.to_dict()
        return super().default(obj)

In [7]:
class FakeDataGenerator:
    def __init__(self, seed=None):
        """Create the Faker instance and the empty in-memory table store.

        Args:
            seed: Optional seed. When it is not None it is passed to both
                ``random.seed`` and ``Faker.seed``.
        """
        # Initialize Faker with Indonesian locale
        self.faker = Faker('id_ID')
        # Seed for reproducibility. Both calls are made on shared objects (the
        # ``random`` module and the ``Faker`` class), not on this instance.
        if seed is not None:
            random.seed(seed)
            Faker.seed(seed)
        # In-memory storage: the generate_* methods store their result lists
        # here under a table-name key (e.g. 'Users', 'Orders').
        self.db = {}

    def generate_users(self, num_records=50):
        """
        nik           : Nomor Induk Kependudukan
        nama          : Nama Lengkap
        email         : Alamat Email
        telepon       : Nomor Telepon
        password      : Kata Sandi
        nomor_rekening: Nomor Rekening
        kyc_status    : Status KYC
        saldo         : Saldo
        """
        users = []
        for _ in range(num_records):
            users.append({
                'nik': self.faker.ssn(),
                'nama': self.faker.name(),
                'email': self.faker.email(),
                'telepon': self.faker.phone_number(),
                'password': self.faker.password(),
                'nomor_rekening': self.faker.bban(),
                'kyc_status': self.faker.random_element(('verified', 'unverified')),
                'saldo': self.faker.random_int(100000, 10_000_000)
            })
        self.db['Users'] = users
        print("[USERS] Generated")
        return users

    def generate_stocks(self, tickers=None):
        """
        ticker         : Kode Saham
        nama_perusahaan: Nama
        sektor         : Sektor
        profil         : Profil
        tipe           : Tipe ('listed', 'IPO')
        """
        if not tickers:
            return []
        
        ipo_list = ["ELIT.JK", "BAYU.JK", "BINA.JK", "BOLT.JK", "WIFI.JK",
                   "DATA.JK", "VKTR.JK", "FORE.JK", "ISAT.JK", "EXCL.JK"]
        info_list = []
        for t in tickers:
            info = yf.Ticker(t).info
            # if t in stock ipo, set tipe to 'IPO'
            if t in ipo_list:
                tipe = 'IPO'
            else:
                tipe = 'listed'
            info_list.append({
                'ticker': t,
                'nama_perusahaan': info.get('longName', 'Unknown'),
                'sektor': info.get('sector', 'Unknown'),
                'profil': info.get('longBusinessSummary', 'Unknown'),
                'tipe': tipe
            })
        self.db['Stocks'] = info_list
        print("[STOCKS] Generated")
        return info_list

    def generate_stock_ipo(self):
        """
        ticker          : Kode Saham
        harga_penawaran : Harga Penawaran
        status_ipo      : open/closed
        tanggal_ipo     : Tanggal
        """
        tickers = ["ELIT.JK", "BAYU.JK", "BINA.JK", "BOLT.JK", "WIFI.JK",
                   "DATA.JK", "VKTR.JK", "FORE.JK", "ISAT.JK", "EXCL.JK"]
        ipos = []
        for t in tickers:
            ipos.append({
                'ticker': t,
                'harga_penawaran': self.faker.random_int(1000, 10000),
                'status_ipo': self.faker.random_element(('open', 'closed')),
                'tanggal_ipo': self.faker.date_between('-1y', 'today').strftime('%Y-%m-%d')
            })
        self.db['Stock_IPO'] = ipos
        print("[IPOS] Generated")
        return ipos

    def generate_moderators(self, num_records=20):
        """
        username: Nama Pengguna
        password: Kata Sandi
        """
        mods = []
        for _ in range(num_records):
            mods.append({
                'username': self.faker.user_name(),
                'password': self.faker.password()
            })
        self.db['Moderators'] = mods
        print("[MODERATORS] Generated")
        return mods

    def generate_stock_daily_price(self):
        """
        ticker : Kode Saham
        open   : Harga Pembukaan
        close  : Harga Penutupan
        highest: Harga Tertinggi
        lowest : Harga Terendah
        date   : Tanggal
        """
        tickers = ['BBCA.JK', 'TLKM.JK', 'UNVR.JK', 'BMRI.JK', 'ASII.JK',
                   'BRPT.JK', 'PGAS.JK', 'GGRM.JK', 'HMSP.JK', 'ITMG.JK',
                   'JPFA.JK', 'MDKA.JK', 'PTBA.JK', 'SMGR.JK', 'TINS.JK']
        data = []
        for t in tickers:
            df = yf.download(t, period='1y', interval='1d')

            df = df.reset_index()
            for _, row in df.iterrows():
                row['Date'] = pd.to_datetime(row['Date'])
                data.append({
                    'ticker': t,
                    'date': row['Date'][""].strftime('%Y-%m-%d'),
                    'open': row['Open'][t],
                    'close': row['Close'][t],
                    'highest': row['High'][t],
                    'lowest': row['Low'][t]
                })
        self.db['Stock_Daily_Price'] = data
        print("[DAILY STOCK PRICE] Generated")
        return data

    def generate_live_stock_price_change(self, num_records=1000, tickers=None):
        """
        ticker   : Kode Saham
        price    : Harga
        timestamp: Waktu perubahan
        """
        if not tickers:
            return []
        changes = []
        per_ticker = max(1, num_records // len(tickers))
        for t in tickers:
            price = self.faker.random_int(1000, 10000)
            ts = self.faker.date_time_this_year()
            changes.append({'ticker': t, 'price': price,
                           'timestamp': ts.strftime('%Y-%m-%d %H:%M:%S')})
            for _ in range(per_ticker-1):
                price = round(
                    price * (1 + self.faker.random.uniform(0.001, 0.03)), 2)
                ts += timedelta(seconds=self.faker.random_int(10, 60))
                changes.append({'ticker': t, 'price': price,
                               'timestamp': ts.strftime('%Y-%m-%d %H:%M:%S')})
        self.db['Live_Stock_Price_Change'] = changes
        print("[LIVE STOCK PRICE CHANGE] Generated")
        return changes

    def generate_posts(self, num_records=100, niks=None):
        """
        post_id  : int
        nik      : str
        isi      : teks
        timestamp: waktu posting
        """
        posts = []
        for pid in range(1, num_records+1):
            ts = self.faker.date_time_this_year()
            posts.append({
                'post_id': pid,
                'nik': random.choice(niks),
                'isi': self.faker.text(200),
                'timestamp': ts.strftime('%Y-%m-%d %H:%M:%S')
            })
        self.db['Posts'] = posts
        print("[POSTS] Generated")
        return posts

    def generate_comments(self, num_records=50, niks=None):
        """
        comment_id: int
        nik       : str
        isi       : teks
        timestamp : waktu posting
        """
        comments = []
        for cid in range(1, num_records+1):
            ts = self.faker.date_time_this_year()
            comments.append({
                'comment_id': cid,
                'nik': random.choice(niks),
                'isi': self.faker.text(100),
                'timestamp': ts.strftime('%Y-%m-%d %H:%M:%S')
            })
        self.db['Comments'] = comments
        print("[COMMENTS] Generated")
        return comments

    def generate_post_comments(self, num_records=50, post_ids=None, comment_ids=None):
        """
        comment_id: int
        post_id   : int
        post_id (one) - > comment_id (many)
        """
        if not post_ids or not comment_ids:
            return []
        comments = []
        for cid in random.sample(comment_ids, min(num_records, len(comment_ids))):
            post = random.choice(post_ids)
            post_ts = datetime.strptime(post['timestamp'], '%Y-%m-%d %H:%M:%S')
            c_ts = self.faker.date_time_between(
                start_date=post_ts, end_date='now')
            comments.append({
                'comment_id': cid,
                'post_id': post['post_id'],
            })
        self.db['Post_Comments'] = comments
        print("[POST COMMENTS] Generated")
        return comments

    def generate_comment_likes(self, num_records=50, comment_ids=None, niks=None):
        """
        comment_id: int
        nik       : str
        timestamp : waktu posting
        """
        if not comment_ids or not niks:
            return []
        likes = []
        for _ in range(num_records):
            comment_id = random.choice(comment_ids)
            # comment is int
            comment = next(
                (c for c in self.db['Comments'] if c['comment_id'] == comment_id), None)
            comment_ts = datetime.strptime(
                comment['timestamp'], '%Y-%m-%d %H:%M:%S')
            l_ts = self.faker.date_time_between(
                start_date=comment_ts, end_date='now')
            likes.append({
                'nik': random.choice(niks),
                'comment_id': comment['comment_id'],
                'timestamp': l_ts.strftime('%Y-%m-%d %H:%M:%S')
            })
        likes = list({(like['nik'], like['comment_id']): like for like in likes}.values())
        self.db['Comment_Likes'] = likes
        print("[COMMENT LIKES] Generated")
        return likes

    def generate_comment_replies(self, num_records=50, comment_ids=None):
        """
        comment_id_from: int
        comment_id_to  : int
        timestamp       : waktu posting
        """
        if not comment_ids:
            return []
        replies = []
        for _ in range(num_records):
            comment = random.choice(comment_ids)
            comment_ts = datetime.strptime(
                comment['timestamp'], '%Y-%m-%d %H:%M:%S')
            r_ts = self.faker.date_time_between(
                start_date=comment_ts, end_date='now')
            tmp = {
                'comment_id_from': comment['comment_id'],
                'comment_id_to': random.choice(comment_ids)['comment_id'],
                'timestamp': r_ts.strftime('%Y-%m-%d %H:%M:%S')
            }
            replies.append(tmp)

        self.db['Comment_Replies'] = replies
        print("[COMMENT REPLIES] Generated")
        return replies

    def generate_post_likes(self, num_records=50, posts=None, niks=None):
        """
        post_id  : int
        nik      : int
        timestamp: like setelah post
        """
        if not posts or not niks:
            return []
        likes = []
        # only one like from each user to each post
        for _ in range(num_records):
            post = random.choice(posts)
            post_ts = datetime.strptime(post['timestamp'], '%Y-%m-%d %H:%M:%S')
            l_ts = self.faker.date_time_between(
                start_date=post_ts, end_date='now')
            likes.append({
                'post_id': post['post_id'],
                'nik': random.choice(niks),
                'timestamp': l_ts.strftime('%Y-%m-%d %H:%M:%S')
            })
        # delete duplicates in post_id, nik
        likes = list({(like['post_id'], like['nik'])                     : like for like in likes}.values())

        self.db['Post_Likes'] = likes
        print("[POST LIKES] Generated")
        return likes

    def generate_monitoring_posts(self, num_records=25, post_ids=None, moderators=None):
        """
        post_id         : int
        moderator       : str
        approval_status : enum
        timestamp       : waktu persetujuan
        """
        if not post_ids or not moderators:
            return []
        monitors = []
        for pid in random.sample(post_ids, min(num_records, len(post_ids))):
            monitors.append({
                'post_id': pid,
                'moderator': random.choice(moderators)['username'],
                'approval_status': self.faker.random_element(('approved', 'rejected')),
                'timestamp': self.faker.date_time_this_year().strftime('%Y-%m-%d %H:%M:%S')
            })
        self.db['Monitoring_Posts'] = monitors
        print("[MONITORING POSTS] Generated")
        return monitors

    def generate_post_tagging(self, num_records=50, post_ids=None, tickers=None):
        """
        post_id: int
        ticker : str
        """
        if not post_ids or not tickers:
            return []
        tags = []
        
        for _ in range(num_records):
            post_id = random.choice(post_ids)
            ticker = random.choice(tickers)
            tags.append({
                'post_id': post_id,
                'ticker': ticker
            })
        # delete duplicates in post_id, ticker
        tags = list({(tag['post_id'], tag['ticker']): tag for tag in tags}.values())
        self.db['Post_Tags'] = tags
        print("[POST TAGS] Generated")
        return tags

    def generate_predictions(self, num_records=50, post_ids=None, tickers=None):
        """
        post_id: int
        ticker : str
        price  : int
        durasi : int
        """
        if not post_ids or not tickers or num_records > len(post_ids):
            return []
        preds = []
        for pid in random.sample(post_ids, num_records):
            preds.append({
                'post_id': pid,
                'ticker': random.choice(tickers),
                'price': self.faker.random_int(50, 1_000_000),
                'durasi': self.faker.random_int(1, 365)
            })
        self.db['Predictions'] = preds
        print("[PREDICTIONS] Generated")
        return preds

    def generate_orders(self, num_records=100, tickers=None, niks=None):
        """
        order_id    : int
        nik         : str
        ticker      : str
        tipe_order  : enum ('beli','jual')
        harga       : int
        jumlah_lembar: int
        status      : enum ('pending','partial','executed')
        """
        orders = []
        for oid in range(1, num_records+1):
            orders.append({
                'order_id': oid,
                'nik': random.choice(niks),
                'ticker': random.choice(tickers),
                'tipe_order': self.faker.random_element(('beli', 'jual')),
                'harga': self.faker.random_int(1000, 10000),
                'jumlah_lembar': self.faker.random_int(1, 100000),
                'status': self.faker.random_element(('pending', 'partial', 'executed'))
            })
        self.db['Orders'] = orders
        print("[ORDERS] Generated")
        return orders

    def generate_prediction_voting(self, num_records=50, post_ids=None, user_ids=None):
        """
        post_id: int
        nik: int
        tipe: enum ('yes','no')
        """
        if not post_ids or not user_ids:
            return []
        votes = []
        # sample without replacement

        for pid in random.sample(post_ids, min(num_records, len(post_ids))):
            votes.append({
                'nik': random.choice(user_ids),
                'post_id': pid,
                'tipe': self.faker.random_element(('yes', 'no'))
            })
        self.db['Prediction_Voting'] = votes
        print("[PREDICTION VOTING] Generated")
        return votes

    # num_records is larger than the number of orders to simulate multiple transactions per order
    def generate_transactions(self, num_records=150):
        """
        Generate transaction (fill) records from the orders in self.db['Orders'].

        Each generated record contains:
        order_id      : int - id of the order being filled
        jumlah_lembar : int - number of shares transacted in this fill
        harga         : int - execution price; the order's 'harga', or a random
                        fallback between 100 and 10000 when that price is <= 0
        waktu_eksekusi: the value returned by faker.date_time_this_year, stored
                        as returned (it is not formatted to a string here)

        Side effects visible in this method:
        - the 'status' of filled orders in self.db['Orders'] is rewritten to
          'executed' (nothing left) or 'partial' (shares remaining);
        - a temporary 'sisa_lembar' (remaining shares) key is added to every
          order and removed again before returning;
        - self.db['Transactions'] is created, or extended when it already
          holds records.

        Args:
            num_records: Maximum number of transactions to generate. The loop
                also stops after ``num_records * 10`` attempts or when no
                eligible order is left.

        Returns:
            list: The transactions created by this call only, or [] when there
            are no orders or no eligible orders.
        """
        if not self.db.get('Orders'):
            print("[TRANSACTIONS] No orders available to generate transactions from.")
            return []

        transactions = []
        # Start every order with its full size as the remaining amount. The
        # counter is rebuilt from 'jumlah_lembar' on every call, so fills made
        # by an earlier call are not taken into account.
        for order in self.db['Orders']:
            order['sisa_lembar'] = order.get('jumlah_lembar', 0)

        # Eligible orders: status 'open', 'partial' or 'executed' with shares
        # remaining. Any other status (for example 'pending') is never filled.
        candidate_order_indices = [
            i for i, order in enumerate(self.db['Orders'])
            if order.get('status') in ('open', 'partial', 'executed') and order.get('sisa_lembar', 0) > 0
        ]

        if not candidate_order_indices:
            print(
                "[TRANSACTIONS] No orders with sisa_lembar > 0 available for transaction generation."
            )
            # Remove the temporary counter again before returning.
            for order_to_clean in self.db['Orders']:
                if 'sisa_lembar' in order_to_clean:
                    del order_to_clean['sisa_lembar']
            return []

        num_generated_tx = 0
        attempts = 0
        # Upper bound on loop iterations so the loop always terminates.
        max_attempts = num_records * 10

        while num_generated_tx < num_records and attempts < max_attempts:
            attempts += 1
            if not candidate_order_indices:
                break

            order_idx_in_db = random.choice(candidate_order_indices)
            # Reference to the stored order dict: the changes below are made to
            # the order inside self.db['Orders'] itself.
            order_ref = self.db['Orders'][order_idx_in_db]

            lembar_transaksi = 0
            original_order_price = order_ref.get('harga', 0)

            # The execution price is simply the order's own price; a random
            # fallback is used only when that price is zero or negative.
            execution_price = original_order_price
            if execution_price <= 0 : # If order price was invalid/zero
                execution_price = self.faker.random_int(min=100, max=10000) # Fallback


            if order_ref['sisa_lembar'] > 0:
                # 'executed' orders, and orders with fewer than 100 shares
                # left, are filled completely in a single transaction.
                if order_ref['status'] == 'executed' or order_ref['sisa_lembar'] < 100:
                    lembar_transaksi = order_ref['sisa_lembar']
                else: # Other eligible orders ('open' / 'partial'): fill a portion
                    # At most 10 lots (10 * 100 shares) per transaction.
                    max_lembar_partial_tx = min(order_ref['sisa_lembar'], 10 * 100)
                    try:
                        # Fill a whole number of 100-share lots.
                        if max_lembar_partial_tx >= 100:
                            lembar_transaksi = self.faker.random_int(1, max_lembar_partial_tx // 100) * 100
                        else: # NOTE(review): looks unreachable - sisa_lembar is >= 100 in this branch
                            lembar_transaksi = max_lembar_partial_tx
                    except ValueError: # NOTE(review): defensive; appears unreachable since the upper bound is >= 1 here
                         lembar_transaksi = max_lembar_partial_tx # take remaining

                # Never transact more than what is left on the order.
                lembar_transaksi = min(lembar_transaksi, order_ref['sisa_lembar'])


            if lembar_transaksi > 0:
                transactions.append({
                    'order_id': order_ref['order_id'],
                    'jumlah_lembar': lembar_transaksi,
                    'harga': execution_price, # Execution price for this transaction
                    'waktu_eksekusi': self.faker.date_time_this_year(before_now=True, after_now=False, tzinfo=None)
                })
                num_generated_tx += 1
                order_ref['sisa_lembar'] -= lembar_transaksi

                # Update the stored order's status from what is left.
                if order_ref['sisa_lembar'] == 0:
                    order_ref['status'] = 'executed' # Fully executed
                else:
                    order_ref['status'] = 'partial' # Partially executed

                # Rebuild the candidate list: this order may now be exhausted.
                candidate_order_indices = [
                    i for i, order_item in enumerate(self.db['Orders'])
                    if order_item.get('status') in ('open', 'partial', 'executed') and order_item.get('sisa_lembar', 0) > 0
                ]
            elif order_ref['sisa_lembar'] == 0: # No transaction made and the order has nothing left
                # NOTE(review): candidates always have sisa_lembar > 0, so this
                # branch looks unreachable; it only rebuilds the candidate list.
                candidate_order_indices = [
                    i for i, order_item in enumerate(self.db['Orders'])
                    if order_item.get('status') in ('open', 'partial', 'executed') and order_item.get('sisa_lembar', 0) > 0
                ]


        # Create the table on first use, otherwise append to what is there.
        if 'Transactions' not in self.db or not self.db['Transactions']:
            self.db['Transactions'] = transactions
        else:
            self.db['Transactions'].extend(transactions)

        # Remove the temporary 'sisa_lembar' counter from every order.
        for order in self.db['Orders']:
            if 'sisa_lembar' in order:
                del order['sisa_lembar']

        print(
            f"[TRANSACTIONS] Generated {len(transactions)} new transactions. Total transactions in DB: {len(self.db['Transactions'])}")
        return transactions

    def generate_portfolio(self):
        """
        Build portfolio rows from self.db['Transactions'] joined to
        self.db['Orders'], and store them in self.db['Portfolio'].

        Each portfolio row contains:
        nik         : str   - NIK of the portfolio owner (taken from the order)
        ticker      : str   - stock code (taken from the order)
        jumlah      : int   - total number of shares held
        average_cost: float - average purchase price per share, rounded to 2 decimals

        Transactions are processed in 'waktu_eksekusi' order using an
        average-cost method: buys ('beli') add shares and cost, sells ('jual')
        remove shares at the current average cost. Only holdings with more
        than zero shares are emitted.

        Side effect: after the portfolio is built, the 'harga' key is deleted
        from every record in self.db['Transactions']. A later call therefore
        finds no 'harga', skips every transaction and stores an empty portfolio.

        Returns:
            list: The portfolio rows, or [] when transactions or orders are
            missing or the transactions cannot be sorted.
        """
        portfolios_data_aggregated = {
            # Structure: {(nik, ticker): {'total_shares': 0, 'total_value_at_cost': 0.0}}
        }

        if not self.db.get('Transactions') or not self.db.get('Orders'):
            print(
                "[PORTFOLIO] No transactions or orders available to generate portfolio."
            )
            self.db['Portfolio'] = [] # Ensure portfolio is an empty list if not generated
            return []

        # order_id -> order dict, used to find the owner, ticker and side of each transaction
        orders_map = {order['order_id']: order for order in self.db['Orders']}

        try:
            # Process transactions chronologically: the rolling average cost
            # depends on the order in which buys and sells are applied.
            sorted_transactions = sorted(
                self.db['Transactions'],
                key=lambda t: t['waktu_eksekusi']
            )
        except KeyError:
            print("[PORTFOLIO] Error: Transactions are missing 'waktu_eksekusi'. Cannot sort. Portfolio not generated.")
            self.db['Portfolio'] = []
            return []
        except TypeError:
            print("[PORTFOLIO] Error: 'waktu_eksekusi' in Transactions is not a sortable type. Portfolio not generated.")
            self.db['Portfolio'] = []
            return []


        for transaction in sorted_transactions:
            order_id = transaction.get('order_id')
            # Truthiness test: a missing order_id and an order_id of 0 are both skipped.
            if not order_id:
                print(f"[PORTFOLIO] Warning: Transaction (details: {transaction}) missing 'order_id'. Skipping.")
                continue

            order_details = orders_map.get(order_id)

            if not order_details:
                print(
                    f"[PORTFOLIO] Warning: Order details not found for order_id {order_id} "
                    f"in transaction (details: {transaction}). Skipping."
                )
                continue

            nik = order_details.get('nik')
            ticker = order_details.get('ticker')
            tipe_order = order_details.get('tipe_order')

            if not all([nik, ticker, tipe_order]):
                print(f"[PORTFOLIO] Warning: Order {order_id} missing NIK, Ticker, or Tipe Order. Skipping transaction.")
                continue

            try:
                jumlah_transacted = int(transaction['jumlah_lembar'])
                # 'harga' in the transaction is the execution price of that fill
                harga_transacted = float(transaction['harga'])
            except (KeyError, ValueError) as e:
                print(f"[PORTFOLIO] Warning: Transaction for order {order_id} "
                      f"has invalid/missing 'jumlah_lembar' or 'harga'. Skipping. Error: {e}")
                continue
            except TypeError as e: # Raised when jumlah_lembar or harga is None
                print(f"[PORTFOLIO] Warning: Transaction for order {order_id} "
                      f"has NoneType for 'jumlah_lembar' or 'harga'. Skipping. Error: {e}")
                continue


            # One running holding per (owner, ticker) pair, created on first use.
            portfolio_key = (nik, ticker)
            current_holding = portfolios_data_aggregated.setdefault(
                portfolio_key,
                {'total_shares': 0, 'total_value_at_cost': 0.0}
            )

            if tipe_order == 'beli':
                # Buy: add the shares and their purchase cost.
                current_holding['total_shares'] += jumlah_transacted
                current_holding['total_value_at_cost'] += jumlah_transacted * harga_transacted
            elif tipe_order == 'jual':
                if current_holding['total_shares'] == 0:
                    print(f"[PORTFOLIO] Warning: NIK {nik} selling {jumlah_transacted} of {ticker} but owns 0. "
                          f"Order {order_id}. This might be a short sell or data issue.")
                    # Nothing is held, so the sale only produces the warning above.
                elif current_holding['total_shares'] < jumlah_transacted:
                    print(f"[PORTFOLIO] Warning: NIK {nik} selling {jumlah_transacted} of {ticker}, "
                          f"but only owns {current_holding['total_shares']}. Order {order_id}. "
                          f"All current holdings will be sold from portfolio cost perspective.")
                    # Over-sell: the whole holding is removed; the excess is not tracked.
                    current_holding['total_value_at_cost'] = 0.0
                    current_holding['total_shares'] = 0
                else: # Standard sell of shares that are held
                    # Remove the sold shares at the average cost before the
                    # sale, which leaves the average cost of the remainder unchanged.
                    average_cost_before_sale = current_holding['total_value_at_cost'] / current_holding['total_shares']
                    cost_of_shares_sold = jumlah_transacted * average_cost_before_sale

                    current_holding['total_value_at_cost'] -= cost_of_shares_sold
                    current_holding['total_shares'] -= jumlah_transacted

                # Normalise an emptied holding to exactly zero shares and zero cost.
                if current_holding['total_shares'] <= 0:
                    current_holding['total_shares'] = 0
                    current_holding['total_value_at_cost'] = 0.0

        final_portfolios = []
        for (nik, ticker), data in portfolios_data_aggregated.items():
            jumlah = data['total_shares']
            # Holdings that ended at zero shares are left out of the portfolio.
            if jumlah > 0:
                average_cost = data['total_value_at_cost'] / jumlah
                final_portfolios.append({
                    'nik': nik,
                    'ticker': ticker,
                    'jumlah': int(jumlah),
                    'average_cost': round(average_cost, 2)
                })

        self.db['Portfolio'] = final_portfolios
        print(
            f"[PORTFOLIO] Generated {len(final_portfolios)} portfolio entries."
        )
        # Drop 'harga' from the stored transactions. This mutates
        # self.db['Transactions'] in place (see the side-effect note in the docstring).
        for transaction in self.db['Transactions']:
            if 'harga' in transaction:
                del transaction['harga']
        return final_portfolios
    
    def _get_stock_ipo_price(self, ticker):
        """
        Helper function to get the IPO price for a given ticker.
        Returns the price if found, otherwise returns a default value.
        """
        ipo_data = self.db.get('Stock_IPO', [])
        for ipo in ipo_data:
            if ipo['ticker'] == ticker:
                return ipo['harga_penawaran']
        return None
    
    def generate_order_ipo(self, num_records=20, niks=[], tickers=[]):
        """
        nik: str
        ticker : str
        jumlah_lembar: int
        waktu_pesanan: datetime
        status_pesanan: ENUM('pending', 'approved', 'rejected', 'allocated', 'failed')
        total_nilai_pesanan: int
        
        PK : nik, ticker
        one user can only order one ticker in one IPO
        """
        if not niks or not tickers:
            return []
        niks = niks.copy()
        tickers = tickers.copy()
        
        orders = set()
        for _ in range(num_records):
            nik = random.choice(niks)
            ticker = random.choice(tickers)
            # Ensure unique nik-ticker pairs
            while (nik, ticker) in orders:
                nik = random.choice(niks)
                ticker = random.choice(tickers)
            orders.add((nik, ticker))
            
            jumlah_lembar = self.faker.random_int(100, 10000)
            waktu_pesanan = self.faker.date_time_this_year()
            status_pesanan = self.faker.random_element(
                ('pending', 'approved', 'rejected', 'allocated', 'failed')
            )
            
            self.db.setdefault('Order_IPO', []).append({
                'nik': nik,
                'ticker': ticker,
                'jumlah_lembar': jumlah_lembar,
                'waktu_pesanan': waktu_pesanan.strftime('%Y-%m-%d %H:%M:%S'),
                'status_pesanan': status_pesanan,
                'total_nilai_pesanan': jumlah_lembar * self._get_stock_ipo_price(ticker)
            })
        print(f"[ORDER IPO] Generated {len(self.db.get('Order_IPO', []))} orders.")
        return self.db.get('Order_IPO', [])

    def save_to_json(self, data_dir):
        """
        Write every entry of ``self.db`` to ``<data_dir>/<key>.json`` and
        print which files were created.

        Each value is encoded to a JSON string in memory *before* the target
        file is opened. A value that cannot be serialized therefore never
        truncates or half-writes a file that already exists at that path.

        Args:
            data_dir: Directory that receives the files; created if missing.

        Returns:
            list[str]: Paths of the files that were written, in ``self.db``
            iteration order. Entries that failed are reported on stdout and
            are not included in the list.
        """
        os.makedirs(data_dir, exist_ok=True)
        written_files = []
        for key, val in self.db.items():
            filename = f"{key}.json"
            path = os.path.join(data_dir, filename)
            try:
                # Encode first: open(..., 'w') truncates immediately, so an
                # encoding error raised after opening would leave a corrupt
                # (empty or partial) JSON file on disk.
                payload = json.dumps(val, ensure_ascii=False,
                                     indent=2, cls=CustomJSONEncoder)
                with open(path, 'w', encoding='utf-8') as f:
                    f.write(payload)
                print(f"[SAVE] {filename} → {path}")
                written_files.append(path)
            except Exception as e:
                # Best effort by design: one failing table must not prevent
                # the remaining tables from being saved.
                print(f"[ERROR] Gagal menyimpan {filename}: {e!r}")

        if not written_files:
            print("[SAVE] Tidak ada file yang tersimpan.")
        else:
            print(
                f"[SAVE] Total {len(written_files)} file tersimpan di '{data_dir}'")

        return written_files

    def gen(self, data_dir=None):
        """
        Run every ``generate_*`` step once, in a fixed order, and return
        ``self.db``.

        The call order is significant: later steps are fed identifiers
        (NIKs, tickers, post ids, comment ids) taken from the return values
        of earlier steps.

        Args:
            data_dir: Accepted but not read anywhere in this method.

        Returns:
            The ``self.db`` object itself (not a copy).
        """
        # --- base entities ---------------------------------------------------
        user_rows = self.generate_users()
        nik_list = [row['nik'] for row in user_rows]
        moderator_rows = self.generate_moderators()
        daily_rows = self.generate_stock_daily_price()
        ipo_rows = self.generate_stock_ipo()

        # --- ticker universes (de-duplicated) --------------------------------
        daily_tickers = list(set([row['ticker'] for row in daily_rows]))
        ipos_tickers = list(set([row['ticker'] for row in ipo_rows]))
        all_tickers = list(set(daily_tickers + ipos_tickers))

        # The live feed receives the non-deduplicated ticker column.
        self.generate_live_stock_price_change(
            1000, [row['ticker'] for row in daily_rows])
        self.generate_stocks(tickers=all_tickers)

        # --- social content --------------------------------------------------
        post_rows = self.generate_posts(niks=nik_list)
        post_ids = [row['post_id'] for row in post_rows]
        comment_rows = self.generate_comments(num_records=50, niks=nik_list)
        comment_ids = [row['comment_id'] for row in comment_rows]

        # NOTE(review): `post_ids` is given the full post records here, not the
        # id list built above — confirm against generate_post_comments.
        self.generate_post_comments(
            post_ids=post_rows, comment_ids=comment_ids)
        comment_like_rows = self.generate_comment_likes(
            num_records=50, comment_ids=comment_ids, niks=nik_list)
        # NOTE(review): `comment_ids` is given the return value of
        # generate_comment_likes, not the comment id list — confirm this is
        # what generate_comment_replies expects.
        self.generate_comment_replies(
            num_records=50, comment_ids=comment_like_rows)

        self.generate_post_likes(posts=post_rows, niks=nik_list)
        moderator_refs = [{'username': row['username']}
                          for row in moderator_rows]
        self.generate_monitoring_posts(
            post_ids=post_ids, moderators=moderator_refs)
        self.generate_post_tagging(post_ids=post_ids, tickers=all_tickers)

        # --- predictions, orders, holdings -----------------------------------
        prediction_rows = self.generate_predictions(
            num_records=50, post_ids=post_ids, tickers=daily_tickers)
        self.generate_orders(
            num_records=100, tickers=all_tickers, niks=nik_list)
        self.generate_prediction_voting(
            post_ids=[row['post_id'] for row in prediction_rows],
            user_ids=[row['nik'] for row in user_rows],
            num_records=50)
        self.generate_transactions(num_records=300)
        self.generate_portfolio()
        self.generate_order_ipo(
            num_records=20, niks=nik_list, tickers=ipos_tickers)

        return self.db


# Build the generator with the configured seed (passed to FakeDataGenerator as `seed`).
generator = FakeDataGenerator(seed=settings.SEED)
# Populate generator.db. gen() accepts a data_dir argument but does not read
# it; nothing is written to disk by this call.
generator.gen(settings.DATA_DIR)
# Persist each entry of generator.db as <key>.json under DATA_DIR. As the last
# expression of the cell, the returned list of written paths is what the
# notebook displays.
generator.save_to_json(settings.DATA_DIR)

[USERS] Generated
[MODERATORS] Generated


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[DAILY STOCK PRICE] Generated
[IPOS] Generated
[LIVE STOCK PRICE CHANGE] Generated
[STOCKS] Generated
[POSTS] Generated
[COMMENTS] Generated
[POST COMMENTS] Generated
[COMMENT LIKES] Generated
[COMMENT REPLIES] Generated
[POST LIKES] Generated
[MONITORING POSTS] Generated
[POST TAGS] Generated
[PREDICTIONS] Generated
[ORDERS] Generated
[PREDICTION VOTING] Generated
[TRANSACTIONS] Generated 300 new transactions. Total transactions in DB: 300
[PORTFOLIO] Generated 34 portfolio entries.
[ORDER IPO] Generated 20 orders.
[SAVE] Users.json → data/raw\Users.json
[SAVE] Moderators.json → data/raw\Moderators.json
[SAVE] Stock_Daily_Price.json → data/raw\Stock_Daily_Price.json
[SAVE] Stock_IPO.json → data/raw\Stock_IPO.json
[SAVE] Live_Stock_Price_Change.json → data/raw\Live_Stock_Price_Change.json
[SAVE] Stocks.json → data/raw\Stocks.json
[SAVE] Posts.json → data/raw\Posts.json
[SAVE] Comments.json → data/raw\Comments.json
[SAVE] Post_Comments.json → data/raw\Post_Comments.json
[SAVE] Comment_L

['data/raw\\Users.json',
 'data/raw\\Moderators.json',
 'data/raw\\Stock_Daily_Price.json',
 'data/raw\\Stock_IPO.json',
 'data/raw\\Live_Stock_Price_Change.json',
 'data/raw\\Stocks.json',
 'data/raw\\Posts.json',
 'data/raw\\Comments.json',
 'data/raw\\Post_Comments.json',
 'data/raw\\Comment_Likes.json',
 'data/raw\\Comment_Replies.json',
 'data/raw\\Post_Likes.json',
 'data/raw\\Monitoring_Posts.json',
 'data/raw\\Post_Tags.json',
 'data/raw\\Predictions.json',
 'data/raw\\Orders.json',
 'data/raw\\Prediction_Voting.json',
 'data/raw\\Transactions.json',
 'data/raw\\Portfolio.json',
 'data/raw\\Order_IPO.json']

In [6]:
# import matplotlib.pyplot as plt
# import pandas as pd
# import seaborn as sns


# def plot_live_stock_price_change(data, ticker):
#     # Filter data for the specified ticker
#     ticker_data = [item for item in data if item['ticker'] == ticker]

#     if not ticker_data:
#         print(f"No data found for ticker '{ticker}'.")
#         return

#     # Convert to DataFrame
#     df = pd.DataFrame(ticker_data)

#     if 'date' not in df.columns:
#         print(
#             f"'date' column not found in data. Available columns: {df.columns.tolist()}")
#         print("Sample data:", df.head())
#         return

#     # Convert date to datetime
#     df['date'] = pd.to_datetime(df['date'])

#     # Plotting
#     plt.figure(figsize=(12, 6))
#     sns.lineplot(data=df, x='date', y='open')
#     plt.title(f'Live Stock Price Change for {ticker}')
#     plt.xlabel('Timestamp')
#     plt.ylabel('Price')
#     plt.xticks(rotation=45)
#     plt.tight_layout()
#     plt.show()


# plot_live_stock_price_change(db['Stock_Daily_Price'], 'PTBA.JK')