In [None]:
import os
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

import pandas as pd
import numpy as np

from tqdm import tqdm
from vnstock import Vnstock

###############################################################################
# 1. Logging / Display Configuration
###############################################################################
# Raise the "vnstock" logger's threshold so it only emits CRITICAL records.
logging.getLogger("vnstock").setLevel(logging.CRITICAL)
# Process-wide override: suppress every logging call of severity CRITICAL and
# below, for all loggers in this interpreter (not only "vnstock").
logging.disable(logging.CRITICAL)
pd.set_option('display.max_rows', None)  # Print DataFrames without row truncation

###############################################################################
# 2. Function Definitions
###############################################################################

def get_session(tstamp):
    """
    Classify a timestamp into a half-day trading session.

    Only the time-of-day part of `tstamp` is considered (via `tstamp.time()`).
    Both ends of each window are inclusive:
      - 'morning':   09:15 -- 13:00
      - 'afternoon': 13:15 -- 14:45

    Returns the session label, or None when the time falls in neither window.
    """
    import datetime as dt

    clock = tstamp.time()

    # (label, first accepted time, last accepted time)
    windows = (
        ('morning', dt.time(9, 15), dt.time(13, 0)),
        ('afternoon', dt.time(13, 15), dt.time(14, 45)),
    )
    for label, opens_at, closes_at in windows:
        if opens_at <= clock <= closes_at:
            return label
    return None

def fetch_price_history(ticker, start_date=None, end_date=None):
    """
    Download 15-minute bars for `ticker` through Vnstock (source='VCI') and
    collapse them into half-day bars.

    Each 15-minute bar is labelled with `get_session`; bars outside the
    morning/afternoon windows are discarded. The remaining bars are grouped
    per (calendar date, session) and aggregated as:
      open = first, high = max, low = min, close = last, volume = sum,
      time = timestamp of the last 15-minute bar in the group.

    Args:
        ticker: Symbol to request.
        start_date: Start of the range, passed through to Vnstock as `start`.
        end_date: End of the range; defaults to today formatted 'YYYY-MM-DD'.

    Returns:
        DataFrame with columns [time, open, high, low, close, volume], sorted
        newest -> oldest. An empty DataFrame is returned when Vnstock yields
        no rows or when any exception occurs (the error is printed).
    """
    if end_date is None:
        end_date = datetime.today().strftime('%Y-%m-%d')

    # How each column is reduced within one (date, session) group.
    # Key order determines the column order of the result.
    reducers = {
        'time': 'last',
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }

    try:
        quote = Vnstock().stock(symbol=ticker, source='VCI').quote
        bars = quote.history(interval='15m', start=start_date, end=end_date, to_df=True)
        if bars.empty:
            return pd.DataFrame()

        # Chronological order is required so 'first'/'last' mean open/close.
        bars['time'] = pd.to_datetime(bars['time'])
        bars = bars.sort_values(by='time', ascending=True)

        # Tag every bar with its session and drop the ones outside both windows.
        bars['session'] = bars['time'].apply(get_session)
        in_session = bars.dropna(subset=['session'])

        per_half_day = in_session.groupby([in_session['time'].dt.date, 'session'])
        # The group keys are dropped: 'time' already identifies each half-day.
        half_day_bars = per_half_day.agg(reducers).reset_index(drop=True)

        return half_day_bars.sort_values(by='time', ascending=False)

    except Exception as e:
        print(f"Failed to fetch data for {ticker}: {e}")
        return pd.DataFrame()


def fetch_data_for_ticker(ticker, start_date, end_date):
    """
    Fetch the half-day bars of one ticker and tag them with its symbol.

    Delegates to `fetch_price_history`; when the result is non-empty a
    'symbol' column holding `ticker` is added, giving
    [time, open, high, low, close, volume, symbol].

    Returns the (possibly empty) DataFrame. If an exception escapes the
    fetch, it is printed and an empty DataFrame is returned instead.
    """
    try:
        bars = fetch_price_history(ticker, start_date, end_date)
        if bars.empty:
            return bars
        bars['symbol'] = ticker
        return bars
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return pd.DataFrame()


def calculate_cmf(df, period=20):
    """
    Calculate Chaikin Money Flow (CMF) for a single-symbol DataFrame of
    half-day bars.

    Formulas (per bar, then rolled over `period` bars):
      mfm = ((close - low) - (high - close)) / (high - low)
      mfv = mfm * volume
      cmf = sum(mfv, period) / sum(volume, period)

    A flat bar (high == low) has an undefined multiplier; it is treated as
    zero money flow so that one flat bar does not turn the CMF of every
    window containing it into NaN.

    Args:
        df: DataFrame with 'time', 'high', 'low', 'close' and 'volume'
            columns. The input frame itself is not modified.
        period: Rolling window length in bars (half-days, not days).

    Returns:
        A new DataFrame sorted newest -> oldest with the added columns
        'mfm', 'mfv' and 'cmf'. 'cmf' is NaN until `period` bars are
        available.
    """
    # Rolling windows must run oldest -> newest.
    bars = df.sort_values(by='time', ascending=True)

    spread = bars['high'] - bars['low']
    net_position = (bars['close'] - bars['low']) - (bars['high'] - bars['close'])

    # Money Flow Multiplier; replace(0, NaN) avoids a division by zero,
    # then flat bars are explicitly set to 0 (missing prices stay NaN).
    multiplier = net_position / spread.replace(0, np.nan)
    bars['mfm'] = multiplier.mask(spread == 0, 0.0)

    # Money Flow Volume
    bars['mfv'] = bars['mfm'] * bars['volume']

    # CMF = rolling money-flow volume over rolling volume
    bars['cmf'] = (
        bars['mfv'].rolling(window=period).sum() /
        bars['volume'].rolling(window=period).sum()
    )

    # Callers expect newest -> oldest.
    return bars.sort_values(by='time', ascending=False)


def check_cmf_condition(df):
    """
    Flag bars whose CMF has risen more than 0.2 above its recent minimum.

    Working oldest -> newest, three columns are added:
      - min_last_5_cmf: rolling minimum of 'cmf' over 5 bars (the current
        bar and the 4 before it); NaN until 5 values are available
      - cmf_diff: cmf - min_last_5_cmf
      - is_cmf_condition_met: True when cmf_diff > 0.2 and min_last_5_cmf
        is not NaN

    Returns a new DataFrame sorted ascending by 'time'.
    """
    ordered = df.sort_values(by='time', ascending=True)

    cmf = ordered['cmf']
    recent_floor = cmf.rolling(window=5).min()
    rise_above_floor = cmf - recent_floor

    ordered['min_last_5_cmf'] = recent_floor
    ordered['cmf_diff'] = rise_above_floor
    ordered['is_cmf_condition_met'] = recent_floor.notna() & (rise_above_floor > 0.2)

    return ordered


###############################################################################
# 3. Main Workflow
###############################################################################

def main():
    """
    Run the CMF screen end to end and print the results.

    Steps:
      1. List the symbols on the HSX and HNX exchanges via Vnstock.
      2. Fetch half-day bars for each symbol over the last 40 calendar days,
         using a pool of 3 worker threads with a progress bar.
      3. Compute a 20-bar CMF per symbol and evaluate the 5-bar CMF condition.
      4. Print the most recent bar of each symbol and the list of symbols
         whose latest bar meets the condition.

    Returns None; stops early when no price data could be fetched.
    """
    print("Starting CMF Indicator Automation...")

    # Date range: the 40 calendar days ending today.
    start_date = (datetime.today() - pd.DateOffset(days=40)).strftime('%Y-%m-%d')
    end_date = datetime.today().strftime('%Y-%m-%d')

    # Symbol universe from Vnstock, restricted to HSX and HNX.
    print("Fetching symbol list...")
    listing = Vnstock().stock(source='VCI').listing
    all_symbols = listing.symbols_by_exchange()
    on_wanted_exchange = all_symbols['exchange'].isin(['HSX', 'HNX'])
    symbol_list = all_symbols.loc[on_wanted_exchange, 'symbol'].tolist()
    print("Number of symbols:", len(symbol_list))
    print("Symbols List:", symbol_list)
    print("=====================================")

    # (Optionally restrict to a smaller sample for quick tests)
    # symbol_list = ['VNM', 'VIC', 'VHM', 'VRE', 'VJC']

    ###########################################################################
    # 3.1 Parallel data fetching with progress bar
    ###########################################################################
    price_data_list = []
    worker_count = 3

    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        # Remember which ticker each future belongs to, for error reporting.
        future_to_ticker = {}
        for ticker in symbol_list:
            job = executor.submit(fetch_data_for_ticker, ticker, start_date, end_date)
            future_to_ticker[job] = ticker

        finished_jobs = tqdm(
            as_completed(future_to_ticker),
            total=len(symbol_list),
            desc="Fetching data",
        )
        for job in finished_jobs:
            ticker = future_to_ticker[job]
            try:
                frame = job.result()
                if not frame.empty:
                    price_data_list.append(frame)
            except Exception as e:
                print(f"Failed to process {ticker}: {e}")

    # Nothing to analyse if every fetch came back empty.
    if not price_data_list:
        print("No data fetched.")
        return

    price_data = pd.concat(price_data_list, ignore_index=True)
    print("Data fetched successfully.")
    print("=====================================")

    # 3.2 Calculate CMF per symbol (20 half-day bars)
    per_symbol = price_data.groupby('symbol', group_keys=False)
    price_data = per_symbol.apply(calculate_cmf, period=20)

    # 3.3 Drop the intermediate CMF columns
    price_data = price_data.drop(columns=['mfm', 'mfv'])

    # 3.4 Check CMF conditions per symbol (5 half-day rolling window)
    per_symbol = price_data.groupby('symbol', group_keys=False)
    price_data = per_symbol.apply(check_cmf_condition)

    # 3.5 Keep the most recent half-day bar of each symbol:
    #     after sorting newest first, the first row per symbol is the latest.
    newest_first = price_data.sort_values(by='time', ascending=False)
    latest_data = newest_first.drop_duplicates(subset=['symbol'])

    # Show the final columns
    report_columns = [
        'symbol', 'time', 'open', 'high', 'low',
        'close', 'volume', 'cmf', 'min_last_5_cmf',
        'cmf_diff', 'is_cmf_condition_met',
    ]
    print(latest_data[report_columns])
    print("=====================================")

    # Identify symbols that meet the condition
    condition_met = latest_data['is_cmf_condition_met']
    symbols_to_buy = latest_data.loc[condition_met, 'symbol'].tolist()
    print('Symbols to buy:', symbols_to_buy)


# 4. Entry Point
# Run the workflow only when this file is executed as the main program,
# so that importing it merely defines the functions above.
if __name__ == "__main__":
    main()


Starting CMF Indicator Automation...
Fetching symbol list...
Number of symbols: 557
Symbols List: ['YEG', 'YBM', 'VTP', 'VTO', 'VTB', 'VSI', 'VSH', 'VSC', 'VRE', 'VRC', 'VPS', 'VPI', 'VPH', 'VPG', 'VPD', 'VPB', 'VOS', 'VNS', 'VNM', 'VNL', 'VNG', 'VNE', 'VND', 'VMD', 'VJC', 'VIX', 'VIP', 'VID', 'VIC', 'VIB', 'VHM', 'VHC', 'VGC', 'VFG', 'VDS', 'VDP', 'VCI', 'VCG', 'VCF', 'VCB', 'VCA', 'VAF', 'UIC', 'TYA', 'TVT', 'TVS', 'TVB', 'TV2', 'TTF', 'TTE', 'TTA', 'TSC', 'TRC', 'TRA', 'TPC', 'TPB', 'TNT', 'TNI', 'TNH', 'TNC', 'TN1', 'TMT', 'TMS', 'TMP', 'TLH', 'TLG', 'TLD', 'TIX', 'TIP', 'THG', 'TEG', 'TDW', 'TDP', 'TDM', 'TDH', 'TDG', 'TDC', 'TCT', 'TCR', 'TCO', 'TCM', 'TCL', 'TCI', 'TCH', 'TCD', 'TCB', 'TBC', 'SZL', 'SZC', 'SVT', 'SVI', 'SVD', 'SVC', 'STK', 'STG', 'STB', 'ST8', 'SSI', 'SSC', 'SSB', 'SRF', 'SRC', 'SPM', 'SMC', 'SMB', 'SMA', 'SKG', 'SJS', 'SJD', 'SIP', 'SHP', 'SHI', 'SHB', 'SHA', 'SGT', 'SGR', 'SGN', 'SFI', 'SFG', 'SFC', 'SCS', 'SCR', 'SC5', 'SBV', 'SBT', 'SBG', 'SBA', 'SAV', 'SAM'

Fetching data:  24%|██▍       | 133/557 [00:47<03:28,  2.04it/s]

Failed to fetch data for RDP: Không tìm thấy dữ liệu. Vui lòng kiểm tra lại mã chứng khoán hoặc thời gian truy xuất.


Fetching data:  37%|███▋      | 204/557 [01:15<01:55,  3.06it/s]

Failed to fetch data for LEC: Không tìm thấy dữ liệu. Vui lòng kiểm tra lại mã chứng khoán hoặc thời gian truy xuất.


Fetching data:  40%|███▉      | 221/557 [01:21<02:07,  2.64it/s]

Failed to fetch data for ITA: Không tìm thấy dữ liệu. Vui lòng kiểm tra lại mã chứng khoán hoặc thời gian truy xuất.


Fetching data:  52%|█████▏    | 287/557 [01:45<01:10,  3.84it/s]

Failed to fetch data for FUEBFVND: Không tìm thấy dữ liệu. Vui lòng kiểm tra lại mã chứng khoán hoặc thời gian truy xuất.


Fetching data:  57%|█████▋    | 320/557 [01:56<01:05,  3.62it/s]

Failed to fetch data for DRH: Không tìm thấy dữ liệu. Vui lòng kiểm tra lại mã chứng khoán hoặc thời gian truy xuất.


Fetching data:  59%|█████▉    | 329/557 [02:00<01:23,  2.74it/s]