In [None]:

import os
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

import pandas as pd
import numpy as np

from tqdm import tqdm
from vnstock import Vnstock

###############################################################################
# 1. Logging / Display Configuration
###############################################################################
# Raise the "vnstock" logger's threshold to CRITICAL, then disable every log
# record of severity CRITICAL and below for the whole process.
logging.getLogger("vnstock").setLevel("CRITICAL")
logging.disable(level=logging.CRITICAL)

# Remove the row limit pandas applies when printing a DataFrame.
pd.options.display.max_rows = None

###############################################################################
# 2. Function Definitions
###############################################################################

def fetch_price_history(ticker, start_date=None, end_date=None):
    """
    Download the price history of one ticker through Vnstock (source 'VCI').

    Parameters:
        ticker: symbol passed to ``Vnstock().stock(symbol=...)``.
        start_date: forwarded unchanged as ``start`` to ``quote.history``.
        end_date: forwarded as ``end``; when None, today's date formatted
            as 'YYYY-MM-DD' is used instead.

    Returns:
        A DataFrame restricted to the time, high, low, close and volume
        columns, ordered newest first. An empty DataFrame is returned when
        the source yields no rows or when any exception is raised while
        fetching/processing (the error is printed, not re-raised).
    """
    end_date = datetime.today().strftime('%Y-%m-%d') if end_date is None else end_date
    wanted_columns = ['time', 'high', 'low', 'close', 'volume']

    try:
        quote = Vnstock().stock(symbol=ticker, source='VCI').quote
        history = quote.history(start=start_date, end=end_date, to_df=True)

        # Nothing came back for this ticker/date range.
        if history.empty:
            return pd.DataFrame()

        # Parse timestamps so the sort below is chronological, then put the
        # most recent bar first.
        history['time'] = pd.to_datetime(history['time'])
        newest_first = history.sort_values(by='time', ascending=False)
        return newest_first[wanted_columns]

    except Exception as e:
        # Best-effort fetch: report the failure and let the caller continue
        # with an empty frame.
        print(f"Failed to fetch data for {ticker}: {e}")
        return pd.DataFrame()


def fetch_data_for_ticker(ticker, start_date, end_date):
    """
    Fetch one ticker's price history and tag every row with its symbol.

    Parameters:
        ticker: symbol to fetch; also written into the 'symbol' column.
        start_date, end_date: passed through to fetch_price_history.

    Returns:
        The DataFrame produced by fetch_price_history with an added
        'symbol' column when it has rows; the (empty) frame unchanged when
        it has none; a fresh empty DataFrame if any exception is raised
        (the error is printed, not re-raised).
    """
    try:
        history = fetch_price_history(ticker, start_date, end_date)
        if history.empty:
            # Nothing to label; hand the empty frame straight back.
            return history
        history['symbol'] = ticker
        return history
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return pd.DataFrame()


def calculate_cmf(df, period=20):
    """
    Calculate Chaikin Money Flow (CMF) for a single-symbol DataFrame.

    For each bar:
        mfm = ((close - low) - (high - close)) / (high - low)
        mfv = mfm * volume
        cmf = sum(mfv over `period` bars) / sum(volume over `period` bars)

    A bar whose high equals its low has no intrabar range, so its
    multiplier is defined as 0 (it contributes no money flow). Leaving it
    as NaN would turn every rolling window that contains the bar into NaN
    and blank out CMF for `period` consecutive rows. Bars with genuinely
    missing prices still produce NaN.

    Parameters:
        df: DataFrame with 'time', 'high', 'low', 'close' and 'volume'
            columns. It is not modified; the work is done on a sorted copy.
        period: number of bars in the rolling window (default 20). The
            first ``period - 1`` rows (oldest) have NaN CMF.

    Returns:
        A copy of ``df`` sorted newest-first by 'time' with three added
        columns: 'mfm', 'mfv' and 'cmf'.
    """
    # Rolling windows must run over chronological order (oldest -> newest).
    ordered = df.sort_values(by='time', ascending=True)

    price_range = ordered['high'] - ordered['low']
    flow_numerator = (
        (ordered['close'] - ordered['low']) - (ordered['high'] - ordered['close'])
    )

    # Money Flow Multiplier (MFM). Divide by NaN instead of 0 to avoid
    # inf/invalid results, then set exactly-flat bars to a neutral 0.
    mfm = flow_numerator / price_range.replace(0, np.nan)
    ordered['mfm'] = mfm.mask(price_range == 0, 0.0)

    # Money Flow Volume (MFV)
    ordered['mfv'] = ordered['mfm'] * ordered['volume']

    # CMF: ratio of the rolling sums over the same window.
    ordered['cmf'] = (
        ordered['mfv'].rolling(window=period).sum() /
        ordered['volume'].rolling(window=period).sum()
    )

    # Callers expect newest -> oldest.
    return ordered.sort_values(by='time', ascending=False)


def check_cmf_condition(df):
    """
    Flag rows whose CMF sits more than 0.2 above its 5-row rolling minimum.

    The frame is first ordered oldest -> newest by 'time' so the rolling
    window follows chronological order. Three columns are then added:
        - min_last_5_cmf: minimum of 'cmf' over a 5-row window ending at
          (and including) the current row; NaN until the window is full
          or while it contains NaN.
        - cmf_diff: cmf minus min_last_5_cmf.
        - is_cmf_condition_met: True when min_last_5_cmf is not NaN and
          cmf_diff > 0.2.

    Returns the augmented copy, sorted ascending by 'time'; the input
    frame is left unchanged.
    """
    return df.sort_values(by='time', ascending=True).assign(
        min_last_5_cmf=lambda frame: frame['cmf'].rolling(window=5).min(),
        cmf_diff=lambda frame: frame['cmf'] - frame['min_last_5_cmf'],
        is_cmf_condition_met=lambda frame: (
            frame['min_last_5_cmf'].notna() & (frame['cmf_diff'] > 0.2)
        ),
    )


###############################################################################
# 3. Main Workflow
###############################################################################

def main():
    """
    Run the CMF screen end to end and print the results.

    Steps:
        1. Build a 40-calendar-day date window ending today.
        2. Load the symbol listing from Vnstock and keep rows whose
           'exchange' value is 'HSX' or 'HNX'.
        3. Fetch each symbol's price history on a 3-thread pool, with a
           tqdm progress bar.
        4. Compute CMF per symbol, then the CMF condition flags.
        5. Print the latest row of every symbol and the list of symbols
           whose latest row satisfies the condition.

    Returns None. Prints "No data fetched." and stops early when no symbol
    produced any rows.
    """
    print("Starting CMF Indicator Automation...")

    # Date window for the history request, formatted as YYYY-MM-DD.
    date_format = '%Y-%m-%d'
    start_date = (datetime.today() - pd.DateOffset(days=40)).strftime(date_format)
    end_date = datetime.today().strftime(date_format)

    # Symbol universe: every listed symbol on the two selected exchanges.
    print("Fetching symbol list...")
    listing = Vnstock().stock(source='VCI').listing
    df_symbols = listing.symbols_by_exchange()
    on_selected_exchange = df_symbols['exchange'].isin(['HSX', 'HNX'])
    symbol_list = df_symbols.loc[on_selected_exchange, 'symbol'].tolist()
    print("Number of symbols:", len(symbol_list))
    print("Symbols List:", symbol_list)
    print("=====================================")

    # For a quick trial run, overwrite symbol_list here with a short list,
    # e.g. ['VNM', 'VIC', 'VHM', 'VRE', 'VJC'].

    ###########################################################################
    # 3.1 Parallel data fetching with progress bar
    ###########################################################################
    fetched_frames = []
    worker_count = 3

    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        # Remember which ticker each future belongs to so failures can be
        # reported by name.
        pending = {}
        for ticker in symbol_list:
            pending[pool.submit(fetch_data_for_ticker, ticker, start_date, end_date)] = ticker

        # as_completed yields futures in finishing order; tqdm counts them.
        progress = tqdm(as_completed(pending),
                        total=len(symbol_list),
                        desc="Fetching data")
        for finished in progress:
            ticker = pending[finished]
            try:
                frame = finished.result()
                if not frame.empty:
                    fetched_frames.append(frame)
            except Exception as e:
                print(f"Failed to process {ticker}: {e}")

    if not fetched_frames:
        print("No data fetched.")
        return

    combined = pd.concat(fetched_frames, ignore_index=True)
    print("Data fetched successfully.")
    print("=====================================")

    # 3.2 CMF per symbol
    cmf_inputs = ['time', 'symbol', 'high', 'low', 'close', 'volume']
    with_cmf = (
        combined
        .groupby('symbol', group_keys=False)[cmf_inputs]
        .apply(calculate_cmf)
    )

    # 3.3 Keep only what the condition check needs
    trimmed = with_cmf.drop(columns=['high', 'low', 'close', 'volume', 'mfm', 'mfv'])

    # 3.4 Condition flags per symbol
    flagged = (
        trimmed
        .groupby('symbol', group_keys=False)[['symbol', 'time', 'cmf']]
        .apply(check_cmf_condition)
    )

    # 3.5 Latest row per symbol: order newest first, keep the first row
    # seen for each symbol.
    newest_first = flagged.sort_values(by='time', ascending=False)
    latest_data = newest_first.drop_duplicates(subset=['symbol'])

    print(latest_data[['symbol', 'time', 'cmf', 'min_last_5_cmf', 'is_cmf_condition_met']])
    print("=====================================")

    # Symbols whose latest row meets the condition.
    meets_condition = latest_data['is_cmf_condition_met']
    symbols_to_buy = latest_data.loc[meets_condition, 'symbol'].tolist()
    print('Symbols to buy:', symbols_to_buy)


# 4. Entry Point
# Run the screen only when this file is executed directly; importing the
# module just defines the functions above without starting a fetch.
if __name__ == "__main__":
    main()


Starting CMF Indicator Automation...
Fetching symbol list...
Number of symbols: 557
Symbols List: ['YEG', 'YBM', 'VTP', 'VTO', 'VTB', 'VSI', 'VSH', 'VSC', 'VRE', 'VRC', 'VPS', 'VPI', 'VPH', 'VPG', 'VPD', 'VPB', 'VOS', 'VNS', 'VNM', 'VNL', 'VNG', 'VNE', 'VND', 'VMD', 'VJC', 'VIX', 'VIP', 'VID', 'VIC', 'VIB', 'VHM', 'VHC', 'VGC', 'VFG', 'VDS', 'VDP', 'VCI', 'VCG', 'VCF', 'VCB', 'VCA', 'VAF', 'UIC', 'TYA', 'TVT', 'TVS', 'TVB', 'TV2', 'TTF', 'TTE', 'TTA', 'TSC', 'TRC', 'TRA', 'TPC', 'TPB', 'TNT', 'TNI', 'TNH', 'TNC', 'TN1', 'TMT', 'TMS', 'TMP', 'TLH', 'TLG', 'TLD', 'TIX', 'TIP', 'THG', 'TEG', 'TDW', 'TDP', 'TDM', 'TDH', 'TDG', 'TDC', 'TCT', 'TCR', 'TCO', 'TCM', 'TCL', 'TCI', 'TCH', 'TCD', 'TCB', 'TBC', 'SZL', 'SZC', 'SVT', 'SVI', 'SVD', 'SVC', 'STK', 'STG', 'STB', 'ST8', 'SSI', 'SSC', 'SSB', 'SRF', 'SRC', 'SPM', 'SMC', 'SMB', 'SMA', 'SKG', 'SJS', 'SJD', 'SIP', 'SHP', 'SHI', 'SHB', 'SHA', 'SGT', 'SGR', 'SGN', 'SFI', 'SFG', 'SFC', 'SCS', 'SCR', 'SC5', 'SBV', 'SBT', 'SBG', 'SBA', 'SAV', 'SAM'

Fetching data:  24%|██▍       | 133/557 [01:09<03:15,  2.17it/s]

Failed to fetch data for RDP: Không tìm thấy dữ liệu. Vui lòng kiểm tra lại mã chứng khoán hoặc thời gian truy xuất.


Fetching data:  37%|███▋      | 204/557 [01:47<02:30,  2.34it/s]

Failed to fetch data for LEC: Không tìm thấy dữ liệu. Vui lòng kiểm tra lại mã chứng khoán hoặc thời gian truy xuất.


Fetching data:  40%|███▉      | 222/557 [01:55<02:18,  2.43it/s]

Failed to fetch data for ITA: Không tìm thấy dữ liệu. Vui lòng kiểm tra lại mã chứng khoán hoặc thời gian truy xuất.


Fetching data:  52%|█████▏    | 288/557 [02:24<01:12,  3.73it/s]

Failed to fetch data for FUEBFVND: Không tìm thấy dữ liệu. Vui lòng kiểm tra lại mã chứng khoán hoặc thời gian truy xuất.


Fetching data:  57%|█████▋    | 320/557 [02:40<02:38,  1.50it/s]

Failed to fetch data for DRH: Không tìm thấy dữ liệu. Vui lòng kiểm tra lại mã chứng khoán hoặc thời gian truy xuất.


Fetching data: 100%|██████████| 557/557 [04:11<00:00,  2.22it/s]


Data fetched successfully.
         symbol       time       cmf  min_last_5_cmf  is_cmf_condition_met
5           YEG 2025-01-22       NaN             NaN                 False
4597        NHH 2025-01-22 -0.450291       -0.450291                 False
1300        TTA 2025-01-22 -0.031208       -0.118154                 False
11150  CHPG2412 2025-01-22       NaN             NaN                 False
4625        NHA 2025-01-22 -0.345090       -0.431651                 False
9380   CVIB2406 2025-01-22  0.131428        0.111201                 False
11168  CHPG2411 2025-01-22       NaN             NaN                 False
1328        TSC 2025-01-22 -0.365281       -0.365281                 False
8284        DIG 2025-01-22  0.061874       -0.058766                 False
11186  CHPG2410 2025-01-22       NaN             NaN                 False
12438       APG 2025-01-22       NaN             NaN                 False
1356        TRC 2025-01-22  0.324614        0.080925                  Tru