In [89]:
import pandas as pd
import numpy as np
from vnstock import Vnstock
from datetime import datetime
import time
import logging

# Print DataFrames in full: no row truncation
pd.set_option('display.max_rows', None)

# Suppress every logging call at CRITICAL and below for the whole process.
# A narrower alternative, kept for reference:
# logging.getLogger("vnstock").setLevel(logging.CRITICAL)
logging.disable(logging.CRITICAL)

# vnstock handle bound to the VCI data source
stock = Vnstock().stock(source='VCI')


In [90]:

# Pull the exchange listing and keep only tickers listed on HSX or HNX
df = stock.listing.symbols_by_exchange()
filtered_df = df.loc[df['exchange'].isin(['HSX', 'HNX'])]
symbol_list = filtered_df['symbol'].tolist()

# Report how many symbols were kept, followed by the symbols themselves
print('Total symbols: ', len(symbol_list))
print('Symbols: ', symbol_list)

Total symbols:  877


In [91]:
# Function to fetch historical price data for a given ticker
def fetch_price_history(ticker, start_date=None, end_date=None):
    """Fetch the price history of one ticker from the VCI source.

    Args:
        ticker: Symbol to query.
        start_date: Start of the range, passed to the API unchanged
            (presumably 'YYYY-MM-DD', like ``end_date`` -- confirm).
        end_date: End of the range as 'YYYY-MM-DD'. Defaults to today.

    Returns:
        A DataFrame with the columns time, high, low, close and volume,
        sorted newest first. An empty DataFrame is returned when the API
        yields no rows or when the request fails for any reason (the error
        is printed, not raised).
    """
    if end_date is None:
        end_date = datetime.today().strftime('%Y-%m-%d')

    try:
        # Fetch the price data for the entire date range for the ticker
        ticker_client = Vnstock().stock(symbol=ticker, source='VCI')
        df = ticker_client.quote.history(start=start_date, end=end_date, to_df=True)

        if df.empty:
            return pd.DataFrame()

        # Sort by 'time' in descending order
        df['time'] = pd.to_datetime(df['time'])
        df = df.sort_values(by='time', ascending=False)

        return df[['time', 'high', 'low', 'close', 'volume']]

    except Exception as e:
        print(f"Failed to fetch data for {ticker}: {e}")
        return pd.DataFrame()

    finally:
        # Wait 1 second after every request to avoid being blocked. Doing it
        # here also throttles the empty-result and error paths, which are the
        # cases most likely to be followed by an immediate next request.
        time.sleep(1)

In [92]:
from datetime import datetime
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
import os

# Query window: the 40 calendar days ending today, as 'YYYY-MM-DD' strings
end_date = datetime.today().strftime('%Y-%m-%d')
start_date = (datetime.today() - pd.DateOffset(days=40)).strftime('%Y-%m-%d')

# Function to fetch data for a single ticker
def fetch_data_for_ticker(ticker):
    """Fetch price history for ``ticker`` and tag every row with its symbol.

    Uses the module-level ``start_date`` and ``end_date`` as the query range.

    Args:
        ticker: Symbol to fetch.

    Returns:
        The frame returned by ``fetch_price_history`` with an added 'symbol'
        column, or an empty DataFrame when nothing was fetched or an error
        occurred (the error is printed, not raised).
    """
    try:
        history = fetch_price_history(ticker, start_date, end_date)
        if history.empty:
            return history
        # assign() returns a new frame instead of writing into the column
        # subset handed back by fetch_price_history, which avoids pandas'
        # SettingWithCopyWarning on every ticker.
        return history.assign(symbol=ticker)
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return pd.DataFrame()

# NOTE(review): this replaces whatever symbol_list held before (the HSX/HNX
# listing built earlier in the notebook), so only VNM is processed below.
symbol_list = ['VNM']


# Use ThreadPoolExecutor for parallel requests
price_data_list = []

num_cores = os.cpu_count()


with ThreadPoolExecutor(max_workers=num_cores) as executor:  # Adjust `max_workers` as needed
    # Map each pending future back to its ticker so failures can be reported by name
    future_to_ticker = {executor.submit(fetch_data_for_ticker, ticker): ticker for ticker in symbol_list}
    for future in as_completed(future_to_ticker):
        ticker = future_to_ticker[future]
        try:
            data = future.result()
            # Tickers that returned no rows are skipped
            if not data.empty:
                price_data_list.append(data)
        except Exception as e:
            print(f"Failed to process {ticker}: {e}")

# Combine all DataFrames into one. pd.concat raises an opaque
# "No objects to concatenate" on an empty list, so fail with a message that
# says what actually went wrong when every fetch came back empty.
if not price_data_list:
    raise ValueError(
        f"No price data was fetched for any of the {len(symbol_list)} requested symbols"
    )
price_data = pd.concat(price_data_list, ignore_index=True)


Number of CPU cores: 16


In [93]:
# Function to calculate CMF for a single symbol
def calculate_cmf(df, period=20):
    """Compute the Chaikin Money Flow (CMF) for one symbol's price rows.

    Args:
        df: DataFrame with 'time', 'high', 'low', 'close' and 'volume'
            columns. It is not modified; the work is done on a sorted copy.
        period: Number of rows in the rolling window.

    Returns:
        A copy of ``df`` sorted by 'time' descending, with three added
        columns: 'mfm' (money flow multiplier), 'mfv' (money flow volume)
        and 'cmf'. 'cmf' is NaN until ``period`` rows are available.
    """
    # Rolling windows must run in chronological order
    ordered = df.sort_values(by='time')

    # Money Flow Multiplier (MFM). Zero ranges are swapped for NaN first so
    # the division itself never divides by zero.
    price_range = ordered['high'] - ordered['low']
    raw_mfm = (
        (ordered['close'] - ordered['low']) - (ordered['high'] - ordered['close'])
    ) / price_range.replace(0, float('nan'))

    # A bar with high == low carries no directional information, so it
    # contributes 0 money flow. Leaving it as NaN would turn every rolling
    # window that contains the bar into NaN. Rows whose range is NaN
    # because of missing inputs stay NaN.
    ordered['mfm'] = raw_mfm.where(price_range != 0, 0.0)

    # Money Flow Volume (MFV)
    ordered['mfv'] = ordered['mfm'] * ordered['volume']

    # CMF = rolling sum of MFV / rolling sum of volume
    ordered['cmf'] = (
        ordered['mfv'].rolling(window=period).sum()
        / ordered['volume'].rolling(window=period).sum()
    )

    # Hand the rows back newest first
    return ordered.sort_values(by='time', ascending=False)

# Run calculate_cmf once per symbol on an explicit column selection
# (including 'symbol'), then drop the price inputs and the intermediate
# mfm / mfv columns so only time, symbol and cmf remain.
price_data = (
    price_data
    .groupby('symbol', group_keys=False)[['time', 'symbol', 'high', 'low', 'close', 'volume']]
    .apply(calculate_cmf)
    .drop(columns=['high', 'low', 'close', 'volume', 'mfm', 'mfv'])
)

# Show the resulting frame with its 'cmf' column
print(price_data)


         time symbol       cmf
0  2025-01-22    VNM -0.515595
1  2025-01-21    VNM -0.542508
2  2025-01-20    VNM -0.564818
3  2025-01-17    VNM -0.416418
4  2025-01-16    VNM -0.444945
5  2025-01-15    VNM -0.464953
6  2025-01-14    VNM -0.450944
7  2025-01-13    VNM -0.403239
8  2025-01-10    VNM -0.411446
9  2025-01-09    VNM       NaN
10 2025-01-08    VNM       NaN
11 2025-01-07    VNM       NaN
12 2025-01-06    VNM       NaN
13 2025-01-03    VNM       NaN
14 2025-01-02    VNM       NaN
15 2024-12-31    VNM       NaN
16 2024-12-30    VNM       NaN
17 2024-12-27    VNM       NaN
18 2024-12-26    VNM       NaN
19 2024-12-25    VNM       NaN
20 2024-12-24    VNM       NaN
21 2024-12-23    VNM       NaN
22 2024-12-20    VNM       NaN
23 2024-12-19    VNM       NaN
24 2024-12-18    VNM       NaN
25 2024-12-17    VNM       NaN
26 2024-12-16    VNM       NaN
27 2024-12-13    VNM       NaN


In [None]:
# Function to check the CMF condition for each group (symbol)
def check_cmf_condition(df):
    """Flag rows whose CMF sits more than 0.2 above its recent minimum.

    Args:
        df: DataFrame with at least 'time' and 'cmf' columns. It is not
            modified; the work is done on a sorted copy.

    Returns:
        A copy of ``df`` sorted by 'time' ascending (oldest first) with
        three added columns:
        - 'min_last_5_cmf': minimum CMF over the current row and the four
          rows before it (NaN until five non-NaN values are available);
        - 'cmf_diff': 'cmf' minus 'min_last_5_cmf';
        - 'is_cmf_condition_met': True where 'cmf_diff' exceeds 0.2 and the
          rolling minimum is defined.
    """
    # The rolling window looks backwards, so rows must be oldest first
    chronological = df.sort_values(by='time', ascending=True)

    rolling_floor = chronological['cmf'].rolling(window=5).min()
    gap = chronological['cmf'] - rolling_floor

    chronological['min_last_5_cmf'] = rolling_floor
    chronological['cmf_diff'] = gap
    # Rows without a defined rolling minimum can never satisfy the condition
    chronological['is_cmf_condition_met'] = rolling_floor.notna() & (gap > 0.2)

    return chronological

# Evaluate the CMF condition separately within each symbol's rows
price_data = price_data.groupby('symbol', group_keys=False)[['symbol', 'time', 'cmf']].apply(check_cmf_condition)

# Keep one row per symbol: after sorting newest first, the first occurrence
# of each symbol is its most recent row
latest_data = (
    price_data
    .sort_values(by='time', ascending=False)
    .drop_duplicates(subset=['symbol'])
)


In [96]:

# Print the most recent row of each symbol together with its condition flag
print(latest_data[['symbol', 'time', 'cmf', 'min_last_5_cmf', 'is_cmf_condition_met']])

# Collect and print the symbols whose latest row satisfies the condition
symbols_to_buy = latest_data.loc[latest_data['is_cmf_condition_met'], 'symbol'].tolist()
print('Symbols to buy: ', symbols_to_buy)


  symbol       time       cmf  min_last_5_cmf  is_cmf_condition_met
0    VNM 2025-01-22 -0.515595       -0.564818                 False
Symbols to buy:  []
