# VNINDEX

In [6]:
import concurrent.futures
from datetime import datetime
import math
import os
import pandas
import requests
import time
from tqdm.notebook import tqdm_notebook


## Constants

In [7]:
CHUNK_SIZE = 200

TIMEOUT = 10

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36"
}

## Utils

In [8]:
def add_zero(number : int):
    if number > 9:
        return f"{number}"
    return f"0{number}"

def chunks(list, n):
    # looping till length l
    for i in range(0, len(list), n):
        yield list[i:i + n]

## Symbols

In [9]:
GATEWAY_URL = "https://wgateway-iboard.ssi.com.vn/graphql"

STOCK_SYMBOL_QUERY = """
    query {
        hose: stockRealtimes(exchange: "hose") {
          stockSymbol
        }
        hnx: stockRealtimes(exchange: "hnx") {
          stockSymbol
        }
        upcom: stockRealtimes(exchange: "upcom") {
          stockSymbol
        }
    }
"""

stock_symbols_response = requests.post(
        url=GATEWAY_URL, json={"query": STOCK_SYMBOL_QUERY}, timeout=TIMEOUT)

stock_symbols_json = stock_symbols_response.json()
stock_symbols_data = stock_symbols_json.get("data", {})
stock_symbols_hose = list(map(lambda item: { "market": "HOSE", "symbol": item.get("stockSymbol", "") },stock_symbols_data.get("hose", [])))
stock_symbols_hnx = list(map(lambda item: { "market": "HNX", "symbol": item.get("stockSymbol", "") },stock_symbols_data.get("hnx", [])))
stock_symbols_upcom = list(map(lambda item: { "market": "UPCOM", "symbol": item.get("stockSymbol", "") },stock_symbols_data.get("upcom", [])))
stock_symbols_vnindex = list(filter(lambda item: len(item.get("symbol")) == 3, stock_symbols_hose + stock_symbols_hnx + stock_symbols_upcom))
# Data Frame
stock_symbols_vnindex_data_frame = pandas.DataFrame(stock_symbols_vnindex)
sorted_stock_symbols_vnindex_data_frame = stock_symbols_vnindex_data_frame.sort_values(ascending=True, by="symbol")
sorted_stock_symbols_vnindex_data_frame.to_csv("./data/symbols.csv", index=False, header = True)

## Companies

In [10]:
INFO_URL = "https://finfo-iboard.ssi.com.vn/graphql"

COMPANY_PROFILE_QUERY = """
    query companyProfile {
        companyProfile(symbol: "%s", language: "en") {
            symbol
            companyname
            industryname
            supersector
            sector
            subsector
            listingdate
            issueshare
            listedvalue
        }
        companyStatistics(symbol: "%s") {
            marketcap
        }
    }
"""

def get_company(symbol : str):
    company_profile_query = COMPANY_PROFILE_QUERY % (
        symbol, symbol)
    try:
        company_profile_response = requests.post(
            url=INFO_URL,
            json={"query": company_profile_query},
            headers=HEADERS,
            timeout=TIMEOUT
        )
        company_profile_json = company_profile_response.json()
        company_profile_data = company_profile_json.get("data", {})
        company_profile = company_profile_data.get("companyProfile", {})
        company_statistics = company_profile_data.get("companyStatistics", {})
        symbol = company_profile.get("symbol", "")
        name = company_profile.get("companyname", "")
        industry = company_profile.get("industryname", "")
        supersector = company_profile.get("supersector", "")
        sector = company_profile.get("sector", "")
        subsector = company_profile.get("subsector", "")
        listing_date_string : str = company_profile.get("listingdate", "").split(" ")[0]
        date_month_year = listing_date_string.split("/")
        listing_date = f"{add_zero(int(date_month_year[2]))}-{add_zero(int(date_month_year[1]))}-{add_zero(int(date_month_year[0]))}"
        issue_share = company_profile.get("issueshare", "")
        listed_value = company_profile.get("issueshare", "")
        market_cap = company_statistics.get("marketcap", "")
        return {
            "symbol": symbol,
            "name": name,
            "industry": industry,
            "supersector": supersector,
            "sector": sector,
            "subsector": subsector,
            "listing_date": listing_date,
            "issue_share": issue_share,
            "listed_value": listed_value,
            "market_cap": market_cap
        }
    except Exception as ex:
        print(ex, "Error")
        return {
            "symbol": symbol,
            "name": "",
            "industry": "",
            "supersector": "",
            "sector": "",
            "subsector": "",
            "listing_date": "",
            "issue_share": "",
            "listed_value": "",
            "market_cap": ""
        }

chunks_stock_symbols = list(chunks(stock_symbols_vnindex, CHUNK_SIZE))

companies = []

for chunk_stock_symbols in tqdm_notebook(chunks_stock_symbols):
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        for stock_symbol in chunk_stock_symbols:
            symbol = stock_symbol.get("symbol")
            futures.append(executor.submit(get_company, symbol=symbol))
        for future in concurrent.futures.as_completed(futures):
            company = future.result()
            companies.append(company)

# Data Frame
companies_data_frame = pandas.DataFrame(companies)
sorted_companies_data_frame = companies_data_frame.sort_values(ascending=True, by="symbol")
sorted_companies_data_frame.to_csv("./data/companies.csv", index=False, header = True)

  0%|          | 0/9 [00:00<?, ?it/s]

list index out of range Error


## History

In [11]:
HISTORY_URL = "https://iboard.ssi.com.vn/dchart/api/history"

def get_old_history(symbol: str):
    try:
        history_filepath = f"./data/history/{symbol}.csv"
        old_history_data_frame = pandas.read_csv(filepath_or_buffer=history_filepath)
        return old_history_data_frame.to_dict('records')
    except:
        return []
    
weekdays = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"]

def get_trend(open : float, close : float):
    if open == close:
        return "SIDEWAY"
    if open > close:
        return "DOWNTREND"
    return "UPTREND"

def map_history(symbol : str, list_t : list, list_o : list, list_h : list, list_l : list, list_c : list, list_v : list):
    history = []
    for index, value in enumerate(list_t):
        date_time = datetime.fromtimestamp(value)
        full_date = date_time.strftime("%Y-%m-%d")
        year = date_time.year
        month = date_time.month
        date = date_time.day
        week = date_time.isocalendar().week
        weekday_index = date_time.isocalendar().weekday
        weekday = weekdays[weekday_index]
        quarter = math.ceil(month / 3)
        price_open = float(list_o[index])
        price_high = float(list_h[index])
        price_low = float(list_l[index])
        price_close = float(list_c[index])
        volume = int(list_v[index])
        timestamp = int(list_t[index])
        trend = get_trend(price_open, price_close)
        history.append({
            "symbol": symbol,
            "year": year,
            "month": month,
            "date": date,
            "quarter": quarter,
            "week": week,
            "weekday_index": weekday_index,
            "weekday": weekday,
            "full_date": full_date,
            "open": price_open,
            "high": price_high,
            "low": price_low,
            "close": price_close,
            "volume": volume,
            "timestamp": timestamp,
            "trend": trend
        })
    return history

def get_new_history(symbol: str, to: int):
    try:
        history_url = HISTORY_URL + f"?resolution=D&symbol={symbol}&from=0&to={to}"
        history_response = requests.get(history_url, allow_redirects=False, headers=HEADERS, timeout=TIMEOUT)
        history_json = history_response.json()
        t = history_json.get("t", [])
        o = history_json.get("o", [])
        h = history_json.get("h", [])
        l = history_json.get("l", [])
        c = history_json.get("c", [])
        v = history_json.get("v", [])
        history = map_history(symbol, t, o, h, l, c, v)
        return history
    except:
        return []

def get_history(company):
    symbol = company.get("symbol", "")
    to = math.floor(time.time())
    old_history = get_old_history(symbol)
    new_history = get_new_history(symbol, to)
    combine_history = new_history + old_history
    history = list({v['full_date']: v for v in combine_history}.values())
    if len(history) == 0:
        return "EMPTY"
    # Data Frame
    history_filepath = f"./data/history/{symbol}.csv"
    history_data_frame = pandas.DataFrame(history)
    sorted_history_data_frame = history_data_frame.sort_values(ascending=True, by="full_date")
    sorted_history_data_frame.to_csv(history_filepath, index=False, header = True)
    return "DONE"
    
chunks_companies = list(chunks(companies, CHUNK_SIZE))

for chunk_companies in tqdm_notebook(chunks_companies):
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        for company in chunk_companies:
            futures.append(executor.submit(get_history, company=company))
        for future in concurrent.futures.as_completed(futures):
            result = future.result()

  0%|          | 0/9 [00:00<?, ?it/s]

## Dragon Capital

In [12]:
product_codes = ["e1vfvn30", "fuevfvnd", "dcbc", "dcds"]

BASE_URL = "https://api.dragoncapital.com.vn/fundfactsheet/top_holdings/getValueByDate.php"


for product_code in product_codes:
    dates_url = f"{BASE_URL}?trade_code={product_code}"
    dates_response = requests.get(dates_url, timeout=10)
    response_json = dates_response.json()
    available_dates = response_json["available_dates"]
    available_dates_rows = []
    for available_date in available_dates:
        available_dates_row = {}
        available_dates_row["available_date"] = available_date
        available_dates_row["product_code"] = product_code
        available_dates_row["capital"] = "dragon-capital"
        available_dates_rows.append(available_dates_row)
    available_dates_rows_data_frame = pandas.DataFrame(available_dates_rows)
    available_dates_file_path = \
        f"./data/capital/dragon-capital/{product_code}/available-dates.csv"
    available_dates_rows_data_frame.to_csv(available_dates_file_path)
    # Top Holding
    top_holdings_rows = []
    for available_date in available_dates:
        print(product_code, available_date)
        top_holdings_url = f"{BASE_URL}?trade_code={product_code}&date_upload={available_date}"
        top_holdings_response = requests.get(top_holdings_url, timeout=10)
        top_holdings_json = top_holdings_response.json()
        top_holdings = top_holdings_json["ffs_holding"]
        top_holdings_rows += top_holdings
    top_holdings_rows_data_frame = pandas.DataFrame(top_holdings_rows)
    top_holdings_file_path = \
        f"./data/capital/dragon-capital/{product_code}/portfolio.csv"
    top_holdings_rows_data_frame.to_csv(top_holdings_file_path)


e1vfvn30 2023-02-28
e1vfvn30 2023-01-31
e1vfvn30 2022-12-31
e1vfvn30 2022-11-30
e1vfvn30 2022-10-31
e1vfvn30 2022-09-30
e1vfvn30 2022-08-31
e1vfvn30 2022-07-31
e1vfvn30 2022-06-30
e1vfvn30 2022-05-31
e1vfvn30 2022-04-30
e1vfvn30 2022-03-31
e1vfvn30 2022-02-28
e1vfvn30 2022-01-31
e1vfvn30 2021-12-31
e1vfvn30 2021-11-30
e1vfvn30 2021-10-31
e1vfvn30 2021-09-30
e1vfvn30 2021-08-31
e1vfvn30 2021-07-31
e1vfvn30 2021-06-30
e1vfvn30 2021-05-31
e1vfvn30 2021-04-30
e1vfvn30 2021-03-31
e1vfvn30 2021-02-28
e1vfvn30 2021-01-31
e1vfvn30 2020-12-31
e1vfvn30 2020-11-30
e1vfvn30 2020-10-31
e1vfvn30 2020-09-30
e1vfvn30 2020-08-31
e1vfvn30 2020-07-31
e1vfvn30 2020-06-30
e1vfvn30 2020-05-31
e1vfvn30 2020-04-30
e1vfvn30 2020-03-31
e1vfvn30 2020-02-29
e1vfvn30 2020-01-31
e1vfvn30 2019-12-31
e1vfvn30 2019-11-30
e1vfvn30 2019-10-31
e1vfvn30 2019-09-30
e1vfvn30 2019-07-31
e1vfvn30 2019-06-30
e1vfvn30 2019-05-31
e1vfvn30 2019-04-30
e1vfvn30 2019-03-31
e1vfvn30 2019-02-28
e1vfvn30 2019-01-31
e1vfvn30 2018-12-31
