In [1]:
from datetime import datetime
import json
import math
import os
import pandas as pd
import requests
import shutil
from tqdm.notebook import tqdm

In [19]:
# Download the industry classification list and save it as ./data/industries.csv.
url = "https://histdatafeed.vps.com.vn/industry"
response = requests.get(url)
data = response.json()
# The rows are read from the "data" key of the payload; no rows if it is absent.
industries = data.get("data", [])
# DataFrame.rename returns a NEW frame. The previous code dropped that return
# value, so the CSV was written with the un-renamed headers. Keep the result.
# (Assumes the rows carry "IcbName"/"IcbNameEn" keys; rename ignores missing ones.)
industries_data_frame = pd.DataFrame(industries).rename(
    columns={"IcbName": "name", "IcbNameEn": "english_name"}
)
# Make sure the output folder exists so the cell works on a fresh checkout.
os.makedirs("./data", exist_ok=True)
industries_data_frame.to_csv("./data/industries.csv", index=False)

In [20]:
# Fetch the VN30 list and keep a pretty-printed JSON snapshot in ./vn30.json.
url = "https://bgapidatafeed.vps.com.vn/listvn30"
response = requests.get(url)
vn30 = response.json()
# Serialise first so a failure here cannot leave a truncated file behind.
vn30_json = json.dumps(vn30, indent=2)
# The context manager closes the handle even if the write raises.
with open("./vn30.json", "w") as vn30_file:
    vn30_file.write(vn30_json)

In [21]:
# Pull the complete stock listing and persist it unchanged as CSV.
companies_url = "https://bgapidatafeed.vps.com.vn/getlistallstock"
companies: list[dict] = requests.get(companies_url).json()
companies_data_frame = pd.DataFrame(data=companies)
companies_data_frame.to_csv(path_or_buf="./data/companies.csv", index=False)

In [22]:
# Current time as a whole-second Unix timestamp; the bare name on the last
# line makes the notebook display the value.
current_moment = datetime.now()
timestamp = math.floor(current_moment.timestamp())
timestamp

1701745708

In [23]:
# Scratch folder that receives the per-symbol history CSV files.
history_root = "temp/history"
os.makedirs(history_root, exist_ok=True)

In [24]:
def get_history(symbol: str):
    """Download the daily price history for ``symbol`` and save it as CSV.

    Requests daily ("1D") bars from time 0 up to the current moment. When the
    response contains at least one bar, the rows are written to
    ``./temp/history/<first character, lower-cased>/<symbol>.csv`` with the
    columns ``symbol, date, timestamp, open, high, low, close, volume``.
    Nothing is written when the response carries no timestamps.

    Args:
        symbol: Ticker to request. The symbol echoed back by the response is
            used for the rows and the file name when present; otherwise this
            value is used.

    Returns:
        None. The result is the CSV file on disk.
    """
    now = math.floor(datetime.now().timestamp())
    url = f"https://histdatafeed.vps.com.vn/tradingview/history?symbol={symbol}&resolution=1D&from=0&to={now}"
    # Bounded wait: this runs once per ticker inside long loops, and requests
    # waits indefinitely when no timeout is supplied.
    response = requests.get(url, timeout=30)
    history_data: dict = response.json()

    # Fall back to the requested ticker; previously a response without a
    # "symbol" key produced "" and the later symbol[0] raised IndexError.
    symbol = history_data.get("symbol") or symbol
    timestamps: list[int] = history_data.get("t") or []
    if not timestamps or not symbol:
        return

    opens: list[float] = history_data.get("o") or []
    highs: list[float] = history_data.get("h") or []
    lows: list[float] = history_data.get("l") or []
    closes: list[float] = history_data.get("c") or []
    volumes: list[int] = history_data.get("v") or []

    # zip stops at the shortest series, so a ragged response is truncated
    # instead of raising IndexError part-way through.
    history: list[dict] = [
        {
            "symbol": symbol,
            # Bug fix: convert the single bar's timestamp. The old code passed
            # the whole list to fromtimestamp, which raises TypeError.
            # fromtimestamp yields the calendar date in the local time zone.
            "date": datetime.fromtimestamp(bar_time).date(),
            "timestamp": bar_time,
            "open": bar_open,
            "high": bar_high,
            "low": bar_low,
            "close": bar_close,
            "volume": bar_volume,
        }
        for bar_time, bar_open, bar_high, bar_low, bar_close, bar_volume in zip(
            timestamps, opens, highs, lows, closes, volumes
        )
    ]
    if not history:
        return

    history_data_frame = pd.DataFrame(history)
    history_data_frame["date"] = pd.to_datetime(history_data_frame["date"])

    # Files are bucketed by the ticker's first character, lower-cased.
    first: str = symbol[0].lower()
    folder_path = f"./temp/history/{first}"
    file_path = f"{folder_path}/{symbol}.csv"
    os.makedirs(folder_path, exist_ok=True)
    history_data_frame.to_csv(file_path, index=False)

In [25]:
# Download the history of every VN30 entry, showing a progress bar.
# (Each entry is passed to get_history as the ticker; presumably plain strings.)
vn30_progress = tqdm(vn30)
for vn30_symbol in vn30_progress:
    get_history(vn30_symbol)

  0%|          | 0/30 [00:00<?, ?it/s]

In [26]:
# Keep only the entries whose "stock_code" is exactly three characters long.
# A missing or null code is treated as "" (and therefore excluded) rather than
# making len() raise TypeError on None.
list_of_stock = [
    company for company in companies if len(company.get("stock_code") or "") == 3
]
len(list_of_stock)

1617

In [27]:
# Download history for every filtered stock, last entry first.
# Iterate over a reversed COPY: the previous in-place list.reverse() flipped
# list_of_stock again on every re-run of the cell, so the processing order
# depended on how many times the cell had been executed. A slice is still a
# list, so tqdm can read its length for the progress-bar total.
for stock in tqdm(list_of_stock[::-1]):
    symbol: str = stock.get("stock_code", "")
    get_history(symbol)

  0%|          | 0/1617 [00:00<?, ?it/s]

In [None]:
# Merge every CSV found anywhere under ./temp into ./data/history.csv.
# Paths are sorted because os.walk yields entries in filesystem order, which
# would otherwise make the row order of the combined file vary between runs.
csv_files = sorted(
    os.path.join(root, file)
    for root, _, files in os.walk("./temp")
    for file in files
    if file.endswith(".csv")
)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that says what is actually missing.
if not csv_files:
    raise FileNotFoundError(
        "No CSV files found under ./temp; run the history download cells first."
    )

data_frames = [pd.read_csv(csv_file) for csv_file in csv_files]

history_data_frame = pd.concat(data_frames, ignore_index=True)
# Make sure the output folder exists so the cell works on a fresh checkout.
os.makedirs("./data", exist_ok=True)
history_data_frame.to_csv("./data/history.csv", index=False)

In [2]:
# Remove the scratch directory. Skip when it is already gone so the cell can
# be re-run (or run on a fresh kernel) without raising FileNotFoundError.
if os.path.isdir("./temp"):
    shutil.rmtree("./temp")