# 크롤링

In [1]:
# 라이브러리 불러오기
import requests
import yfinance as yf
import pandas as pd
from yahooquery import Ticker
import numpy as np
import cloudscraper

In [2]:
def convert(value):
    """Convert an abbreviated volume string such as "713.16K" to a number.

    Recognised suffixes are K (thousand), M (million) and B (billion),
    matched case-insensitively at the END of the string only. Thousands
    separators ("1,234K") and surrounding whitespace are tolerated.

    Args:
        value: Any object. Only ``str`` inputs are inspected.

    Returns:
        A ``float`` when ``value`` is a string made of a number followed by a
        recognised suffix; otherwise ``value`` itself, unchanged (non-string
        inputs, plain numeric strings, placeholders such as "-").
    """
    if not isinstance(value, str):  # NaN / already-numeric cells pass through
        return value

    multipliers = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}
    text = value.strip().replace(",", "")
    suffix = text[-1:].upper()  # slice (not index) so "" does not raise

    if suffix not in multipliers:
        return value

    try:
        return float(text[:-1]) * multipliers[suffix]
    except ValueError:
        # Ends in K/M/B but the rest is not a number (e.g. "OK"): leave as is.
        return value

In [3]:
def update_data(ticker_name):
    """Download the price history for a ticker and save it as a CSV.

    Args:
        ticker_name: Ticker symbol passed to ``yf.download``; also used as
            the CSV file stem (``<ticker_name>.csv`` in the working directory).

    Returns:
        The downloaded DataFrame, sorted by its index in ascending order and
        without the "Adj Close" column.
    """
    data = yf.download(ticker_name)

    # Sort on the index itself rather than by the index *name* "Date", so the
    # call does not depend on how the index happens to be labelled.
    data = data.sort_index(ascending=True)

    # "Adj Close" is not always present in the download result (e.g. when
    # prices are auto-adjusted), so a missing column must not be fatal.
    data = data.drop(columns=["Adj Close"], errors="ignore")

    # utf-8-sig writes a BOM so spreadsheet tools detect the encoding.
    data.to_csv(ticker_name + ".csv", encoding="utf-8-sig")
    return data

In [4]:
def update_data_investing_group(url, ticker_name):
    """Scrape a historical-data table and merge it into ``<ticker_name>.csv``.

    The first HTML table found at ``url`` is normalised to the column layout
    used by the stored CSV (Close/Open/High/Low/Volume, indexed by Date),
    appended to the stored rows, de-duplicated by date, sorted and written
    back to the same CSV file.

    Args:
        url: Page to scrape; expected to contain a table with the columns
            "Date", "Price", "Open", "High", "Low", "Vol." and "Change %".
        ticker_name: CSV file stem. ``<ticker_name>.csv`` must already exist
            and have a "Date" column.

    Returns:
        The merged DataFrame, or ``None`` when the page contains no table.
    """
    import io

    scraper = cloudscraper.create_scraper()
    html = scraper.get(url).content

    # read_html raises ValueError (it does not return an empty list) when the
    # page has no <table>; wrap the bytes in a file-like object because
    # passing literal HTML is deprecated in recent pandas.
    try:
        dfs = pd.read_html(io.BytesIO(html))
    except ValueError:
        dfs = []

    if not dfs:
        print("데이터를 찾을 수 없습니다.")
        return None

    df = dfs[0]

    # Bring the scraped table in line with the stored CSV's columns.
    df = df.drop(columns=["Change %"])
    df = df.rename(columns={"Vol.": "Volume", "Price": "Close"})
    df["Volume"] = df["Volume"].apply(convert)  # "713.16K" -> 713160.0
    df["Date"] = pd.to_datetime(df["Date"], format="%m/%d/%Y")
    df = df.set_index("Date")

    data = pd.read_csv(ticker_name + ".csv", index_col="Date")
    data.index = pd.to_datetime(data.index)

    merged_data = pd.concat([data, df])

    # Scraped prices may carry thousands separators ("1,091.88"); strip them
    # and coerce anything unparsable to NaN.
    for col in ["Open", "High", "Close", "Low"]:
        merged_data[col] = pd.to_numeric(
            merged_data[col].astype(str).str.replace(",", ""), errors="coerce"
        )

    # On a date clash keep the first occurrence, i.e. the row already stored
    # in the CSV wins over the freshly scraped one.
    merged_data = merged_data.loc[~merged_data.index.duplicated(keep="first")]

    merged_data = merged_data.sort_index()

    merged_data.to_csv(ticker_name + ".csv", index=True)

    return merged_data

In [5]:
def output_data(ticker_name):
    """Load ``<ticker_name>.csv`` and return it indexed by its "Date" column."""
    csv_path = ticker_name + '.csv'
    return pd.read_csv(csv_path).set_index('Date')

In [6]:
def get_data_investing_exchange(url, ticker_name):
    """Scrape an exchange-rate history table and merge it into a CSV.

    The SECOND HTML table found at ``url`` is indexed by date, appended to
    the rows stored in ``<ticker_name>.csv``, de-duplicated by date, sorted
    and written back to the same file. Columns are kept as scraped.

    Args:
        url: Page to scrape; its second table must have a "Date" column in
            ``%m/%d/%Y`` format.
        ticker_name: CSV file stem. ``<ticker_name>.csv`` must already exist
            and have a "Date" column.

    Returns:
        The merged DataFrame, or ``None`` when the page does not contain at
        least two tables.
    """
    import io

    scraper = cloudscraper.create_scraper()
    html = scraper.get(url).content

    # read_html raises ValueError (it does not return an empty list) when the
    # page has no <table>; wrap the bytes in a file-like object because
    # passing literal HTML is deprecated in recent pandas.
    try:
        dfs = pd.read_html(io.BytesIO(html))
    except ValueError:
        dfs = []

    # The table used is dfs[1], so at least two tables are required; a
    # one-table page previously slipped past the guard and hit IndexError.
    if len(dfs) < 2:
        print("데이터를 찾을 수 없습니다.")
        return None

    df = dfs[1]

    df["Date"] = pd.to_datetime(df["Date"], format="%m/%d/%Y")
    df = df.set_index("Date")

    data = pd.read_csv(ticker_name + ".csv", index_col="Date")
    data.index = pd.to_datetime(data.index)

    merged_data = pd.concat([data, df])

    # On a date clash keep the first occurrence, i.e. the row already stored
    # in the CSV wins over the freshly scraped one.
    merged_data = merged_data.loc[~merged_data.index.duplicated(keep="first")]

    merged_data = merged_data.sort_index()

    merged_data.to_csv(ticker_name + ".csv", index=True)

    return merged_data

### 한국

In [7]:
# Download the ^KS11 price history and write it to ^KS11.csv.
update_data("^KS11")

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1996-12-11,705.989990,709.479980,704.429993,704.679993,28000
1996-12-12,705.109985,706.010010,688.739990,689.380005,25900
1996-12-13,690.440002,695.719971,677.640015,689.070007,26500
1996-12-16,686.969971,686.969971,667.710022,673.919983,22800
1996-12-17,675.349976,680.090027,660.390015,663.349976,31600
...,...,...,...,...,...
2023-12-18,2568.770020,2573.129883,2556.050049,2566.860107,383000
2023-12-19,2564.810059,2570.060059,2556.520020,2568.550049,392500
2023-12-20,2586.989990,2615.379883,2584.850098,2614.300049,570400
2023-12-21,2598.370117,2610.810059,2587.159912,2600.020020,578300


In [8]:
# Download the 055550.KS price history and write it to 055550.KS.csv.
update_data("055550.KS")

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-04,18688.708984,20234.212891,18308.277344,19259.357422,197145699
2000-01-05,19021.585938,19592.234375,18070.507812,18664.931641,186396333
2000-01-06,18807.593750,19497.126953,16406.119141,17119.427734,184079807
2000-01-07,18546.046875,18878.923828,17618.744141,18070.507812,256406581
2000-01-10,18783.816406,19211.802734,18546.046875,18926.478516,988218404
...,...,...,...,...,...
2023-12-18,38550.000000,38700.000000,38300.000000,38700.000000,664887
2023-12-19,38200.000000,38750.000000,38150.000000,38450.000000,648480
2023-12-20,38850.000000,39450.000000,38550.000000,39400.000000,930194
2023-12-21,38900.000000,39400.000000,38900.000000,39300.000000,621640


### 베트남

In [9]:
# Scrape the first table on the investing.com page and merge it into the
# existing ^VNINDEX.VN.csv (the CSV must already exist; it is read before merging).
update_data_investing_group('https://www.investing.com/indices/vn-historical-data', '^VNINDEX.VN')

Unnamed: 0_level_0,Close,Open,High,Low,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-07-31,101.55,101.55,101.55,101.55,10.0
2000-08-02,103.38,103.38,103.38,103.38,
2000-08-04,105.20,105.20,105.20,105.20,0.0
2000-08-07,106.92,106.92,106.92,106.92,10.0
2000-08-09,108.64,108.64,108.64,108.64,20.0
...,...,...,...,...,...
2023-12-18,1091.88,1102.30,1103.74,1091.45,713160.0
2023-12-19,1096.30,1091.88,1096.30,1082.29,649910.0
2023-12-20,1100.76,1096.30,1102.27,1093.38,598210.0
2023-12-21,1096.66,1100.76,1100.76,1094.47,83600.0


In [10]:
# Download the VCB.VN price history and write it to VCB.VN.csv.
update_data('VCB.VN')

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-06-30,22898.183594,22898.183594,22898.183594,22898.183594,770543
2009-07-01,24043.093750,24043.093750,22707.365234,23089.001953,16372619
2009-07-02,22707.365234,22898.183594,21944.093750,22134.912109,3971498
2009-07-03,21562.457031,21753.275391,21371.638672,21371.638672,2357526
2009-07-06,21371.638672,22325.730469,21371.638672,22325.730469,4118416
...,...,...,...,...,...
2023-12-18,82800.000000,82900.000000,81500.000000,81500.000000,2004600
2023-12-19,81500.000000,81500.000000,80100.000000,81200.000000,2079400
2023-12-20,81200.000000,81300.000000,80500.000000,81100.000000,2014300
2023-12-21,81000.000000,81000.000000,80300.000000,80900.000000,2148400


### 환율(USD_VND)

In [11]:
# Disabled bootstrap, kept for reference: it builds usd_vnd.csv by
# concatenating two CSV exports (presumably downloaded by hand - confirm),
# sorting them by date and indexing by Date. get_data_investing_exchange()
# reads usd_vnd.csv before merging, so that file has to exist beforehand.
#usd_vnd1 = pd.read_csv('USD_VND Historical Data.csv')
#usd_vnd2 = pd.read_csv('USD_VND Historical Data (1).csv')
#usd_vnd = pd.concat([usd_vnd1,usd_vnd2])
#usd_vnd['Date'] = pd.to_datetime(usd_vnd['Date'], format='%m/%d/%Y').dt.date
#usd_vnd = usd_vnd.sort_values('Date')
#usd_vnd = usd_vnd.set_index('Date')
#usd_vnd.to_csv('usd_vnd.csv', index=True, encoding='utf-8-sig')

# Scrape the page's second table and merge its rows into usd_vnd.csv.
get_data_investing_exchange('https://www.investing.com/currencies/usd-vnd-historical-data', 'usd_vnd')

Unnamed: 0_level_0,Price,Open,High,Low,Vol.,Change %
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1996-01-02,11011.0,11010.0,11011.0,11010.0,,0.00%
1996-01-03,11011.5,11010.0,11011.5,11010.0,,0.00%
1996-01-04,11011.5,11010.0,11011.5,11010.0,,0.00%
1996-01-05,11011.5,11010.0,11011.5,11010.0,,0.00%
1996-01-08,11011.5,11010.0,11011.5,11010.0,,0.00%
...,...,...,...,...,...,...
2023-12-18,24305.0,24265.0,24305.0,24230.0,,+0.19%
2023-12-19,24347.5,24360.0,24375.5,24340.0,,-0.01%
2023-12-20,24335.0,24330.0,24349.0,24275.0,,-0.06%
2023-12-21,24320.0,24342.5,24357.5,24320.0,,-0.06%


### 환율(USD_KRW)

In [12]:
# Disabled bootstrap, kept for reference: it builds usd_krw.csv by
# concatenating two CSV exports (presumably downloaded by hand - confirm),
# sorting them by date and indexing by Date. get_data_investing_exchange()
# reads usd_krw.csv before merging, so that file has to exist beforehand.
#usd_krw1 = pd.read_csv('USD_KRW Historical Data.csv')
#usd_krw2 = pd.read_csv('USD_KRW Historical Data (1).csv')
#usd_krw = pd.concat([usd_krw1,usd_krw2])
#usd_krw['Date'] = pd.to_datetime(usd_krw['Date'], format='%m/%d/%Y').dt.date
#usd_krw = usd_krw.sort_values('Date')
#usd_krw = usd_krw.set_index('Date')
#usd_krw.to_csv('usd_krw.csv', index=True, encoding='utf-8-sig')

# Scrape the page's second table and merge its rows into usd_krw.csv.
get_data_investing_exchange('https://www.investing.com/currencies/usd-krw-historical-data', 'usd_krw')

Unnamed: 0_level_0,Price,Open,High,Low,Vol.,Change %
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1996-01-02,775.75,775.75,775.75,775.75,,0.00%
1996-01-03,778.45,774.75,778.75,774.05,,0.35%
1996-01-04,787.20,777.60,788.00,777.60,,1.12%
1996-01-05,788.20,786.10,788.60,784.10,,0.13%
1996-01-08,788.20,788.10,791.80,787.60,,0.00%
...,...,...,...,...,...,...
2023-12-18,1297.7,1298.69,1301.56,1296.39,,-0.08%
2023-12-19,1308.25,1302.12,1310.29,1300.33,,+0.41%
2023-12-20,1303.08,1300.84,1305.17,1297.06,,+0.22%
2023-12-21,1294.86,1303.31,1307.87,1295.91,,-0.63%


## 출력

In [13]:
def country_output(ticker_name):
    """Summarise the most recent figures stored in ``<ticker_name>.csv``.

    Rows containing any missing value are discarded before the summary is
    computed. The CSV must provide the columns Date, Open, High, Low, Close
    and Volume.

    Args:
        ticker_name: CSV file stem.

    Returns:
        A dict with the keys:
            previous_close, today_open, today_volume: Close / Open / Volume
                of the latest row, formatted with thousands separators and
                two decimals.
            avg_volume: mean Volume over all rows, formatted with no decimals.
            range_days: "Low - High" of the latest row.
            range_52: "min Low - max High" over the 52 weeks ending at the
                latest date.
            MA_50, MA_200: 50- and 200-row rolling mean of Close at the
                latest row, rounded to one decimal (NaN when there are fewer
                rows than the window).
    """
    data = pd.read_csv(ticker_name + ".csv").dropna()
    data.index = pd.to_datetime(data["Date"])
    # "Latest row" below means latest by date, so do not rely on file order.
    data = data.sort_index()

    last_row = data.iloc[-1]
    output = {}

    output["previous_close"] = "{:,.2f}".format(last_row["Close"])
    output["today_open"] = "{:,.2f}".format(last_row["Open"])
    output["today_volume"] = "{:,.2f}".format(last_row["Volume"])
    output["avg_volume"] = "{:,.0f}".format(data["Volume"].mean())

    output["range_days"] = (
        "{:,.2f}".format(last_row["Low"]) + " - " + "{:,.2f}".format(last_row["High"])
    )

    # DataFrame.last("52W") is deprecated; select the trailing 52 weeks with
    # an explicit date cutoff instead.
    cutoff = data.index.max() - pd.Timedelta(weeks=52)
    last_year_data = data.loc[data.index >= cutoff]
    output["range_52"] = (
        "{:,.2f}".format(last_year_data["Low"].min())
        + " - "
        + "{:,.2f}".format(last_year_data["High"].max())
    )

    close = data["Close"]
    output["MA_50"] = round(close.rolling(window=50).mean().iloc[-1], 1)
    output["MA_200"] = round(close.rolling(window=200).mean().iloc[-1], 1)

    return output

In [14]:
def group_output(ticker_name):
    """Summarise a stock: CSV-based price statistics plus live ticker info.

    The price/volume part is exactly what ``country_output`` computes from
    ``<ticker_name>.csv``; on top of that, fields from ``yf.Ticker(...).info``
    are added.

    Args:
        ticker_name: Ticker symbol; also the CSV file stem.

    Returns:
        The ``country_output`` dict extended with:
            Beta: ``info["beta"]``.
            enterprise: ``info["enterpriseValue"]`` abbreviated with a
                T (>= 10**12), B (>= 10**9) or M (>= 10**6) suffix and
                rounded to two decimals; the plain number as a string
                below one million.
            Buy, Sell: ``info["bid"]`` and ``info["ask"]``.
        Any of these four is ``None`` when the field is absent from ``info``.
    """
    # Same price/volume statistics as for an index: reuse instead of copying.
    output = country_output(ticker_name)

    # Read .info once and tolerate missing keys rather than raising KeyError.
    info = yf.Ticker(ticker_name).info

    output["Beta"] = info.get("beta")

    num = info.get("enterpriseValue")
    if num is None:
        output["enterprise"] = None
    elif num >= 10**12:  # trillions
        output["enterprise"] = str(round(num / 10**12, 2)) + "T"
    elif num >= 10**9:  # billions
        output["enterprise"] = str(round(num / 10**9, 2)) + "B"
    elif num >= 10**6:  # millions
        output["enterprise"] = str(round(num / 10**6, 2)) + "M"
    else:
        output["enterprise"] = str(num)

    output["Buy"] = info.get("bid")
    output["Sell"] = info.get("ask")

    return output

In [15]:
# Build the summary dict from ^KS11.csv.
country_output("^KS11")

  last_year_data = data.last("52W")


{'previous_close': '2,599.51',
 'today_open': '2,617.72',
 'today_volume': '466,000.00',
 'avg_volume': '439,048',
 'range_days': '2,599.51 - 2,621.37',
 'range_52': '2,180.67 - 2,668.21',
 'MA_50': 2463.8,
 'MA_200': 2516.2}

In [16]:
# Build the summary dict from ^VNINDEX.VN.csv.
country_output("^VNINDEX.VN")

  last_year_data = data.last("52W")


{'previous_close': '1,103.06',
 'today_open': '1,102.43',
 'today_volume': '558,030.00',
 'avg_volume': '256,052',
 'range_days': '1,099.32 - 1,104.65',
 'range_52': '983.67 - 1,255.11',
 'MA_50': 1151.6,
 'MA_200': 1115.5}

In [17]:
# Build the summary dict from 055550.KS.csv plus the ticker's yfinance info fields.
group_output("055550.KS")

  last_year_data = data.last("52W")


{'previous_close': '38,800.00',
 'today_open': '39,350.00',
 'today_volume': '387,597.00',
 'avg_volume': '181,101,727',
 'range_days': '38,800.00 - 39,500.00',
 'range_52': '32,400.00 - 44,900.00',
 'MA_50': 36328.0,
 'MA_200': 35422.5,
 'Beta': 0.613,
 'enterprise': '66.52T',
 'Buy': 38800.0,
 'Sell': 38850.0}

In [18]:
# Build the summary dict from VCB.VN.csv plus the ticker's yfinance info fields.
group_output("VCB.VN")

  last_year_data = data.last("52W")


{'previous_close': '80,900.00',
 'today_open': '80,900.00',
 'today_volume': '1,721,800.00',
 'avg_volume': '1,763,505',
 'range_days': '80,400.00 - 81,100.00',
 'range_52': '66,807.79 - 93,400.00',
 'MA_50': 85534.0,
 'MA_200': 83760.6,
 'Beta': 0.793,
 'enterprise': '133.99T',
 'Buy': 81000.0,
 'Sell': 81100.0}