# 크롤링

In [1]:
# 라이브러리 불러오기
import requests
import yfinance as yf
import pandas as pd
from yahooquery import Ticker
import numpy as np
import cloudscraper

In [2]:
def convert(value):
    """Convert an abbreviated volume string such as ``"1.5K"`` to a number.

    Recognised suffixes are ``K`` (thousand), ``M`` (million) and ``B``
    (billion). Surrounding whitespace and thousands separators (``,``) are
    ignored while parsing.

    Args:
        value: The raw cell value. Only strings are inspected.

    Returns:
        A ``float`` when ``value`` is a string ending in a recognised suffix;
        otherwise ``value`` itself, unchanged (non-strings, empty strings and
        strings without a suffix all pass straight through).

    Raises:
        ValueError: If the text before the suffix is not a valid number.
    """
    if not isinstance(value, str):  # numbers / NaN / None pass through as-is
        return value

    multipliers = {"K": 10**3, "M": 10**6, "B": 10**9}
    text = value.strip().replace(",", "")
    # Only the trailing character counts as a unit, so a letter that merely
    # appears somewhere inside the string does not trigger a conversion.
    suffix = text[-1:]
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]
    return value

In [3]:
def get_data(ticker_name):
    """Download the price history of ``ticker_name`` and cache it as a CSV.

    The frame returned by ``yf.download`` is sorted by its index in ascending
    order, stripped of the ``"Adj Close"`` column when present, and written to
    ``<ticker_name>.csv`` (UTF-8 with BOM) in the current working directory.

    Args:
        ticker_name: Ticker symbol passed to ``yf.download``; also used as the
            CSV file name stem.

    Returns:
        The sorted frame that was written to disk.
    """
    data = yf.download(ticker_name)
    # Sort on the index itself rather than on a level called "Date", so the
    # call does not depend on how the index happens to be named.
    data = data.sort_index(ascending=True)
    # NOTE(review): "Adj Close" is not guaranteed to be returned (presumably
    # depends on the yfinance version / auto-adjust setting), so a missing
    # column must not abort the download.
    data = data.drop(columns=["Adj Close"], errors="ignore")
    data.to_csv(ticker_name + ".csv", encoding="utf-8-sig")
    return data

In [4]:
def get_data_investing_group(url, ticker_name):
    """Scrape a historical price table and merge it into ``<ticker_name>.csv``.

    The first HTML table found at ``url`` is normalised (``"Change %"``
    dropped, ``"Vol."``/``"Price"`` renamed to ``"Volume"``/``"Close"``,
    abbreviated volumes expanded via ``convert``), concatenated after the rows
    already stored in ``<ticker_name>.csv`` and written back to that file.

    Args:
        url: Page containing the historical data table.
        ticker_name: File name stem of the existing CSV to update. The file
            must already exist, because it is read before being rewritten.

    Returns:
        The merged, date-sorted frame, or ``None`` when the page contains no
        table.
    """
    scraper = cloudscraper.create_scraper()
    html = scraper.get(url).content
    try:
        dfs = pd.read_html(html)
    except ValueError:
        # read_html raises instead of returning an empty list when the page
        # has no table, which would otherwise skip the message below.
        dfs = []

    if not dfs:
        print("데이터를 찾을 수 없습니다.")
        return None
    df = dfs[0]

    df = df.drop(columns=["Change %"])
    df = df.rename(columns={"Vol.": "Volume", "Price": "Close"})
    df["Volume"] = df["Volume"].apply(convert)
    df["Date"] = pd.to_datetime(df["Date"], format="%m/%d/%Y")
    df.set_index("Date", inplace=True)

    data = pd.read_csv(ticker_name + ".csv", index_col="Date")
    data.index = pd.to_datetime(data.index)

    merged_data = pd.concat([data, df])

    # Scraped prices may arrive as strings with thousands separators
    # (e.g. "1,095.61"); strip them and coerce anything unparsable to NaN.
    for col in ["Open", "High", "Close", "Low"]:
        merged_data[col] = pd.to_numeric(
            merged_data[col].astype(str).str.replace(",", ""), errors="coerce"
        )

    # For dates present in both sources keep the first occurrence, i.e. the
    # row that was already stored in the CSV.
    merged_data = merged_data.loc[~merged_data.index.duplicated(keep="first")]

    # Sort the dataframe based on date.
    merged_data.sort_index(inplace=True)

    # Save to csv.
    merged_data.to_csv(ticker_name + ".csv", index=True)

    return merged_data

In [5]:
def get_data_investing_exchange(url, ticker_name):
    """Scrape an exchange-rate table and merge it into ``<ticker_name>.csv``.

    The second HTML table found at ``url`` is indexed by its parsed ``"Date"``
    column, concatenated after the rows already stored in
    ``<ticker_name>.csv`` and written back to that file.

    Args:
        url: Page containing the historical data table.
        ticker_name: File name stem of the existing CSV to update. The file
            must already exist, because it is read before being rewritten.

    Returns:
        The merged, date-sorted frame, or ``None`` when the page does not
        contain at least two tables.
    """
    scraper = cloudscraper.create_scraper()
    html = scraper.get(url).content
    try:
        dfs = pd.read_html(html)
    except ValueError:
        # read_html raises instead of returning an empty list when the page
        # has no table, which would otherwise skip the message below.
        dfs = []

    # The table of interest is the second one, so a page with fewer than two
    # tables is treated as "no data" instead of failing on dfs[1].
    if len(dfs) < 2:
        print("데이터를 찾을 수 없습니다.")
        return None
    df = dfs[1]

    df["Date"] = pd.to_datetime(df["Date"], format="%m/%d/%Y")
    df.set_index("Date", inplace=True)

    data = pd.read_csv(ticker_name + ".csv", index_col="Date")
    data.index = pd.to_datetime(data.index)

    merged_data = pd.concat([data, df])

    # For dates present in both sources keep the first occurrence, i.e. the
    # row that was already stored in the CSV.
    merged_data = merged_data.loc[~merged_data.index.duplicated(keep="first")]

    # Sort the dataframe based on date.
    merged_data.sort_index(inplace=True)

    # Save to csv.
    merged_data.to_csv(ticker_name + ".csv", index=True)

    return merged_data

### 한국

In [6]:
# Download the ^KS11 history (also written to "^KS11.csv") and display it.
kospi = get_data('^KS11') # KOSPI
kospi

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1996-12-11,705.989990,709.479980,704.429993,704.679993,28000
1996-12-12,705.109985,706.010010,688.739990,689.380005,25900
1996-12-13,690.440002,695.719971,677.640015,689.070007,26500
1996-12-16,686.969971,686.969971,667.710022,673.919983,22800
1996-12-17,675.349976,680.090027,660.390015,663.349976,31600
...,...,...,...,...,...
2023-11-24,2517.879883,2521.560059,2496.629883,2496.629883,359400
2023-11-27,2501.830078,2511.370117,2489.179932,2495.659912,364700
2023-11-28,2506.139893,2522.449951,2502.260010,2521.760010,442400
2023-11-29,2518.800049,2523.979980,2501.439941,2519.810059,579300


In [7]:
# Download the 055550.KS history (also written to "055550.KS.csv") and display it.
# NOTE(review): the variable is called KB but the original comment names
# Shinhan Bank — confirm which institution is intended.
KB = get_data('055550.KS') # Shinhan Bank
KB 

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-04,18688.708984,20234.212891,18308.277344,19259.357422,197145699
2000-01-05,19021.585938,19592.234375,18070.507812,18664.931641,186396333
2000-01-06,18807.593750,19497.126953,16406.119141,17119.427734,184079807
2000-01-07,18546.046875,18878.923828,17618.744141,18070.507812,256406581
2000-01-10,18783.816406,19211.802734,18546.046875,18926.478516,988218404
...,...,...,...,...,...
2023-11-24,37050.000000,37100.000000,36750.000000,36750.000000,404462
2023-11-27,36500.000000,37000.000000,36450.000000,36750.000000,601759
2023-11-28,37050.000000,37100.000000,36600.000000,37000.000000,742029
2023-11-29,36600.000000,36950.000000,36200.000000,36300.000000,726167


### 베트남

In [8]:
# NOTE(review): the commented-out lines below appear to be an earlier manual
# workflow that combined two downloaded CSV exports into "VNINDEX.csv".
# They are not executed.
# a = pd.read_csv('VN Index Historical Data.csv')
# b = pd.read_csv('VN Index Historical Data (1).csv')
# c = pd.concat([a,b])
# c = c.rename(columns = {'Vol.' : 'Volume', 'Price' : 'Close'})
# c['Date'] = pd.to_datetime(c['Date'], format='%m/%d/%Y').dt.date
# c['Volume'] = c['Volume'].apply(convert)
# c = c.drop(columns = ['Change %'])
# c = c.sort_values('Date')
# c = c.set_index('Date')
# c.to_csv("VNINDEX.csv", index=True,encoding='utf-8-sig')

# Scrape the VN index table and merge it into "^VNINDEX.VN.csv". That file must
# already exist, because get_data_investing_group reads it before rewriting it.
vni = get_data_investing_group('https://www.investing.com/indices/vn-historical-data', '^VNINDEX.VN')
vni

Unnamed: 0_level_0,Close,Open,High,Low,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-07-31,101.55,101.55,101.55,101.55,10.0
2000-08-02,103.38,103.38,103.38,103.38,
2000-08-04,105.20,105.20,105.20,105.20,0.0
2000-08-07,106.92,106.92,106.92,106.92,10.0
2000-08-09,108.64,108.64,108.64,108.64,20.0
...,...,...,...,...,...
2023-11-24,1095.61,1088.49,1095.61,1073.31,958340.0
2023-11-27,1088.06,1095.61,1096.84,1085.69,595020.0
2023-11-28,1078.14,1088.06,1089.39,1076.45,341510.0
2023-11-29,1102.80,1095.43,1105.28,1095.43,614960.0


#### VCB

In [9]:
# NOTE(review): the commented-out lines below appear to be an earlier manual
# workflow that converted a downloaded CSV export into "VCB.csv".
# They are not executed.
# a = pd.read_csv('VCB Historical Data.csv')
# a = a.rename(columns = {'Vol.' : 'Volume', 'Price' : 'Close'})
# a['Date'] = pd.to_datetime(a['Date'], format='%m/%d/%Y').dt.date
# a = a.drop(columns = ['Change %'])
# a = a.sort_values('Date')
# a = a.set_index('Date')
# a['Volume'] = a['Volume'].apply(convert)
# a.to_csv("VCB.csv", index=True,encoding='utf-8-sig')

# Download the VCB.VN history (also written to "VCB.VN.csv") and display it.
vcb = get_data("VCB.VN")
vcb

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-06-30,22898.183594,22898.183594,22898.183594,22898.183594,770543
2009-07-01,24043.093750,24043.093750,22707.365234,23089.001953,16372619
2009-07-02,22707.365234,22898.183594,21944.093750,22134.912109,3971498
2009-07-03,21562.457031,21753.275391,21371.638672,21371.638672,2357526
2009-07-06,21371.638672,22325.730469,21371.638672,22325.730469,4118416
...,...,...,...,...,...
2023-11-24,85500.000000,86000.000000,85000.000000,86000.000000,793500
2023-11-27,86000.000000,86000.000000,85100.000000,85800.000000,548000
2023-11-28,85500.000000,85900.000000,84800.000000,85800.000000,926200
2023-11-29,85400.000000,85900.000000,85100.000000,85100.000000,838100


### 환율(USD_VND)

In [10]:
# NOTE(review): the commented-out lines below appear to be an earlier manual
# workflow that combined two downloaded CSV exports into "usd_vnd.csv".
# They are not executed.
#usd_vnd1 = pd.read_csv('USD_VND Historical Data.csv')
#usd_vnd2 = pd.read_csv('USD_VND Historical Data (1).csv')
#usd_vnd = pd.concat([usd_vnd1,usd_vnd2])
#usd_vnd['Date'] = pd.to_datetime(usd_vnd['Date'], format='%m/%d/%Y').dt.date
#usd_vnd = usd_vnd.sort_values('Date')
#usd_vnd = usd_vnd.set_index('Date')
#usd_vnd.to_csv('usd_vnd.csv', index=True, encoding='utf-8-sig')

# Scrape the USD/VND table and merge it into "usd_vnd.csv". That file must
# already exist, because get_data_investing_exchange reads it before rewriting it.
get_data_investing_exchange('https://www.investing.com/currencies/usd-vnd-historical-data', 'usd_vnd')

Unnamed: 0_level_0,Price,Open,High,Low,Vol.,Change %
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1996-01-02,11011.0,11010.0,11011.0,11010.0,,0.00%
1996-01-03,11011.5,11010.0,11011.5,11010.0,,0.00%
1996-01-04,11011.5,11010.0,11011.5,11010.0,,0.00%
1996-01-05,11011.5,11010.0,11011.5,11010.0,,0.00%
1996-01-08,11011.5,11010.0,11011.5,11010.0,,0.00%
...,...,...,...,...,...,...
2023-11-24,24260.0,24260.0,24282.0,24210.0,,0.08%
2023-11-27,24230.0,24230.0,24270.0,24221.5,,-0.12%
2023-11-28,24230.0,24257.5,24260.0,24228.5,,0.00%
2023-11-29,24260.0,24190.0,24270.0,24182.0,,0.12%


### 환율(USD_KRW)

In [11]:
# NOTE(review): the commented-out lines below appear to be an earlier manual
# workflow that combined two downloaded CSV exports into "usd_krw.csv".
# They are not executed.
#usd_krw1 = pd.read_csv('USD_KRW Historical Data.csv')
#usd_krw2 = pd.read_csv('USD_KRW Historical Data (1).csv')
#usd_krw = pd.concat([usd_krw1,usd_krw2])
#usd_krw['Date'] = pd.to_datetime(usd_krw['Date'], format='%m/%d/%Y').dt.date
#usd_krw = usd_krw.sort_values('Date')
#usd_krw = usd_krw.set_index('Date')
#usd_krw.to_csv('usd_krw.csv', index=True, encoding='utf-8-sig')

# Scrape the USD/KRW table and merge it into "usd_krw.csv". That file must
# already exist, because get_data_investing_exchange reads it before rewriting it.
get_data_investing_exchange('https://www.investing.com/currencies/usd-krw-historical-data', 'usd_krw')

Unnamed: 0_level_0,Price,Open,High,Low,Vol.,Change %
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1996-01-02,775.75,775.75,775.75,775.75,,0.00%
1996-01-03,778.45,774.75,778.75,774.05,,0.35%
1996-01-04,787.20,777.60,788.00,777.60,,1.12%
1996-01-05,788.20,786.10,788.60,784.10,,0.13%
1996-01-08,788.20,788.10,791.80,787.60,,0.00%
...,...,...,...,...,...,...
2023-11-24,1302.97,1301.22,1308.83,1296.65,,0.21%
2023-11-27,1296.95,1301.95,1308.60,1297.65,,-0.46%
2023-11-28,1287.25,1298.41,1298.91,1285.70,,-0.75%
2023-11-29,1291.17,1288.35,1294.47,1285.14,,0.30%


## 출력

In [12]:
def country_output(ticker_name):
    """Summarise the cached price history stored in ``<ticker_name>.csv``.

    Rows containing any missing value are discarded before the statistics are
    computed. All "today"/"previous" figures come from the last remaining row
    of the file.

    Args:
        ticker_name: File name stem of the CSV to read. The file needs the
            columns ``Date``, ``Open``, ``High``, ``Low``, ``Close`` and
            ``Volume``.

    Returns:
        A dict with these keys, in this order:

        * ``previous_close`` / ``today_open`` / ``today_volume``: last row's
          Close / Open / Volume, formatted with thousands separators and two
          decimals.
        * ``avg_volume``: mean Volume over all rows, no decimals.
        * ``range_days``: ``"<Low> - <High>"`` of the last row.
        * ``range_52``: ``"<min Low> - <max High>"`` over the 52 weeks ending
          at the last row.
        * ``MA_50`` / ``MA_200``: latest 50- and 200-row rolling mean of
          Close, rounded to one decimal (NaN with fewer rows than the window).
    """
    data = pd.read_csv(ticker_name + ".csv").dropna()
    data.index = pd.to_datetime(data["Date"])

    last_row = data.iloc[-1]
    output = {}

    output["previous_close"] = "{:,.2f}".format(last_row["Close"])
    output["today_open"] = "{:,.2f}".format(last_row["Open"])
    output["today_volume"] = "{:,.2f}".format(last_row["Volume"])
    output["avg_volume"] = "{:,.0f}".format(data["Volume"].mean())

    output["range_days"] = (
        "{:,.2f}".format(last_row["Low"]) + " - " + "{:,.2f}".format(last_row["High"])
    )

    # DataFrame.last() is deprecated, and its "52W" alias is anchored to
    # Sundays rather than spanning exactly 52 weeks. Select the window
    # explicitly: every row strictly later than (last date - 52 weeks).
    cutoff = data.index[-1] - pd.Timedelta(weeks=52)
    last_year_data = data.loc[data.index > cutoff]
    output["range_52"] = (
        "{:,.2f}".format(last_year_data["Low"].min())
        + " - "
        + "{:,.2f}".format(last_year_data["High"].max())
    )

    close = data["Close"]
    output["MA_50"] = round(close.rolling(window=50).mean().iloc[-1], 1)
    output["MA_200"] = round(close.rolling(window=200).mean().iloc[-1], 1)

    return output

In [13]:
def _format_enterprise_value(num):
    """Abbreviate ``num`` with a T/B/M suffix, rounded to two decimals."""
    if num is None:
        return None
    # Compare on magnitude so negative values are abbreviated as well.
    for threshold, suffix in ((10**12, "T"), (10**9, "B"), (10**6, "M")):
        if abs(num) >= threshold:
            return str(round(num / threshold, 2)) + suffix
    return str(num)


def group_output(ticker_name):
    """Summarise a single company: price statistics plus ``yfinance`` info.

    The price/volume statistics are exactly those of ``country_output`` for
    the same ``<ticker_name>.csv``; four fields read from
    ``yf.Ticker(ticker_name).info`` are appended.

    Args:
        ticker_name: Ticker symbol; also the file name stem of the cached CSV.

    Returns:
        The ``country_output`` dict extended with:

        * ``Beta``: ``info["beta"]``.
        * ``enterprise``: ``info["enterpriseValue"]`` abbreviated as trillions
          (``T``), billions (``B``) or millions (``M``); the plain number as a
          string below one million.
        * ``Buy`` / ``Sell``: ``info["bid"]`` / ``info["ask"]``.

        Any of these is ``None`` when the field is absent from ``info``.
    """
    # Same keys, formatting and order as the index-level summary.
    output = country_output(ticker_name)

    # Read .info once and reuse it for all four lookups.
    info = yf.Ticker(ticker_name).info

    # .get(): a field missing from info yields None instead of a KeyError.
    output["Beta"] = info.get("beta")
    output["enterprise"] = _format_enterprise_value(info.get("enterpriseValue"))
    output["Buy"] = info.get("bid")
    output["Sell"] = info.get("ask")

    return output

In [14]:
# Summary statistics computed from the cached "^KS11.csv".
country_output("^KS11")

  last_year_data = data.last("52W")


{'previous_close': '2,535.29',
 'today_open': '2,512.11',
 'today_volume': '664,284.00',
 'avg_volume': '439,008',
 'range_days': '2,507.80 - 2,535.29',
 'range_52': '2,180.67 - 2,668.21',
 'MA_50': 2445.8,
 'MA_200': 2508.4}

In [15]:
# Summary statistics computed from the cached "^VNINDEX.VN.csv".
country_output("^VNINDEX.VN")

  last_year_data = data.last("52W")


{'previous_close': '1,094.13',
 'today_open': '1,101.75',
 'today_volume': '723,480.00',
 'avg_volume': '256,978',
 'range_days': '1,094.13 - 1,107.45',
 'range_52': '983.67 - 1,255.11',
 'MA_50': 1110.5,
 'MA_200': 1116.9}

In [16]:
# Summary statistics from the cached "055550.KS.csv" plus yfinance info fields.
group_output("055550.KS")

  last_year_data = data.last("52W")


{'previous_close': '36,900.00',
 'today_open': '36,200.00',
 'today_volume': '1,461,987.00',
 'avg_volume': '181,582,788',
 'range_days': '36,050.00 - 36,900.00',
 'range_52': '32,400.00 - 44,900.00',
 'MA_50': 35910.0,
 'MA_200': 35527.2,
 'Beta': 0.593,
 'enterprise': '65.23T',
 'Buy': 36900.0,
 'Sell': 36950.0}

In [17]:
# Summary statistics from the cached "VCB.VN.csv" plus yfinance info fields.
group_output("VCB.VN")

  last_year_data = data.last("52W")


{'previous_close': '84,700.00',
 'today_open': '85,100.00',
 'today_volume': '986,600.00',
 'avg_volume': '1,763,179',
 'range_days': '84,700.00 - 85,400.00',
 'range_52': '65,198.98 - 93,400.00',
 'MA_50': 86508.0,
 'MA_200': 83422.4,
 'Beta': 0.827,
 'enterprise': '157.46T',
 'Buy': 84700.0,
 'Sell': 84900.0}