# Stock Analysis Tutorial

- Reference: https://excelsior-cjh.tistory.com/109
- Beautiful Soup tutorial (video): https://www.youtube.com/watch?v=GjKQ6V_ViQE

### Import libraries

In [1]:
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup as bs
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import plotly.offline as offline
import plotly.graph_objects as go
from plotly.subplots import make_subplots

### Get the stock data from Korea Exchange (KRX)

- https://kind.krx.co.kr/corpgeneral/corpList.do?method=loadInitPage

In [2]:
# 'stock_data.xls' is parsed with pd.read_html() rather than pd.read_excel()
# (presumably the KRX export is an HTML table despite its extension -- confirm).
# The '종목코드' column is converted with str so the codes are kept as text.
stock_data = (
    pd.read_html('stock_data.xls', converters={'종목코드': str})[0]
    .iloc[:, :2]  # keep only the first two columns (company name, stock code)
    .rename(columns={'회사명': 'Name', '종목코드': 'Code'})
)
stock_data.head()

Unnamed: 0,Name,Code
0,상상인제3호스팩,415580
1,신영스팩7호,419270
2,포바이포,389140
3,신한제9호스팩,405640
4,미래에셋비전스팩1호,412930


### Get the table of daily stock prices from NAVER Finance

- https://finance.naver.com/

In [3]:
# Define function that returns stock page url
def find_stock_page_url(stock_name, stock_data):
    """Build the NAVER Finance daily-price ("sise") page URL for a stock.

    Parameters
    ----------
    stock_name : str
        Company name to look up in the ``'Name'`` column of ``stock_data``.
    stock_data : pandas.DataFrame
        Table with at least the columns ``'Name'`` and ``'Code'``.

    Returns
    -------
    str
        ``https://finance.naver.com/item/sise.naver?code=<Code>`` for the
        first row whose ``'Name'`` equals ``stock_name``.

    Raises
    ------
    ValueError
        If no row of ``stock_data`` has the requested name.
    """
    matching_codes = stock_data.loc[stock_data['Name'] == f'{stock_name}', 'Code']
    if matching_codes.empty:
        # Fail loudly instead of embedding the repr of an empty Series in the URL.
        raise ValueError(f'Stock name not found in stock_data: {stock_name!r}')
    # Take a single scalar; formatting the whole Series would produce a
    # multi-line string when the name appears more than once.
    stock_code = str(matching_codes.iloc[0]).strip()
    url = f'https://finance.naver.com/item/sise.naver?code={stock_code}'
    return url

stock_name = 'NAVER'
stock_page_url = find_stock_page_url(stock_name, stock_data)

# The daily price table is embedded in an iframe; read the iframe's URL
# from the stock page first.
response = requests.get(stock_page_url, timeout=10)  # do not hang forever on a stalled connection
response.raise_for_status()
soup = bs(response.content, 'html.parser')
iframe_tag = soup.select_one('iframe[title="일별 시세"]')
if iframe_tag is None:
    raise RuntimeError(f'Daily price iframe not found on {stock_page_url}')
iframe_src = iframe_tag.attrs['src']
iframe_src_url = 'https://finance.naver.com' + iframe_src

# Using selenium
options = Options()
options.add_argument('--headless')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

sise_page_tables = []
try:
    driver.get(iframe_src_url)
    iframe_src_html = driver.page_source

    # Use the same explicit parser as above so the result does not depend on
    # which optional parsers happen to be installed.
    iframe_src_soup = bs(iframe_src_html, 'html.parser')
    # Follow at most the first 10 links of the page navigation table
    sise_page_list = iframe_src_soup.select('table.Nnavi a')[:10]

    for page in sise_page_list:
        sise_page_url = 'https://finance.naver.com' + str(page['href'])
        driver.get(sise_page_url)
        sise_page_html = driver.page_source
        # Wrap the HTML in StringIO: passing literal HTML to read_html is deprecated
        sise_page_table = pd.read_html(StringIO(sise_page_html), header=0)[0]
        # Drop every row that has a missing value in any column
        sise_page_tables.append(sise_page_table.dropna(how='any'))
finally:
    # Always shut the headless browser down, even if scraping fails
    driver.quit()

if not sise_page_tables:
    raise RuntimeError(f'No daily price pages found at {iframe_src_url}')

# Concatenate once instead of growing a DataFrame inside the loop
# (DataFrame.append was removed in pandas 2.0)
sise_df = pd.concat(sise_page_tables, ignore_index=True)

sise_df = sise_df.rename(columns={'날짜':'Date', '종가':'Closing Price', '전일비':'Change', '시가':'Opening Price', '고가':'Highest', '저가':'Lowest', '거래량':'Volume'})
numeric_columns = ['Closing Price', 'Change', 'Opening Price', 'Highest', 'Lowest', 'Volume']
sise_df[numeric_columns] = sise_df[numeric_columns].apply(pd.to_numeric)
sise_df['Date'] = pd.to_datetime(sise_df['Date'])
# Oldest rows first, with a fresh 0..n-1 index
sise_df = sise_df.sort_values(by='Date', ascending=True, ignore_index=True)
sise_df.head()



Current google-chrome version is 101.0.4951
Get LATEST chromedriver version for 101.0.4951 google-chrome
Driver [C:\Users\Ji Hun\.wdm\drivers\chromedriver\win32\101.0.4951.41\chromedriver.exe] found in cache


Unnamed: 0,Date,Closing Price,Change,Opening Price,Highest,Lowest,Volume
0,2021-12-17,384500.0,3000.0,387000.0,387500.0,383500.0,505284.0
1,2021-12-20,373000.0,11500.0,381500.0,383000.0,373000.0,630690.0
2,2021-12-21,375000.0,2000.0,370000.0,379000.0,370000.0,480367.0
3,2021-12-22,378500.0,3500.0,376000.0,380000.0,375500.0,263513.0
4,2021-12-23,378000.0,500.0,379500.0,380500.0,374500.0,354342.0


### Plot OHLC chart of a stock using plotly

- https://plotly.com/

In [4]:
offline.init_notebook_mode(connected=True)

# Reference: https://stackoverflow.com/questions/64689342/plotly-how-to-add-volume-to-a-candlestick-chart

# Create subplots: two stacked rows sharing the x axis, titled with the stock name and 'Volume'
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1, subplot_titles=(stock_name, 'Volume'), row_width=[0.2, 0.7])

# Plot OHLC
fig = fig.add_trace(go.Candlestick(x=sise_df['Date'], open=sise_df['Opening Price'], high=sise_df['Highest'], low=sise_df['Lowest'], close=sise_df['Closing Price'],  name='OHLC'), row=1, col=1)

# Bar trace for volumes
fig = fig.add_trace(go.Bar(x=sise_df['Date'], y=sise_df['Volume'], showlegend=False), row=2, col=1)

# Do not show OHLC's rangeslider plot
fig.update(layout_xaxis_rangeslider_visible=False)

# Hide weekends and holidays
# data from krx holidays: http://open.krx.co.kr/contents/MKD/01/0110/01100305/MKD01100305.jsp
xls_data = ['krx_holidays_data_2021.xls', 'krx_holidays_data_2022.xls']
# Read every holiday file and concatenate once
# (DataFrame.append was removed in pandas 2.0)
holidays_df = pd.concat([pd.read_excel(xls_path) for xls_path in xls_data], ignore_index=True)
# The first column of each file is used as the list of dates to hide
holidays_date = holidays_df.iloc[:, 0].to_list()
fig.update_xaxes(rangebreaks=[dict(bounds=["sat", "mon"]), dict(values=holidays_date)])

fig.show()