In [None]:
# 延用上週爬取的股價資料，完成本次作業要求：
# 1. 計算技術指標（SMA、RSI、MACD）
# 2. 可視化 K 線圖
# 3. 技術指標結合機器學習（線性回歸、決策樹、隨機森林）預測股價漲跌。使用技術指標作為特徵來預測第 n 天的股價

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import os
import requests, json
import plotly.graph_objects as go

In [162]:
# 爬取股價資料
def fetch_stock_data(code, year, month, timeout=10):
    """Download one month of daily trading rows for a stock from the TWSE endpoint.

    Args:
        code: Stock number placed in the ``stockNo`` query parameter.
        year: Gregorian year of the month to fetch.
        month: Month number, as an int or numeric string; zero-padded to two digits.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame built from the response's ``fields`` / ``data`` entries, with
        the '日期' column rewritten from ROC (Minguo) years to Gregorian years.
        Returns ``None``, after printing a notice, when the response has no rows.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    query_interval = f'{year}{int(month):02d}01'
    url = f'https://www.twse.com.tw/exchangeReport/STOCK_DAY?response=json&date={query_interval}&stockNo={code}'
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    content = json.loads(response.text)

    # A payload without 'total' or 'data' is treated like "no rows" instead of
    # failing with KeyError.
    if not content.get('total') or not content.get('data'):
        print("查無資料")
        return None

    df = pd.DataFrame(data=content['data'], columns=content['fields'])
    # Dates look like '<ROC year>/MM/DD'. Split on the first '/' rather than
    # slicing a fixed three characters, so two-digit ROC years convert as well.
    roc_parts = df['日期'].str.split('/', n=1, expand=True)
    gregorian_year = (roc_parts[0].astype(int) + 1911).astype(str)
    df['日期'] = gregorian_year + '/' + roc_parts[1]
    return df
    

# Analysis configuration: look-back horizon in years and the stock number to download.
n: int = 4
target_code: str = "2330"
# 爬取近 n 年股價資料
def fetch_stock_data_years(code, n):
    """Fetch roughly the last ``n`` years of daily data, one month per request.

    The range is made of whole calendar years: it starts in January of the year
    that lies ``52 * n`` weeks before today and ends with the current month.

    Args:
        code: Stock number forwarded to ``fetch_stock_data``.
        n: Number of years to look back.

    Returns:
        A single DataFrame with every fetched month concatenated in
        chronological order and a fresh 0..N-1 index.

    Raises:
        ValueError: If no month returned any data.
    """
    end_date = datetime.datetime.now()
    start_date = end_date - datetime.timedelta(weeks=52 * n)

    monthly_frames = []
    for year in range(start_date.year, end_date.year + 1):
        # Months after the current one have not happened yet, so stop there
        # for the current year and take all twelve months for past years.
        last_month = end_date.month if year == end_date.year else 12
        for month in range(1, last_month + 1):
            monthly = fetch_stock_data(code, year, month)
            # The check must sit inside the month loop; otherwise only the
            # last month of each past year would be kept.
            if monthly is not None:
                monthly_frames.append(monthly)

    if not monthly_frames:
        raise ValueError(f'no data fetched for stock {code}')
    # ignore_index avoids a repeating per-month index in the combined frame.
    return pd.concat(monthly_frames, ignore_index=True)


# Download the raw history; numeric columns arrive as strings with thousands separators.
df = fetch_stock_data_years(target_code, n)

# Target dtype per numeric column. Integers are pinned to 64 bits because plain
# `int` maps to a 32-bit type on some platforms (Windows with NumPy < 2), and
# turnover values can exceed that range.
numeric_dtypes = {
    '成交股數': 'int64',
    '成交金額': 'int64',
    '開盤價': 'float64',
    '最高價': 'float64',
    '最低價': 'float64',
    '收盤價': 'float64',
    '成交筆數': 'int64',
}
for column, dtype in numeric_dtypes.items():
    # Strip the literal comma separators before casting.
    df[column] = df[column].str.replace(',', '', regex=False).astype(dtype)

# Persist the cleaned data as CSV.
df.to_csv(f'stock_data_{target_code}.csv', index=False)


In [163]:
# 1. 計算技術指標（SMA、RSI、MACD）
def calculate_sma(data, period):
    """Return the simple moving average of the closing price.

    Args:
        data: DataFrame with a '收盤價' (closing price) column.
        period: Rolling window length, in rows.

    Returns:
        A Series indexed like ``data`` holding the mean of each window.
    """
    closing_prices = data['收盤價']
    windowed = closing_prices.rolling(window=period)
    return windowed.mean()

# Simple moving averages of the close over 5-, 10-, 20- and 60-row windows.
sma5, sma10, sma20, sma60 = (calculate_sma(df, window) for window in (5, 10, 20, 60))


In [167]:
def calculate_rsi(data, period):
    """Compute the Relative Strength Index from simple rolling averages.

    RSI = 100 - 100 / (1 + RS), where RS is the rolling mean of gains divided
    by the rolling mean of losses over ``period`` rows.

    Args:
        data: DataFrame with a '收盤價' (closing price) column.
        period: Number of price changes averaged in each window.

    Returns:
        A Series indexed like ``data``. The first ``period`` rows are NaN,
        because the first row has no previous close to diff against. A window
        with no losses gives 100; a window with neither gains nor losses
        gives NaN (0 / 0).
    """
    delta = data['收盤價'].diff()
    # clip() keeps the leading NaN from diff(), so the undefined first change is
    # not counted as a zero move and no RSI is emitted from a short window.
    gains = delta.clip(lower=0)
    losses = (-delta).clip(lower=0)
    avg_gain = gains.rolling(window=period).mean()
    avg_loss = losses.rolling(window=period).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

# RSI of the closing price over a 14-row window.
rsi = calculate_rsi(data=df, period=14)

In [170]:
def calculate_macd(data, slow=26, fast=12, signal=9):
    """Compute the MACD line and its signal line from the closing price.

    Args:
        data: DataFrame with a '收盤價' (closing price) column.
        slow: Span of the slow exponential moving average.
        fast: Span of the fast exponential moving average.
        signal: Span of the EMA applied to the MACD line.

    Returns:
        A ``(macd, macd_signal)`` tuple of Series indexed like ``data``.
    """
    def _ema(series, span):
        # Recursive (adjust=False) exponential moving average.
        return series.ewm(span=span, adjust=False).mean()

    closing_prices = data['收盤價']
    macd_line = _ema(closing_prices, fast) - _ema(closing_prices, slow)
    signal_line = _ema(macd_line, signal)
    return macd_line, signal_line

# MACD from 12- and 26-span EMAs, with a 9-span EMA as the signal line.
macd, macd_signal = calculate_macd(df, slow=26, fast=12, signal=9)


In [171]:
# 2. 可視化 K 線圖
# Attach the indicators to the price frame so the chart can read them as columns.
df['SMA5'], df['SMA10'], df['SMA20'], df['SMA60'] = sma5, sma10, sma20, sma60
df['RSI'], df['MACD'], df['MACD_Signal'] = rsi, macd, macd_signal

# 可視化 K 線圖
def plot_candlestick(data, title):
    """Show an interactive candlestick chart with indicators in separate panels.

    The candles and SMA lines share the top panel. RSI and MACD each get their
    own panel: RSI is bounded to 0-100 and MACD is a difference of averages, so
    drawing them on the price axis would flatten them and distort the price scale.

    Args:
        data: DataFrame with '日期', '開盤價', '最高價', '最低價', '收盤價',
            'SMA5', 'SMA10', 'SMA20', 'SMA60', 'RSI', 'MACD' and 'MACD_Signal'
            columns.
        title: Figure title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Local import: only plotly.graph_objects is imported at module level.
    from plotly.subplots import make_subplots

    dates = data['日期']
    fig = make_subplots(rows=3, cols=1, shared_xaxes=True,
                        vertical_spacing=0.03, row_heights=[0.6, 0.2, 0.2])

    # Panel 1: price candles (red = up, green = down) plus the moving averages.
    fig.add_trace(go.Candlestick(x=dates,
                                 open=data['開盤價'],
                                 high=data['最高價'],
                                 low=data['最低價'],
                                 close=data['收盤價'],
                                 increasing_line_color='red',
                                 decreasing_line_color='green'),
                  row=1, col=1)

    # (column, panel row) pairs; each column becomes one line trace named after it.
    line_panels = [
        ('SMA5', 1), ('SMA10', 1), ('SMA20', 1), ('SMA60', 1),
        ('RSI', 2),
        ('MACD', 3), ('MACD_Signal', 3),
    ]
    for column, panel_row in line_panels:
        fig.add_trace(go.Scatter(x=dates, y=data[column], mode='lines', name=column),
                      row=panel_row, col=1)

    # The candlestick range slider would overlap the lower panels, so hide it.
    fig.update_layout(title=title, width=1200, height=800,
                      xaxis_rangeslider_visible=False)
    fig.update_yaxes(title_text='價格', row=1, col=1)
    fig.update_yaxes(title_text='RSI', row=2, col=1)
    fig.update_yaxes(title_text='MACD', row=3, col=1)
    fig.update_xaxes(title_text='日期', row=3, col=1)
    fig.show()


# Render the chart for the configured stock and look-back window.
plot_candlestick(data=df, title=f'{target_code} 近{n}年 K 線圖')




In [95]:
# 3. 技術指標結合機器學習（線性回歸、決策樹、隨機森林）預測股價漲跌。
# 使用技術指標作為特徵來預測第 n 天的股價



