# [2018巨量資料期中考] M064810005<BR>["台灣ETF價格預測競賽"](https://tbrain.trendmicro.com.tw/Competitions/Details/2)

>## 載入所需套件

In [1]:
import numpy as np
import pandas as pd
import talib as ta
import pandas_datareader as web

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

>## 載入資料並使用Big5格式

In [2]:
quotes_df = pd.read_csv("taetfp.csv",encoding="big5")

>## 看一下資料長相及是否有誤

In [3]:
quotes_df.head()

Unnamed: 0,代碼,日期,中文簡稱,開盤價(元),最高價(元),最低價(元),收盤價(元),成交張數(張)
0,50,20130102,元大台灣50,46.57,47.13,46.49,46.92,16487
1,50,20130103,元大台灣50,47.35,47.48,47.13,47.31,29020
2,50,20130104,元大台灣50,47.31,47.31,46.92,47.0,9837
3,50,20130107,元大台灣50,47.05,47.05,46.49,46.79,8910
4,50,20130108,元大台灣50,46.57,46.75,46.27,46.49,12507


>## 更改欄位名稱

In [4]:
quotes_df.columns = ['Code', 'Date', 'Name', 'Open', 'High', 'Low', 'Close' ,'Volume']

>## 確認資料更改有無錯誤

In [5]:
quotes_df.head()

Unnamed: 0,Code,Date,Name,Open,High,Low,Close,Volume
0,50,20130102,元大台灣50,46.57,47.13,46.49,46.92,16487
1,50,20130103,元大台灣50,47.35,47.48,47.13,47.31,29020
2,50,20130104,元大台灣50,47.31,47.31,46.92,47.0,9837
3,50,20130107,元大台灣50,47.05,47.05,46.49,46.79,8910
4,50,20130108,元大台灣50,46.57,46.75,46.27,46.49,12507


>## 看一下資料格式<br>當中發現Volume資料格式為object下面將無法計算<br>故我直接從csv檔將其格式更改為int

In [6]:
quotes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19053 entries, 0 to 19052
Data columns (total 8 columns):
Code      19053 non-null int64
Date      19053 non-null int64
Name      19053 non-null object
Open      19053 non-null float64
High      19053 non-null float64
Low       19053 non-null float64
Close     19053 non-null float64
Volume    19053 non-null int64
dtypes: float64(4), int64(3), object(1)
memory usage: 1.2+ MB


>## 將各ETF分類

In [7]:
tra50 = quotes_df.query('Code == 50')
tra51 = quotes_df.query('Code == 51')
tra52 = quotes_df.query('Code == 52')
tra53 = quotes_df.query('Code == 53')
tra54 = quotes_df.query('Code == 54')
tra55 = quotes_df.query('Code == 55')
tra56 = quotes_df.query('Code == 56')
tra57 = quotes_df.query('Code == 57')
tra58 = quotes_df.query('Code == 58')
tra59 = quotes_df.query('Code == 59')
tra690 = quotes_df.query('Code == 690')
tra692 = quotes_df.query('Code == 692')
tra701 = quotes_df.query('Code == 701')
tra713 = quotes_df.query('Code == 713')
tra6201 = quotes_df.query('Code == 6201')
tra6203 = quotes_df.query('Code == 6203')
tra6204 = quotes_df.query('Code == 6204')
tra6208 = quotes_df.query('Code == 6208')

>## 參考<br>[SKLearn Linear Regression Stock Price Prediction](https://gist.github.com/greencoder/ab37304b6d47e6d1e55b4adf96ea7b47)<br>並理解學習

>* #### 以下為元大台灣50(Code=50)之預測 ####

In [8]:
# 定義一個預測函式
def make_prediction(tra50, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra50.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十次交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據結合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra50, linreg))

Unscaled Linear Regression:
Accuracy: 0.996617444789
Predicted Closing Price: 79.32



>* #### 以下為元大中型100(Code=51)之預測 ####

In [9]:
# 定義一個預測函式
def make_prediction(tra51, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra51.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra51, linreg))

Unscaled Linear Regression:
Accuracy: 0.988225113949
Predicted Closing Price: 31.95



>* #### 以下為富邦科技(Code=52)之預測 ####

In [10]:
# 定義一個預測函式
def make_prediction(tra52, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra52.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra52, linreg))

Unscaled Linear Regression:
Accuracy: 0.995813103026
Predicted Closing Price: 53.30



>* #### 以下為元大電子(Code=53)之預測 ####

In [11]:
# 定義一個預測函式
def make_prediction(tra53, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra53.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra53, linreg))

Unscaled Linear Regression:
Accuracy: 0.996906441021
Predicted Closing Price: 34.22



>* #### 以下為元大台商50(Code=54)之預測 ####

In [12]:
# 定義一個預測函式
def make_prediction(tra54, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra54.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra54, linreg))

Unscaled Linear Regression:
Accuracy: 0.989880928634
Predicted Closing Price: 23.08



>* #### 以下為元大MSCI金融(Code=55)之預測 ####

In [13]:
# 定義一個預測函式
def make_prediction(tra55, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra55.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra55, linreg))

Unscaled Linear Regression:
Accuracy: 0.992622895002
Predicted Closing Price: 17.03



>* #### 以下為元大高股息(Code=56)之預測 ####

In [14]:
# 定義一個預測函式
def make_prediction(tra56, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra56.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra56, linreg))

Unscaled Linear Regression:
Accuracy: 0.995734460049
Predicted Closing Price: 25.16



>* #### 以下為富邦摩台(Code=57)之預測 ####

In [15]:
# 定義一個預測函式
def make_prediction(tra57, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra57.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra57, linreg))

Unscaled Linear Regression:
Accuracy: 0.994316266105
Predicted Closing Price: 48.82



>* #### 以下為富邦發達(Code=58)之預測 ####

In [16]:
# 定義一個預測函式
def make_prediction(tra58, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra58.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra58, linreg))

Unscaled Linear Regression:
Accuracy: 0.994429266658
Predicted Closing Price: 45.08



>* #### 以下為富邦金融(Code=59)之預測 ####

In [17]:
# 定義一個預測函式
def make_prediction(tra59, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra59.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra59, linreg))

Unscaled Linear Regression:
Accuracy: 0.988490381586
Predicted Closing Price: 41.94



>* #### 以下為元大富櫃50(6201)之預測 ####

In [18]:
# 定義一個預測函式
def make_prediction(tra6201, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra6201.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra6201, linreg))

Unscaled Linear Regression:
Accuracy: 0.987261516484
Predicted Closing Price: 13.31



>* #### 以下為元大MSCI台灣(6203)之預測 ####

In [19]:
# 定義一個預測函式
def make_prediction(tra6203, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra6203.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra6203, linreg))

Unscaled Linear Regression:
Accuracy: 0.995374932936
Predicted Closing Price: 36.88



>* #### 以下為永豐臺灣加權(6204)之預測 ####

In [20]:
# 定義一個預測函式
def make_prediction(tra6204, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra6204.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra6204, linreg))

Unscaled Linear Regression:
Accuracy: 0.995194187107
Predicted Closing Price: 52.14



>* #### 以下為富邦台50(6208)之預測 ####

In [21]:
# 定義一個預測函式
def make_prediction(tra6208, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra6208.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra6208, linreg))

Unscaled Linear Regression:
Accuracy: 0.996937914265
Predicted Closing Price: 46.08



>* #### 以下為兆豐藍籌30(690)之預測 ####

In [22]:
# 定義一個預測函式
def make_prediction(tra690, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra690.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra690, linreg))

Unscaled Linear Regression:
Accuracy: 0.920525750341
Predicted Closing Price: 21.62



>* #### 以下為富邦公司治理(692)之預測 ####

In [23]:
# 定義一個預測函式
def make_prediction(tra692, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra692.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra692, linreg))

Unscaled Linear Regression:
Accuracy: 0.87629073366
Predicted Closing Price: 20.90



>* #### 以下為國泰臺灣低波動30(701)之預測 ####

In [24]:
# 定義一個預測函式
def make_prediction(tra701, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra701.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra701, linreg))

Unscaled Linear Regression:
Accuracy: 0.829363462115
Predicted Closing Price: 20.81



>* #### 以下為元大台灣高息低波(713)之預測 ####

In [25]:
# 定義一個預測函式
def make_prediction(tra713, estimator):
    
    # 複製新的一個dataframe如此一來就不會動到原始資料
    df = tra713.copy()
    
    # 加入5日移動平均線技術指標
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)

    # 加入20日移動平均線技術指標
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    
    # 加入50日移動平均線技術指標
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # 加入Bollinger Bands技術指標
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(df['Close'].values, 
        timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 加入RSI技術指標
    df['RSI'] = ta.RSI(df['Close'].values, 14)
    
    # 加入快慢線技術指標
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)
    
    # 加入每日收盤價變化百分比
    df['ClosingPctChange'] = df['Close'].pct_change()
    
    # 將今天的資料複製一份
    df_today = df.iloc[-1:, :].copy()
        
    # 創一個'下一日收盤價'的新欄位
    df['NextClose'] = df['Close'].shift(-1)
    
    # 去除有NA值那列
    df.dropna(inplace=True)
    
    # 決定我們想要使用的特徵值
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']
    
    # 創建目標及特徵
    X = df[features_to_fit]
    y = df['NextClose']
    
    # 創建訓練和測試資料集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 
        random_state=42)
    
    # 做十倍交叉驗證並計算我們的平均準確度
    cv = cross_val_score(estimator, X_test, y_test, cv=10)
    print('Accuracy:', cv.mean())
    
    # 將回歸與完整數據集合在一起來預測
    estimator.fit(X, y)

    # 預測今天的收盤價
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)
    
    # 回傳預測收盤價
    return next_price_prediction

# 打印出準確度及預測收盤價
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(tra713, linreg))

Unscaled Linear Regression:
Accuracy: -0.037619652552
Predicted Closing Price: 30.08

