-
Notifications
You must be signed in to change notification settings - Fork 0
mid.md
al2698 edited this page Jun 17, 2022
·
3 revisions
import pandas_datareader as web
# base
import numpy as np
import pandas as pd
# visual
import matplotlib.pyplot as plt
import mplfinance as mpf
# time
import datetime as datetime
# `name` is the ticker symbol; `start` and `end` bound the download period.
name = '3260.TWO'
data_source = 'yahoo'
start = '2021-07-03'
end = '2021-11-03'

# Download the price data and make sure the index is a DatetimeIndex.
df = web.DataReader(name, data_source, start, end)
df.index = pd.DatetimeIndex(df.index)

# Frame handed to mplfinance: everything except the adjusted close.
# NOTE(review): the variable name says 2330 but the ticker above is 3260.TWO.
dfnew_2330 = df.drop(columns=["Adj Close"])

# Up candles are drawn red and down candles green.
mc = mpf.make_marketcolors(up='r', down='g', inherit=True)
s = mpf.make_mpf_style(base_mpf_style='yahoo', marketcolors=mc)

# Candlestick chart with 5- and 10-period moving averages and a volume panel.
kwargs = {
    'type': 'candle',
    'mav': (5, 10),
    'volume': True,
    'figratio': (20, 15),
    'figscale': 1.2,
    'title': name,
    'style': s,
}
mpf.plot(dfnew_2330, **kwargs)
# Fetch historical quotes and plot them
def get_data(stock, start, end):
    """Download daily price history for ``stock`` and plot its adjusted close.

    Args:
        stock: Ticker symbol handed to ``yfinance.download``; it is also used
            as the prefix of the chart title.
        start: Start of the download period.
        end: End of the download period.

    Returns:
        The downloaded DataFrame with rows containing missing values dropped.
    """
    # Quote-download package
    import yfinance as yf

    # Store the historical quotes in df (ticker, start, end).
    # auto_adjust=False is requested explicitly so the 'Adj Close' column
    # plotted below is part of the result.
    # NOTE(review): newer yfinance releases reportedly default auto_adjust to
    # True, which omits that column -- confirm against the installed version.
    df = yf.download(
        stock,
        start=start,
        end=end,
        interval="1d",
        auto_adjust=False,
    ).dropna()
    # Values noted by the original author for yfinance:
    # interval=1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
    # period=1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max

    # Plot
    import matplotlib.pyplot as plt

    # `%matplotlib inline` is IPython-only syntax and a SyntaxError in plain
    # Python. Issue the same magic through the IPython API when it exists and
    # skip it otherwise, so the function also works as a regular script.
    try:
        get_ipython().run_line_magic('matplotlib', 'inline')
    except NameError:
        pass

    plt.figure(figsize=(20, 5))
    plt.title(stock + ' Adj Close')
    plt.plot(df['Adj Close'])
    plt.show()
    return df
# Tidy the data and add derived features
def sort_data(df):
    """Add derived price features to ``df`` in place and drop incomplete rows.

    Columns added, in this order:
        ``Open-Close``      (Open - Close) / Open
        ``High-Low``        (High - Low) / Low
        ``percent_change``  relative change of Close from the previous row
        ``std_5``           5-row rolling standard deviation of percent_change
        ``ret_5``           5-row rolling mean of percent_change

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low`` and ``Close`` columns.
            It is modified in place.

    Returns:
        The same DataFrame object, after rows containing NaN were removed.
    """
    open_px, close_px = df['Open'], df['Close']
    high_px, low_px = df['High'], df['Low']

    df['Open-Close'] = (open_px - close_px) / open_px
    df['High-Low'] = (high_px - low_px) / low_px
    df['percent_change'] = close_px.pct_change()

    # Both rolling statistics use the same 5-row window over percent_change.
    five_row_window = df['percent_change'].rolling(5)
    df['std_5'] = five_row_window.std()
    df['ret_5'] = five_row_window.mean()

    # The leading rows have no complete window yet and therefore hold NaN.
    df.dropna(inplace=True)
    return df
def split_data(df, test_size=0.2, random_state=1, shuffle=True):
    """Build features and next-row direction labels, then split train/test.

    The label of a row is 1 when the following row's ``Close`` is higher than
    this row's ``Close`` and 0 otherwise. The last row has no following row,
    so it is left out of both X and y instead of receiving a 0 label from the
    NaN comparison.

    Args:
        df: DataFrame containing ``Close`` plus the feature columns
            ``Open-Close``, ``High-Low``, ``std_5`` and ``ret_5``.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.
        shuffle: Forwarded to ``train_test_split``.
            NOTE(review): the rows look like a time series; with shuffling the
            test rows are interleaved with training rows -- consider
            ``shuffle=False`` for a chronological split.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as returned by
        ``train_test_split``.
    """
    # Split helper for the train and test groups
    from sklearn.model_selection import train_test_split

    feature_columns = ['Open-Close', 'High-Low', 'std_5', 'ret_5']

    # Drop the final row: its next close is unknown (NaN after the shift).
    labelled = df.iloc[:-1]
    next_close = df['Close'].shift(-1).iloc[:-1]

    # X is the input variable
    X = labelled[feature_columns]
    # y is the target: 1 if the next close is higher, else 0
    y = np.where(next_close > labelled['Close'], 1, 0)

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        shuffle=shuffle,
    )
    print(X_train.shape, X_test.shape)
    print(y_train.shape, y_test.shape)
    return X_train, X_test, y_train, y_test
def RandomForestClassifier(X_train, X_test, y_train, y_test):
    """Fit a random forest on the training set and report on the test set.

    Prints scikit-learn's classification report for the test predictions,
    followed by the line ``RandomForestClassifier``.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels; returned unchanged.

    Returns:
        ``(y_test, y_pre)`` where ``y_pre`` holds the predictions for
        ``X_test``.
    """
    # Imported under an alias so the estimator class is not confused with
    # this function, which carries the same name.
    from sklearn.ensemble import RandomForestClassifier as SkRandomForest
    from sklearn.metrics import classification_report

    forest = SkRandomForest(
        min_samples_leaf=5,
        n_estimators=6000,
        n_jobs=-1,
        random_state=10,
    )
    # Fit the model on the training dataset
    forest.fit(X_train, y_train)
    y_pre = forest.predict(X_test)

    print(classification_report(y_test, y_pre))
    print('RandomForestClassifier')
    return y_test, y_pre
def auc_roc(y_test, y_pre):
    """Print the ROC AUC for the given predictions and draw the ROC curve.

    Args:
        y_test: True labels.
        y_pre: Predicted values passed to ``roc_curve`` as the scores.

    Returns:
        None. The AUC is printed and the figure is shown.
    """
    from sklearn.metrics import auc as area_under_curve
    from sklearn.metrics import roc_curve

    # Performance
    fpr, tpr, _thresholds = roc_curve(y_test, y_pre)
    auc = area_under_curve(fpr, tpr)
    print(auc)

    import matplotlib.pyplot as plt

    curve_label = "random forest, auc=" + str(auc)
    plt.plot(fpr, tpr, color='blue', label=curve_label)
    plt.title("ROC")
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc=4)
    plt.show()
import numpy as np
import pandas as pd
# Ticker to analyse
stock = '^TWII'

# Fetch the historical quotes (get_data also plots them), then add features.
df = get_data(stock, start='2020-06-17', end='2022-06-30')
df = sort_data(df)
df.head(10)  # preview of the first ten rows; the result is not stored

# Split, train the random forest, and evaluate its predictions.
X_train, X_test, y_train, y_test = split_data(df)
y_test_RF, y_pre_RF = RandomForestClassifier(
    X_train,
    X_test,
    y_train,
    y_test,
)
auc_roc(y_test, y_pre_RF)
感覺不是很精準,accuracy連0.8都不到,應該還有很多優化的空間。
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import numpy as np
from datetime import datetime
import pandas as pd
pd.core.common.is_list_like = pd.api.types.is_list_like
from sklearn.linear_model import LinearRegression
import pandas_datareader.data as web
# Index to download and the period to cover.
name = '^TWII'
data_source = 'yahoo'
start = datetime(2020, 1, 17)
end = datetime(2022, 6, 17)
data_SP = web.DataReader(name, data_source, start, end)

# Convert each index entry to a datetime by parsing its string form.
dates = [
    datetime.strptime(str(stamp), '%Y-%m-%d %H:%M:%S')
    for stamp in data_SP.index
]
print(data_SP)

# Regressor: number of whole days elapsed since `start`.
days_since = [(day - start).days for day in dates]

# Fit Close against elapsed days; the model expects a single-column 2-D input.
model = LinearRegression(fit_intercept=True)
model.fit(np.array(days_since).reshape(-1, 1), data_SP['Close'])
yfit = model.predict(np.array(days_since).reshape(-1, 1))

# Fitted values are drawn first, then the actual closes.
plt.figure()
plt.scatter(dates, yfit)
plt.scatter(dates, data_SP['Close'])
plt.xlabel('date')
plt.ylabel('Close')
plt.show()