In [1]:
#pip install scikit-learn




### Example: 鳶尾花預測

In [2]:
import numpy as np
from sklearn.datasets import load_iris #資料集
from sklearn.model_selection import train_test_split #訓練、測試集分割工具
from sklearn.ensemble import RandomForestClassifier #RF
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


iris = load_iris() # 載入數據集
X = iris.data #input
y = iris.target #output

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) # 切分數據集為訓練集和測試集

clf = RandomForestClassifier(n_estimators=100, random_state=42) # 初始化RandomForestClassifier

clf.fit(X_train, y_train) #訓練模型

y_pred = clf.predict(X_test) #預測

accuracy = accuracy_score(y_test, y_pred) # 評估模型
print(f"Accuracy: {accuracy:.2f}")

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:") # 打印混淆矩陣
print(confusion_matrix(y_test, y_pred))

Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Confusion Matrix:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]


### Example: 股價預測

In [3]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 步驟1：數據收集
symbol = 'AAPL'
data = yf.download(symbol, start='2020-01-01', end='2023-01-01')

# 步驟2：數據預處理
data['SMA_10'] = data['Close'].rolling(window=10).mean() #10天滾動平均
data['SMA_50'] = data['Close'].rolling(window=50).mean() #50天滾動平均
data['Price_Change'] = data['Close'].pct_change() #每日收盤價格變動

[*********************100%%**********************]  1 of 1 completed


In [5]:
# 刪除NA值
data.dropna(inplace=True)

# 步驟3：標籤生成
data['Target'] = np.where(data['Close'].shift(-1) > data['Close'], 1, 0) #若true, 賦值1；若false，賦值0

# 選擇特徵和目標變量
features = ['SMA_10', 'SMA_50', 'Price_Change']
X = data[features]
y = data['Target']

# 切分數據集為訓練集和測試集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=False)

# 步驟4：模型訓練與評估
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# 評估模型
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Accuracy: 0.52
Classification Report:
              precision    recall  f1-score   support

           0       0.55      0.49      0.51       111
           1       0.50      0.56      0.53       102

    accuracy                           0.52       213
   macro avg       0.52      0.52      0.52       213
weighted avg       0.52      0.52      0.52       213

Confusion Matrix:
[[54 57]
 [45 57]]
