## 訓練和評估個別模型

In [15]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import RandomizedSearchCV, cross_val_predict, train_test_split
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

# Load the California housing dataset
data = fetch_california_housing()
X = data.data
y = data.target

# Hold out 20% of the rows as a test set (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate regressors, keyed by the display name used in every results dict
models = {
    'Linear Regression': LinearRegression(),
    'SVR': SVR(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
}

# Baseline: fit each candidate with its default hyperparameters and score it on the test set
results = {}
for name in models:
    # fit() returns the estimator itself, so `model` is the same object stored in `models`
    model = models[name].fit(X_train, y_train)
    predictions = model.predict(X_test)
    mse, r2 = mean_squared_error(y_test, predictions), r2_score(y_test, predictions)
    results[name] = dict(MSE=mse, R2=r2)
    print(f"{name} - MSE: {mse:.2f}, R2: {r2:.2f}")

# Hyperparameter candidates for each model, keyed by the same names as `models`
param_grid = {
    'Linear Regression': {'fit_intercept': [True, False]},
    'SVR': {'C': [1, 10, 100], 'gamma': ['scale', 'auto']},
    'Decision Tree': {'max_depth': [None, 10, 20], 'min_samples_leaf': [1, 2, 4]}
}

# Tune each model with RandomizedSearchCV, then score the refit best estimator on the test set
optimized_results = {}
best_models = {}
for name, model in models.items():
    # Every entry in param_grid is a finite list, so the number of distinct candidates is the
    # product of the list lengths (2, 6 and 9 here). Requesting more iterations than that makes
    # scikit-learn emit a UserWarning on every search, so cap n_iter at the grid size.
    n_candidates = int(np.prod([len(values) for values in param_grid[name].values()]))
    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_grid[name],
        n_iter=min(10, n_candidates),
        cv=5,
        scoring='neg_mean_squared_error',
        random_state=42,
    )
    random_search.fit(X_train, y_train)
    best_model = random_search.best_estimator_

    # Keep the best estimator so later cells can reuse it by name
    best_models[name] = best_model
    predictions = best_model.predict(X_test)

    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    optimized_results[name] = {'MSE': mse, 'R2': r2, 'Best Params': random_search.best_params_}
    print(f"Optimized {name} - MSE: {mse:.2f}, R2: {r2:.2f}, Best Params: {random_search.best_params_}")


# Select the two tuned models with the lowest test-set MSE.
# The sort key must read each model's own 'MSE' entry: optimized_results is keyed by model name,
# so optimized_results.get("MSE") is None and sorted() would just order the names alphabetically.
top_models = sorted(optimized_results, key=lambda model_name: optimized_results[model_name]['MSE'])[:2]



Linear Regression - MSE: 0.56, R2: 0.58
SVR - MSE: 1.33, R2: -0.02
Decision Tree - MSE: 0.50, R2: 0.62
Optimized Linear Regression - MSE: 0.56, R2: 0.58, Best Params: {'fit_intercept': True}




Optimized SVR - MSE: 0.66, R2: 0.50, Best Params: {'gamma': 'scale', 'C': 100}




Optimized Decision Tree - MSE: 0.41, R2: 0.69, Best Params: {'min_samples_leaf': 4, 'max_depth': 10}


In [14]:
# Display the dict of tuned estimators kept from the hyperparameter search (model name -> best estimator).
# NOTE(review): the output saved under this cell is a list of two model names, which is what
# `top_models` evaluates to, not this dict; the execution count is also out of order.
# The cell looks stale: restart the kernel and run all cells before sharing.
best_models

['Decision Tree', 'Linear Regression']

## 堆疊最優模型

In [16]:
# Stack the selected models: a linear meta-model learns how to combine their predictions
train_preds = []
test_preds = []

for name in top_models:
    model = best_models[name]
    # The base models were already fit on all of X_train, so their in-sample predictions are
    # optimistic (a deep tree can nearly memorise its training targets) and would teach the
    # meta-model to over-trust them. Use out-of-fold predictions for the training meta-features;
    # cross_val_predict fits clones per fold, so the stored estimator is left untouched.
    train_preds.append(cross_val_predict(model, X_train, y_train, cv=5))
    # Test meta-features come from the estimator that was fit on the full training set
    test_preds.append(model.predict(X_test))

# Arrange the predictions as matrices with one column per base model
train_meta_features = np.column_stack(train_preds)
test_meta_features = np.column_stack(test_preds)

# Fit the meta-model on the out-of-fold base predictions
meta_model = LinearRegression()
meta_model.fit(train_meta_features, y_train)

# Predict the test set with the stacked model
final_predictions = meta_model.predict(test_meta_features)

# Report the stacked model's mean squared error on the test set
mse = mean_squared_error(y_test, final_predictions)
print(f"Stacked Model Mean Squared Error: {mse:.2f}")


Stacked Model Mean Squared Error: 0.40
