### SVR

##### (使用網格法進行超參數篩選並進行預測)

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.metrics import r2_score

data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = ['總面積', '主建物佔比', '型態', '管理組織', '電梯', '交易年份', '土', '建', '車', '房', '廳', '衛', '樓別', '樓高', '地區']
target = ['總價(萬元)']

# 分割資料集為訓練集和測試集
X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=120)

# 合併訓練數據和測試數據
combined_data = pd.concat([X_train, X_test])

# 處理字串型特徵
if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

# 特徵編碼
combined_data_encoded = pd.get_dummies(combined_data, columns=['型態', '管理組織', '電梯', '樓別', '地區'])
# 分割回訓練集和測試集
X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]
# 特徵標準化
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

param_grid = {
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'C': [0.1, 1, 10],
    'epsilon': [0.1, 0.01],
    'gamma': ['scale', 'auto']
}

grid_search = GridSearchCV(estimator=SVR(), param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)

print("最佳超參數: ", grid_search.best_params_)
print("最佳得分: ", -grid_search.best_score_)

# 使用最佳超參數訓練SVR模型
best_svr = SVR(**grid_search.best_params_)
best_svr.fit(X_train_scaled, y_train)

# 進行預測
y_pred = best_svr.predict(X_test_scaled)
# 將y_test和y_pred轉換為一維陣列
y_test = y_test.values.ravel()
y_pred = y_pred.ravel()

mse = mean_squared_error(y_test, y_pred)
print('均方誤差（Mean Squared Error）:', mse)
rmse = np.sqrt(mse)
print('均方根誤差（Root Mean Squared Error）:', rmse)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape)
mae = np.mean(np.abs(y_test - y_pred))
print('平均絕對誤差（Mean Absolute Error）:', mae)
smape = 2 * np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print('對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape)
r2 = r2_score(y_test, y_pred)
print('決定係數（R-squared score）:', r2)

results = pd.DataFrame({'實際值': y_test, '預測值': y_pred})
print(results.head(20))

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

最佳超參數:  {'C': 10, 'epsilon': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
最佳得分:  6471.465711493484
均方誤差（Mean Squared Error）: 3520.566711842898
均方根誤差（Root Mean Squared Error）: 59.33436366763275
平均絕對百分比誤差（Mean Absolute Percentage Error）: 8.808725860970974
平均絕對誤差（Mean Absolute Error）: 44.97565515886306
對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 8.515412259867098
決定係數（R-squared score）: 0.8585731504890783
      實際值         預測值
0   700.0  652.342399
1   590.0  656.246406
2   408.2  513.100572
3   630.0  631.686744
4   335.0  352.018540
5   223.0  245.037765
6   470.0  467.918120
7   210.0  209.996013
8   558.0  576.327320
9   550.0  512.199207
10  550.0  650.626504
11  690.0  774.370351
12  570.0  618.469063
13  530.0  511.234120
14  700.0  742.643760
15  780.0  825.873368
16  450.0  540.305694
17  525.0  535.671364
18  502.0  499.838937
19  560.0  509.932572


#### 未更改超參數的SVR

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.model_selection import GridSearchCV

data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = ['總面積','主建物佔比','型態','管理組織','電梯','交易年份','土','建','車','房','廳','衛','樓別','樓高','地區']
target = ['總價(萬元)']

X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=120)

combined_data = pd.concat([X_train, X_test])

if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

combined_data_encoded = pd.get_dummies(combined_data, columns=['型態','管理組織','電梯','樓別','地區'])

X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

# 建立 SVR 模型
svr = SVR(kernel='rbf', C=1.0, epsilon=0.1)

# 訓練模型
svr.fit(X_train_scaled, y_train)

# 在測試集上進行預測
y_pred = svr.predict(X_test_scaled)

# 評估模型
#y_test = y_test.ravel()
# 將 y_test 轉換為一維陣列
y_test = y_test.values.ravel()
mse = mean_squared_error(y_test, y_pred)
print('均方誤差（Mean Squared Error）:', mse)


rmse = np.sqrt(mse)
print('均方根誤差（Root Mean Squared Error）:', rmse)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape)
mae = np.mean(np.abs(y_test - y_pred))
print('平均絕對誤差（Mean Absolute Error）:', mae)
smape = 2 * np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print('對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape)
r2 = r2_score(y_test, y_pred)
print('決定係數（R-squared score）:', r2)

results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})

print(results.head(20))

均方誤差（Mean Squared Error）: 22306.136287082434
均方根誤差（Root Mean Squared Error）: 149.35238962628765
平均絕對百分比誤差（Mean Absolute Percentage Error）: 26.47996337615271
平均絕對誤差（Mean Absolute Error）: 115.10079603961009
對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 22.24885079209253
決定係數（R-squared score）: 0.10392648739442778
    Actual   Predicted
0    700.0  555.857039
1    590.0  544.222377
2    408.2  543.715301
3    630.0  551.417748
4    335.0  513.925720
5    223.0  510.365708
6    470.0  528.440661
7    210.0  506.956040
8    558.0  554.577232
9    550.0  547.839112
10   550.0  544.686719
11   690.0  548.431464
12   570.0  549.610686
13   530.0  538.608063
14   700.0  550.672772
15   780.0  549.646757
16   450.0  536.387438
17   525.0  529.482214
18   502.0  529.532698
19   560.0  532.144430


  y = column_or_1d(y, warn=True)


#### SVR預測

In [9]:
# 定義預測函數
def predict_house_price(features):
    # 預處理輸入特徵
    input_data = pd.DataFrame([features], columns=features)
    input_data['主建物佔比'] = input_data['主建物佔比'].str.replace('%', '').astype(float)
    input_data_encoded = pd.get_dummies(input_data, columns=['型態','管理組織','電梯','樓別','地區']).reindex(columns=X_train_encoded.columns, fill_value=0)
    input_data_scaled = scaler.transform(input_data_encoded)

    # 進行預測
    prediction = best_svr.predict(input_data_scaled)

    return prediction

# 使用預測函數進行房價預測
features = {
    '總面積': 21.10,
    '主建物佔比': '74.90%',
    '型態': '華廈(10層含以下有電梯)',
    '管理組織': '有',
    '電梯': '有',
    '交易年份': 112,
    '土': 1,
    '建': 1,
    '車': 0,
    '房': 2,
    '廳': 1,
    '衛': 1,
    '樓別': '三層',
    '樓高': 5,
    '地區':'安岐'
}

prediction = predict_house_price(features)
print('預測的房價:', prediction)


預測的房價: [427.79578539]


# KRR

### 網格

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.kernel_ridge import KernelRidge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.metrics import r2_score

# 讀取資料集
data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = ['總面積', '主建物佔比', '型態', '管理組織', '電梯', '交易年份', '土', '建', '車', '房', '廳', '衛', '樓別', '樓高', '地區']
target = ['總價(萬元)']

X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=120)

combined_data = pd.concat([X_train, X_test])

if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

combined_data_encoded = pd.get_dummies(combined_data, columns=['型態', '管理組織', '電梯', '樓別', '地區'])

X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

param_grid = {
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'alpha': [0.1, 1, 10],
    #'gamma': ['scale', 'auto']
}

grid_search = GridSearchCV(estimator=KernelRidge(), param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)

print("最佳超參數: ", grid_search.best_params_)
print("最佳得分: ", -grid_search.best_score_)

best_krr = KernelRidge(**grid_search.best_params_)
best_krr.fit(X_train_scaled, y_train)

y_pred = best_krr.predict(X_test_scaled)

y_test = y_test.values.ravel()
y_pred = y_pred.ravel()

mse = mean_squared_error(y_test, y_pred)
print('均方誤差（Mean Squared Error）:', mse)
print('均方根誤差（Root Mean Squared Error）:', rmse)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape)
mae = np.mean(np.abs(y_test - y_pred))
print('平均絕對誤差（Mean Absolute Error）:', mae)
smape = 2 * np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print('對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape)
r2 = r2_score(y_test, y_pred)
print('決定係數（R-squared score）:', r2)

results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results.head(20))



最佳超參數:  {'alpha': 1, 'kernel': 'poly'}
最佳得分:  6545.878468884608
均方誤差（Mean Squared Error）: 3131.5786570238783
均方根誤差（Root Mean Squared Error）: 149.35238962628765
平均絕對百分比誤差（Mean Absolute Percentage Error）: 7.729008447062065
平均絕對誤差（Mean Absolute Error）: 39.800919117818886
對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 7.567108435566226
決定係數（R-squared score）: 0.8741994287542723
    Actual   Predicted
0    700.0  732.841974
1    590.0  575.872041
2    408.2  536.505002
3    630.0  580.193058
4    335.0  355.255705
5    223.0  292.921154
6    470.0  490.794171
7    210.0  227.299462
8    558.0  604.729794
9    550.0  492.624142
10   550.0  610.471678
11   690.0  722.486922
12   570.0  560.928082
13   530.0  501.768286
14   700.0  732.460691
15   780.0  777.677832
16   450.0  563.088477
17   525.0  553.143618
18   502.0  517.843098
19   560.0  526.382010


##### 失敗

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.kernel_ridge import KernelRidge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error

data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = ['總面積', '主建物佔比', '型態', '管理組織', '電梯', '交易年份', '土', '建', '車', '房', '廳', '衛', '樓別', '樓高','地區']
target = ['總價(萬元)']

X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=42)

combined_data = pd.concat([X_train, X_test])

if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

combined_data_encoded = pd.get_dummies(combined_data, columns=['型態', '管理組織', '電梯', '樓別','地區'])

X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

krr = KernelRidge(kernel='rbf', alpha=1.0)

param_grid = {
    'kernel': ['linear', 'rbf', 'poly'],
    'alpha': [0.1, 1.0, 10.0],
    'gamma': ['scale', 'auto', 'float'], 
    'degree': [2, 3, 4],
    'coef0': [0.0, 0.5, 1.0],
    'kernel_params': [None]
}

grid_search = GridSearchCV(estimator=krr, param_grid=param_grid, scoring='neg_mean_squared_error', n_jobs=-1)

grid_search.fit(X_train_scaled, y_train)

best_params = grid_search.best_params_
print('最佳超參數組合:', best_params)

best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test_scaled)

mse_best = mean_squared_error(y_test, y_pred_best)
print('最佳模型均方誤差（Mean Squared Error）:', mse_best)
rmse_best = np.sqrt(mse_best)
print('最佳模型均方根誤差（Root Mean Squared Error）:', rmse_best)
mape_best = mean_absolute_percentage_error(y_test, y_pred_best)
print('最佳模型平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape_best)
mae_best = np.mean(np.abs(y_test - y_pred_best))
print('最佳模型平均絕對誤差（Mean Absolute Error）:', mae_best)
smape_best = 2 * np.mean(np.abs(y_test - y_pred_best) / (np.abs(y_test) + np.abs(y_pred_best))) * 100
print('最佳模型對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape_best)
r2_best = r2_score(y_test, y_pred_best)
print('最佳模型決定係數（R-squared score）:', r2_best)

prediction_df_best = pd.DataFrame({'實際值': y_test[:30].values.reshape(-1), '預測值': y_pred_best[:30].reshape(-1)})
print(prediction_df_best)

810 fits failed out of a total of 1215.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
405 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\ASUS\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\ASUS\anaconda3\lib\site-packages\sklearn\kernel_ridge.py", line 197, in fit
    K = self._get_kernel(X)
  File "c:\Users\ASUS\anaconda3\lib\site-packages\sklearn\kernel_ridge.py", line 155, in _get_kernel
    return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, **params)
  File "c:\Users\ASUS\anaconda3\lib\site-packages\sklearn\metrics\pairwise.py", line 2053, in pairwise_kern

最佳超參數組合: {'alpha': 10.0, 'coef0': 0.0, 'degree': 2, 'gamma': 'scale', 'kernel': 'linear', 'kernel_params': None}
最佳模型均方誤差（Mean Squared Error）: 300114.2343666725
最佳模型均方根誤差（Root Mean Squared Error）: 547.82682881242
最佳模型平均絕對百分比誤差（Mean Absolute Percentage Error）: 1.128464187486195
最佳模型平均絕對誤差（Mean Absolute Error）: 總價(萬元)    543.332569
dtype: float64
最佳模型對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 總價(萬元)    174.151454
dtype: float64
最佳模型決定係數（R-squared score）: -8.285628630115518
       實際值          預測值
0    345.3  -187.407583
1    498.0   -85.735970
2    550.0    62.234164
3    434.5   -60.976356
4    500.0     4.795731
5    550.0    24.486572
6    400.0  -183.524953
7    330.0  -224.904045
8    680.0    94.284735
9    405.0  -102.081410
10   430.0  -116.655466
11   175.0  -285.511548
12   580.0    42.359118
13   720.0   171.412697
14   367.4  -164.703233
15   455.0  -107.930919
16   580.0   118.563030
17   573.8    34.365522
18   351.8  -165.048522
19   560.0   -20.595286
20   780

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


### 原先

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.kernel_ridge import KernelRidge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np

data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = ['總面積', '主建物佔比', '型態', '管理組織', '電梯', '交易年份', '土', '建', '車', '房', '廳', '衛', '樓別', '樓高','地區']
target = ['總價(萬元)']

X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=42)

combined_data = pd.concat([X_train, X_test])

if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

combined_data_encoded = pd.get_dummies(combined_data, columns=['型態', '管理組織', '電梯', '樓別','地區'])

X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

krr = KernelRidge(kernel='rbf', alpha=1.0)

krr.fit(X_train_scaled, y_train)

y_pred = krr.predict(X_test_scaled)

y_test = y_test.values.ravel()
mse = mean_squared_error(y_test, y_pred)
print('均方誤差（Mean Squared Error）:', mse)
rmse = np.sqrt(mse)
print('均方根誤差（Root Mean Squared Error）:', rmse)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape)
mae = np.mean(np.abs(y_test - y_pred))
print('平均絕對誤差（Mean Absolute Error）:', mae)
smape = 2 * np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print('對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape)
r2 = r2_score(y_test, y_pred)
print('決定係數（R-squared score）:', r2)

prediction_df = pd.DataFrame({'實際值': y_test[:30].reshape(-1), '預測值': y_pred[:30].reshape(-1)})
print(prediction_df)


均方誤差（Mean Squared Error）: 25750.51269498695
均方根誤差（Root Mean Squared Error）: 160.469662849359
平均絕對百分比誤差（Mean Absolute Percentage Error）: 37.5166065456993
平均絕對誤差（Mean Absolute Error）: 170.75311408604406
對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 34.37918549535486
決定係數（R-squared score）: 0.2032710530198134
       實際值         預測值
0    345.3  395.756249
1    498.0  490.231119
2    550.0  675.102784
3    434.5  537.120670
4    500.0  558.367812
5    550.0  497.537827
6    400.0  397.786258
7    330.0  396.924221
8    680.0  583.477035
9    405.0  460.074482
10   430.0  483.209771
11   175.0  208.681735
12   580.0  581.543532
13   720.0  576.886484
14   367.4  391.408938
15   455.0  427.024738
16   580.0  631.264671
17   573.8  568.783463
18   351.8  404.380054
19   560.0  601.683569
20   780.0  756.383379
21   510.0  464.716588
22   515.0  594.515240
23   720.0  484.842278
24   585.0  602.597820
25   555.0  502.044237
26   640.0  683.076359
27   600.0  612.553213
28  1200.0    0.1

### 預測

In [13]:
def predict_house_price(features):
    input_data = pd.DataFrame([features], columns=features)
    input_data['主建物佔比'] = input_data['主建物佔比'].str.replace('%', '').astype(float)
    input_data_encoded = pd.get_dummies(input_data, columns=['型態','管理組織','電梯','樓別','地區']).reindex(columns=X_train_encoded.columns, fill_value=0)
    input_data_scaled = scaler.transform(input_data_encoded)

    prediction = best_krr.predict(input_data_scaled)

    return prediction

features = {
    '總面積': 21.10,
    '主建物佔比': '74.90%',
    '型態': '華廈(10層含以下有電梯)',
    '管理組織': '有',
    '電梯': '有',
    '交易年份': 112,
    '土': 1,
    '建': 1,
    '車': 0,
    '房': 2,
    '廳': 1,
    '衛': 1,
    '樓別': '三層',
    '樓高': 5,
    '地區':'安岐'
}

prediction = predict_house_price(features)
print('預測的房價:', prediction)


預測的房價: [[360.8780219]]


# Lasso

### 網格

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = ['總面積', '主建物佔比', '型態', '管理組織', '電梯', '交易年份', '土', '建', '車', '房', '廳', '衛', '樓別', '樓高','地區']
target = ['總價(萬元)']

X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=42)

combined_data = pd.concat([X_train, X_test])

if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

combined_data_encoded = pd.get_dummies(combined_data, columns=['型態', '管理組織', '電梯', '樓別','地區'])

X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

lasso = Lasso()

param_grid = {'alpha': [0.1, 0.5, 1.0,2.0,3.0,4.0, 5.0,6.0,7.0,8.0,9.0,10.0]}

grid_search = GridSearchCV(estimator=lasso, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')

grid_search.fit(X_train_scaled, y_train)

best_alpha = grid_search.best_params_['alpha']
print('最佳超參數:', best_alpha)

lasso = Lasso(alpha=best_alpha)

lasso.fit(X_train_scaled, y_train)

y_pred = lasso.predict(X_test_scaled)

y_pred = y_pred.ravel()
y_test = y_test.values.ravel()

mse = mean_squared_error(y_test, y_pred)
print('均方誤差（Mean Squared Error）:', mse)
rmse = np.sqrt(mse)
print('均方根誤差（Root Mean Squared Error）:', rmse)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape)
mae = np.mean(np.abs(y_test - y_pred))
print('平均絕對誤差（Mean Absolute Error）:', mae)
smape = 2 * np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print('對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape)
r2 = r2_score(y_test, y_pred)
print('決定係數（R-squared score）:', r2)

results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results.head(20))

最佳超參數: 0.5
均方誤差（Mean Squared Error）: 4827.202447625001
均方根誤差（Root Mean Squared Error）: 69.47807170341589
平均絕對百分比誤差（Mean Absolute Percentage Error）: 9.65304261982415
平均絕對誤差（Mean Absolute Error）: 49.671446695641386
對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 9.376237686286457
決定係數（R-squared score）: 0.8506448408032989
    Actual   Predicted
0    345.3  364.771010
1    498.0  458.363650
2    550.0  607.960482
3    434.5  484.054151
4    500.0  549.569808
5    550.0  576.435216
6    400.0  368.826356
7    330.0  321.346025
8    680.0  646.653619
9    405.0  445.876903
10   430.0  430.651676
11   175.0  263.859837
12   580.0  592.707885
13   720.0  715.631671
14   367.4  390.177293
15   455.0  446.693222
16   580.0  673.741539
17   573.8  592.090077
18   351.8  384.030625
19   560.0  529.513142


### 原先

In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np

data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = [ '總面積', '主建物佔比', '型態', '管理組織', '電梯', '交易年份', '土', '建', '車', '房', '廳', '衛', '樓別', '樓高','地區']
target = ['總價(萬元)']

X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=42)

combined_data = pd.concat([X_train, X_test])

if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

combined_data_encoded = pd.get_dummies(combined_data, columns=['型態', '管理組織', '電梯', '樓別','地區'])

X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

lasso = Lasso(alpha=1.0)

lasso.fit(X_train_scaled, y_train)

y_pred = lasso.predict(X_test_scaled)

y_test = y_test.values.ravel()
mse = mean_squared_error(y_test, y_pred)
print('均方誤差（Mean Squared Error）:', mse)
rmse = np.sqrt(mse)
print('均方根誤差（Root Mean Squared Error）:', rmse)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape)
mae = np.mean(np.abs(y_test - y_pred))
print('平均絕對誤差（Mean Absolute Error）:', mae)
smape = 2 * np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print('對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape)
r2 = r2_score(y_test, y_pred)
print('決定係數（R-squared score）:', r2)

results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results.head(20))

均方誤差（Mean Squared Error）: 4880.999756598945
均方根誤差（Root Mean Squared Error）: 69.86415215687474
平均絕對百分比誤差（Mean Absolute Percentage Error）: 9.689368920328295
平均絕對誤差（Mean Absolute Error）: 49.85311227242094
對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 9.395173629958501
決定係數（R-squared score）: 0.8489803351743481
    Actual   Predicted
0    345.3  368.823284
1    498.0  458.032821
2    550.0  608.728441
3    434.5  485.928613
4    500.0  549.464352
5    550.0  580.297229
6    400.0  372.796707
7    330.0  326.785524
8    680.0  646.740227
9    405.0  446.308918
10   430.0  430.900060
11   175.0  262.175283
12   580.0  591.754376
13   720.0  710.909984
14   367.4  396.927885
15   455.0  454.826487
16   580.0  674.812933
17   573.8  595.936688
18   351.8  386.228745
19   560.0  533.760158


### 預測

In [16]:
def predict_house_price(features):
    input_data = pd.DataFrame([features], columns=features)
    input_data['主建物佔比'] = input_data['主建物佔比'].str.replace('%', '').astype(float)
    input_data_encoded = pd.get_dummies(input_data, columns=['型態','管理組織','電梯','樓別','地區']).reindex(columns=X_train_encoded.columns, fill_value=0)
    input_data_scaled = scaler.transform(input_data_encoded)

    prediction = lasso.predict(input_data_scaled)

    return prediction

features = {
    '總面積': 24.9,
    '主建物佔比': '74.90%',
    '型態': '華廈(10層含以下有電梯)',
    '管理組織': '有',
    '電梯': '有',
    '交易年份': 112,
    '土': 1,
    '建': 1,
    '車': 0,
    '房': 2,
    '廳': 1,
    '衛': 1,
    '樓別': '三層',
    '樓高': 5,
    '地區':'安岐'
}

prediction = predict_house_price(features)
print('預測的房價:', prediction)


預測的房價: [529.44704625]


# ElasticNet 

### 網格

In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = ['總面積', '主建物佔比', '型態', '管理組織', '電梯', '交易年份', '土', '建', '車', '房', '廳', '衛', '樓別', '樓高','地區']
target = ['總價(萬元)']

X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=42)

combined_data = pd.concat([X_train, X_test])

if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

combined_data_encoded = pd.get_dummies(combined_data, columns=['型態', '管理組織', '電梯', '樓別','地區'])

X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

param_grid = {'alpha': [0.1, 0.5, 1.0, 5.0, 10.0], 'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]}

elasticnet = ElasticNet()

grid_search = GridSearchCV(estimator=elasticnet, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')

grid_search.fit(X_train_scaled, y_train)

best_alpha = grid_search.best_params_['alpha']
best_l1_ratio = grid_search.best_params_['l1_ratio']
print('最佳超參數 alpha:', best_alpha)
print('最佳超參數 l1_ratio:', best_l1_ratio)

elasticnet = ElasticNet(alpha=best_alpha, l1_ratio=best_l1_ratio)

elasticnet.fit(X_train_scaled, y_train)

y_pred = elasticnet.predict(X_test_scaled)

y_pred = y_pred.ravel()
y_test = y_test.values.ravel()

mse = mean_squared_error(y_test, y_pred)
print('均方誤差（Mean Squared Error）:', mse)
rmse = np.sqrt(mse)
print('均方根誤差（Root Mean Squared Error）:', rmse)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape)
mae = np.mean(np.abs(y_test - y_pred))
print('平均絕對誤差（Mean Absolute Error）:', mae)
smape = 2 * np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print('對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape)
r2 = r2_score(y_test, y_pred)
print('決定係數（R-squared score）:', r2)

results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results.head(20))

最佳超參數 alpha: 0.1
最佳超參數 l1_ratio: 0.9
均方誤差（Mean Squared Error）: 4906.500982854111
均方根誤差（Root Mean Squared Error）: 70.04642020013664
平均絕對百分比誤差（Mean Absolute Percentage Error）: 9.657610639153228
平均絕對誤差（Mean Absolute Error）: 49.77374053001016
對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 9.376330759039037
決定係數（R-squared score）: 0.8481913192280777
    Actual   Predicted
0    345.3  362.598763
1    498.0  462.735963
2    550.0  611.008344
3    434.5  487.480928
4    500.0  553.136032
5    550.0  574.875264
6    400.0  366.534249
7    330.0  323.983979
8    680.0  644.704882
9    405.0  447.484637
10   430.0  432.791010
11   175.0  264.454358
12   580.0  592.395245
13   720.0  720.298968
14   367.4  385.828705
15   455.0  442.558348
16   580.0  669.886426
17   573.8  585.968018
18   351.8  384.417675
19   560.0  529.007008


### 原先

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = ['總面積', '主建物佔比', '型態', '管理組織', '電梯', '交易年份', '土', '建', '車', '房', '廳', '衛', '樓別', '樓高','地區']
target = ['總價(萬元)']

X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=42)

combined_data = pd.concat([X_train, X_test])

if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

combined_data_encoded = pd.get_dummies(combined_data, columns=['型態', '管理組織', '電梯', '樓別','地區'])

X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

elasticnet = ElasticNet(alpha=1.0, l1_ratio=0.5)

elasticnet.fit(X_train_scaled, y_train)

y_pred = elasticnet.predict(X_test_scaled)

y_test = y_test.values.ravel()
mse = mean_squared_error(y_test, y_pred)
print('均方誤差（Mean Squared Error）:', mse)

rmse = np.sqrt(mse)
print('均方根誤差（Root Mean Squared Error）:', rmse)

mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape)

mae = np.mean(np.abs(y_test - y_pred))
print('平均絕對誤差（Mean Absolute Error）:', mae)

smape = 2 * np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print('對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape)

r2 = r2_score(y_test, y_pred)
print('決定係數（R-squared score）:', r2)

results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results.head(20))

均方誤差（Mean Squared Error）: 10185.997569598841
均方根誤差（Root Mean Squared Error）: 100.92570321577573
平均絕對百分比誤差（Mean Absolute Percentage Error）: 13.750893024758803
平均絕對誤差（Mean Absolute Error）: 66.57035069597885
對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 12.838932616898568
決定係數（R-squared score）: 0.6848420373723616
    Actual   Predicted
0    345.3  428.548076
1    498.0  514.486529
2    550.0  637.042080
3    434.5  552.855760
4    500.0  623.837346
5    550.0  600.298044
6    400.0  429.918948
7    330.0  422.721421
8    680.0  600.595160
9    405.0  470.207728
10   430.0  457.966651
11   175.0  293.080270
12   580.0  586.070339
13   720.0  693.980877
14   367.4  431.741604
15   455.0  503.587937
16   580.0  617.917232
17   573.8  558.963470
18   351.8  450.190012
19   560.0  562.786307


### 預測

In [19]:
def predict_house_price(features):
    input_data = pd.DataFrame([features], columns=features)
    input_data['主建物佔比'] = input_data['主建物佔比'].str.replace('%', '').astype(float)
    input_data_encoded = pd.get_dummies(input_data, columns=['型態','管理組織','電梯','樓別','地區']).reindex(columns=X_train_encoded.columns, fill_value=0)
    input_data_scaled = scaler.transform(input_data_encoded)

    prediction = elasticnet.predict(input_data_scaled)

    return prediction
features = {
    '總面積': 24.9,
    '主建物佔比': '74.90%',
    '型態': '華廈(10層含以下有電梯)',
    '管理組織': '有',
    '電梯': '有',
    '交易年份': 112,
    '土': 1,
    '建': 1,
    '車': 0,
    '房': 2,
    '廳': 1,
    '衛': 1,
    '樓別': '三層',
    '樓高': 5,
    '地區':'安岐'
}

prediction = predict_house_price(features)
print('預測的房價:', prediction)

預測的房價: [473.25797244]


# 隨機森林

### 網格

In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np

data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = ['總面積', '主建物佔比', '型態', '管理組織', '電梯', '交易年份', '土', '建', '車', '房', '廳', '衛', '樓別', '樓高','地區']
target = ['總價(萬元)']

X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=42)

combined_data = pd.concat([X_train, X_test])

if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

combined_data_encoded = pd.get_dummies(combined_data, columns=['型態', '管理組織', '電梯', '樓別', '地區'])

X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

param_grid = {
    'n_estimators': [50, 100,150, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 4,6,8,10]
}

rf = RandomForestRegressor(random_state=42)

grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='neg_mean_squared_error')

grid_search.fit(X_train_scaled, y_train.values.ravel())

print('最佳超參數:', grid_search.best_params_)

best_model = grid_search.best_estimator_

y_pred = best_model.predict(X_test_scaled)
y_test = y_test.values.ravel()
y_pred = y_pred.ravel()

mse = mean_squared_error(y_test, y_pred)
print('均方誤差（Mean Squared Error）:', mse)
rmse = np.sqrt(mse)
print('均方根誤差（Root Mean Squared Error）:', rmse)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape)
mae = np.mean(np.abs(y_test - y_pred))
print('平均絕對誤差（Mean Absolute Error）:', mae)
smape = 2 * np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print('對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape)
r2 = best_model.score(X_test_scaled, y_test)
print('決定係數（R-squared score）:', r2)

results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results.head(20))

最佳超參數: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 150}
均方誤差（Mean Squared Error）: 4467.26787020883
均方根誤差（Root Mean Squared Error）: 66.83762316396978
平均絕對百分比誤差（Mean Absolute Percentage Error）: 7.092196795339206
平均絕對誤差（Mean Absolute Error）: 38.40246615066204
對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 7.004581758164491
決定係數（R-squared score）: 0.8617813296275534
    Actual   Predicted
0    345.3  395.069333
1    498.0  489.360000
2    550.0  646.683333
3    434.5  482.660000
4    500.0  556.413333
5    550.0  550.460000
6    400.0  370.634667
7    330.0  413.306667
8    680.0  677.133000
9    405.0  424.506667
10   430.0  427.152000
11   175.0  221.900000
12   580.0  584.300000
13   720.0  709.101111
14   367.4  397.884000
15   455.0  456.883333
16   580.0  582.815556
17   573.8  567.646667
18   351.8  398.908000
19   560.0  525.906667


### 原先

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np

data = pd.read_csv('14金寧實價登錄(主建物占比100%).csv')

features = ['總面積', '主建物佔比', '型態', '管理組織', '電梯', '交易年份', '土', '建', '車', '房', '廳', '衛', '樓別', '樓高', '地區']
target = ['總價(萬元)']

X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.3, random_state=42)

combined_data = pd.concat([X_train, X_test])

if combined_data['主建物佔比'].dtype != object:
    combined_data['主建物佔比'] = combined_data['主建物佔比'].astype(str)
combined_data['主建物佔比'] = combined_data['主建物佔比'].str.replace('%', '').astype(float)

combined_data_encoded = pd.get_dummies(combined_data, columns=['型態', '管理組織', '電梯', '樓別', '地區'])

X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

rf = RandomForestRegressor(n_estimators=100, random_state=42)

rf.fit(X_train_scaled, y_train.values.ravel())

y_pred = rf.predict(X_test_scaled)
y_test = y_test.values.ravel()
y_pred = y_pred.ravel()

mse = mean_squared_error(y_test, y_pred)
print('均方誤差（Mean Squared Error）:', mse)
rmse = np.sqrt(mse)
print('均方根誤差（Root Mean Squared Error）:', rmse)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('平均絕對百分比誤差（Mean Absolute Percentage Error）:', mape)
mae = np.mean(np.abs(y_test - y_pred))
print('平均絕對誤差（Mean Absolute Error）:', mae)
smape = 2 * np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred))) * 100
print('對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）:', smape)
r2 = rf.score(X_test_scaled, y_test)
print('決定係數（R-squared score）:', r2)

results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results.head(20))


均方誤差（Mean Squared Error）: 4392.37127840869
均方根誤差（Root Mean Squared Error）: 66.27496720790354
平均絕對百分比誤差（Mean Absolute Percentage Error）: 7.121916822096383
平均絕對誤差（Mean Absolute Error）: 38.495409656116756
對稱平均絕對百分比誤差（Symmetric Mean Absolute Percentage Error）: 7.027359417206641
決定係數（R-squared score）: 0.8640986536911224
    Actual   Predicted
0    345.3  396.130000
1    498.0  491.580000
2    550.0  642.314000
3    434.5  469.660000
4    500.0  557.790000
5    550.0  550.850000
6    400.0  370.654000
7    330.0  408.510000
8    680.0  682.469500
9    405.0  424.520000
10   430.0  424.980000
11   175.0  219.950000
12   580.0  585.400000
13   720.0  708.151667
14   367.4  397.026000
15   455.0  458.635000
16   580.0  583.043333
17   573.8  567.210000
18   351.8  400.806000
19   560.0  523.240000


### 預測

In [22]:
def predict_house_price(features):

    input_data = pd.DataFrame([features], columns=features)
    input_data['主建物佔比'] = input_data['主建物佔比'].str.replace('%', '').astype(float)
    input_data_encoded = pd.get_dummies(input_data, columns=['型態','管理組織','電梯','樓別','地區']).reindex(columns=X_train_encoded.columns, fill_value=0)
    input_data_scaled = scaler.transform(input_data_encoded)

    prediction = best_model.predict(input_data_scaled)

    return prediction

features = {
    '總面積': 24.9,
    '主建物佔比': '74.90%',
    '型態': '華廈(10層含以下有電梯)',
    '管理組織': '有',
    '電梯': '有',
    '交易年份': 112,
    '土': 1,
    '建': 1,
    '車': 0,
    '房': 2,
    '廳': 1,
    '衛': 1,
    '樓別': '三層',
    '樓高': 5,
    '地區':'安岐'
}

prediction = predict_house_price(features)
print('預測的房價:', prediction)

預測的房價: [483.21222222]


# 預測

In [26]:
def SVR_price(features):
    input_data = pd.DataFrame([features], columns=features)
    input_data['主建物佔比'] = input_data['主建物佔比'].str.replace('%', '').astype(float)
    input_data_encoded = pd.get_dummies(input_data, columns=['型態','管理組織','電梯','樓別','地區']).reindex(columns=X_train_encoded.columns, fill_value=0)
    input_data_scaled = scaler.transform(input_data_encoded)

    prediction = best_svr.predict(input_data_scaled)

    return prediction
def KRR_price(features):
    input_data = pd.DataFrame([features], columns=features)
    input_data['主建物佔比'] = input_data['主建物佔比'].str.replace('%', '').astype(float)
    input_data_encoded = pd.get_dummies(input_data, columns=['型態','管理組織','電梯','樓別','地區']).reindex(columns=X_train_encoded.columns, fill_value=0)
    input_data_scaled = scaler.transform(input_data_encoded)

    prediction = best_krr.predict(input_data_scaled)

    return prediction
def Lasso_price(features):

    input_data = pd.DataFrame([features], columns=features)
    input_data['主建物佔比'] = input_data['主建物佔比'].str.replace('%', '').astype(float)
    input_data_encoded = pd.get_dummies(input_data, columns=['型態','管理組織','電梯','樓別','地區']).reindex(columns=X_train_encoded.columns, fill_value=0)
    input_data_scaled = scaler.transform(input_data_encoded)

    prediction = lasso.predict(input_data_scaled)

    return prediction
def ElasticNet_price(features):
    input_data = pd.DataFrame([features], columns=features)
    input_data['主建物佔比'] = input_data['主建物佔比'].str.replace('%', '').astype(float)
    input_data_encoded = pd.get_dummies(input_data, columns=['型態','管理組織','電梯','樓別','地區']).reindex(columns=X_train_encoded.columns, fill_value=0)
    input_data_scaled = scaler.transform(input_data_encoded)

    prediction = elasticnet.predict(input_data_scaled)

    return prediction
def rf_price(features):
    input_data = pd.DataFrame([features], columns=features)
    input_data['主建物佔比'] = input_data['主建物佔比'].str.replace('%', '').astype(float)
    input_data_encoded = pd.get_dummies(input_data, columns=['型態','管理組織','電梯','樓別','地區']).reindex(columns=X_train_encoded.columns, fill_value=0)
    input_data_scaled = scaler.transform(input_data_encoded)

    prediction = best_model.predict(input_data_scaled)

    return prediction

features = {
    '總面積': 21.10,
    '主建物佔比': '74.90%',
    '型態': '華廈(10層含以下有電梯)',
    '管理組織': '有',
    '電梯': '有',
    '交易年份': 112,
    '土': 1,
    '建': 1,
    '車': 0,
    '房': 2,
    '廳': 1,
    '衛': 1,
    '樓別': '三層',
    '樓高': 5,
    '地區':'安岐'
}
prediction = SVR_price(features)
print('SVR預測:', prediction)
prediction = KRR_price(features)
print('KRR預測:', prediction)
prediction = Lasso_price(features)
print('Lasso預測:', prediction)
prediction = ElasticNet_price(features)
print('ElasticNet預測:', prediction)
prediction = rf_price(features)
print('隨機森林預測:', prediction)


SVR預測: [413.52561905]
KRR預測: [[360.8780219]]
Lasso預測: [467.45182023]
ElasticNet預測: [447.66783455]
隨機森林預測: [415.31733333]


In [27]:
features = {
    '總面積': 30.36,
    '主建物佔比': '81.42%',
    '型態': '華廈(10層含以下有電梯)',
    '管理組織': '有',
    '電梯': '有',
    '交易年份': 112,
    '土': 1,
    '建': 1,
    '車': 0,
    '房': 3,
    '廳': 2,
    '衛': 2,
    '樓別': '一層',
    '樓高': 5,
    '地區':'西浦頭'
}
prediction = SVR_price(features)
print('SVR預測:', prediction)
prediction = KRR_price(features)
print('KRR預測:', prediction)
prediction = Lasso_price(features)
print('Lasso預測:', prediction)
prediction = ElasticNet_price(features)
print('ElasticNet預測:', prediction)
prediction = rf_price(features)
print('隨機森林預測:', prediction)


SVR預測: [622.11715451]
KRR預測: [[613.21932681]]
Lasso預測: [672.83319107]
ElasticNet預測: [600.41909932]
隨機森林預測: [671.56]


In [28]:
features = {
    '總面積': 34.9,
    '主建物佔比': '77.98%',
    '型態': '華廈(10層含以下有電梯)',
    '管理組織': '有',
    '電梯': '有',
    '交易年份': 112,
    '土': 1,
    '建': 1,
    '車': 0,
    '房': 3,
    '廳': 2,
    '衛': 2,
    '樓別': '四層',
    '樓高': 5,
    '地區':'伯玉路二段'
}
prediction = SVR_price(features)
print('SVR預測:', prediction)
prediction = KRR_price(features)
print('KRR預測:', prediction)
prediction = Lasso_price(features)
print('Lasso預測:', prediction)
prediction = ElasticNet_price(features)
print('ElasticNet預測:', prediction)
prediction = rf_price(features)
print('隨機森林預測:', prediction)


SVR預測: [491.33317583]
KRR預測: [[536.93807697]]
Lasso預測: [541.01601984]
ElasticNet預測: [524.39709091]
隨機森林預測: [622.485]
