In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the combined dataset from a CSV path relative to the current working directory.
dataDF = pd.read_csv(filepath_or_buffer="./combine/模型用/combine.csv")

In [None]:
# Per-district subsets used later to train one model per district.
# Each frame keeps only the rows whose "鄉鎮市區" value equals the given district name:
#   df1 -> 新莊區, df2 -> 板橋區, df3 -> 新店區, df4 -> 中和區
df1, df2, df3, df4 = (
    dataDF.loc[dataDF["鄉鎮市區"].eq(district)]
    for district in ("新莊區", "板橋區", "新店區", "中和區")
)

# Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn import preprocessing


# Features are selected by position (columns 4 through 17); the target is "Price".
# NOTE(review): assumes "Price" is not one of columns 4..17 — confirm against the CSV layout.
X = dataDF.iloc[:, 4:18].values
y = dataDF["Price"].values

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the held-out rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit the linear regression on the standardized training features.
linear_model = LinearRegression().fit(X_train, y_train)

# Predictions on both partitions, so train and test scores can be compared.
y_train_pred = linear_model.predict(X_train)
y_test_pred = linear_model.predict(X_test)

# Fitted coefficients (one per standardized feature), followed by a blank line.
print('Coefficients: {}\n'.format(linear_model.coef_))

# Model performance: mean squared error and R^2 on train and test.
print(
    "MSE_train: {}".format(mean_squared_error(y_train, y_train_pred)),
    "MSE_test: {}".format(mean_squared_error(y_test, y_test_pred)),
    'R2_train: {}'.format(r2_score(y_train, y_train_pred)),
    'R2_test: {}'.format(r2_score(y_test, y_test_pred)),
    sep="\n",
)

# Polynomial Regression

In [None]:
# Ridge is not used anywhere in this cell; the import is kept in case other cells rely on it.
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures


# Features are selected by position (columns 4 through 17); the target is "Price".
X = dataDF.iloc[:, 4:18].values
y = dataDF["Price"].values

# Expand the raw features into degree-2 polynomial terms before splitting.
# NOTE(review): with default settings PolynomialFeatures presumably also emits a
# constant bias column, which would duplicate LinearRegression's own intercept and
# become constant after scaling — consider include_bias=False; confirm before changing,
# since it would shorten the printed coefficient vector.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)

# Hold out 20% of the expanded rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_poly, y, test_size=0.2, random_state=1
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the held-out rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Polynomial regression = linear regression on the expanded, standardized features.
poly_model = LinearRegression().fit(X_train, y_train)

# Predictions on both partitions, so train and test scores can be compared.
y_train_pred = poly_model.predict(X_train)
y_test_pred = poly_model.predict(X_test)

# Fitted coefficients (one per polynomial term), followed by a blank line.
print('Coefficients: {}\n'.format(poly_model.coef_))

# Model performance: mean squared error and R^2 on train and test.
print(
    "MSE_train: {}".format(mean_squared_error(y_train, y_train_pred)),
    "MSE_test: {}".format(mean_squared_error(y_test, y_test_pred)),
    'R2_train: {}'.format(r2_score(y_train, y_train_pred)),
    'R2_test: {}'.format(r2_score(y_test, y_test_pred)),
    sep="\n",
)

# RandomForest Regression

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn import preprocessing


# Features are selected by position (columns 4 through 17); the target is "Price".
# This cell uses every district in dataDF.
X = dataDF.iloc[:, 4:18].values
y = dataDF["Price"].values

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the held-out rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Random forest: 500 trees, each capped at 145 leaves, trained on all cores (n_jobs=-1).
forest_model = RandomForestRegressor(
    n_estimators=500,
    random_state=1,
    n_jobs=-1,
    max_leaf_nodes=145,
).fit(X_train, y_train)

# Predictions on both partitions, so train and test scores can be compared.
y_train_pred = forest_model.predict(X_train)
y_test_pred = forest_model.predict(X_test)

# Model performance: mean squared error and R^2 on train and test.
print(
    "MSE_train: {}".format(mean_squared_error(y_train, y_train_pred)),
    "MSE_test: {}".format(mean_squared_error(y_test, y_test_pred)),
    'R2_train: {}'.format(r2_score(y_train, y_train_pred)),
    'R2_test: {}'.format(r2_score(y_test, y_test_pred)),
    sep="\n",
)

## RandomForest Regression - 各區自行建立一個模型

### 新莊區

In [None]:
# Xinzhuang District (新莊區) — random forest trained on df1 only.

# Features are selected by position (columns 4 through 17); the target is "Price".
X = df1.iloc[:, 4:18].values
y = df1["Price"].values

# Hold out 20% of the district's rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the held-out rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Random forest: 300 trees, each capped at 80 leaves, trained on all cores (n_jobs=-1).
forest_model = RandomForestRegressor(
    n_estimators=300,
    random_state=1,
    n_jobs=-1,
    max_leaf_nodes=80,
).fit(X_train, y_train)

# Predictions on both partitions, so train and test scores can be compared.
y_train_pred = forest_model.predict(X_train)
y_test_pred = forest_model.predict(X_test)

# Model performance: mean squared error and R^2 on train and test.
print(
    "MSE_train: {}".format(mean_squared_error(y_train, y_train_pred)),
    "MSE_test: {}".format(mean_squared_error(y_test, y_test_pred)),
    'R2_train: {}'.format(r2_score(y_train, y_train_pred)),
    'R2_test: {}'.format(r2_score(y_test, y_test_pred)),
    sep="\n",
)

### 板橋區

In [None]:
# Banqiao District (板橋區) — random forest trained on df2 only.

# Features are selected by position (columns 4 through 17); the target is "Price".
X = df2.iloc[:, 4:18].values
y = df2["Price"].values

# Hold out 20% of the district's rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the held-out rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Random forest: 300 trees, each capped at 30 leaves, trained on all cores (n_jobs=-1).
forest_model = RandomForestRegressor(
    n_estimators=300,
    random_state=1,
    n_jobs=-1,
    max_leaf_nodes=30,
).fit(X_train, y_train)

# Predictions on both partitions, so train and test scores can be compared.
y_train_pred = forest_model.predict(X_train)
y_test_pred = forest_model.predict(X_test)

# Model performance: mean squared error and R^2 on train and test.
print(
    "MSE_train: {}".format(mean_squared_error(y_train, y_train_pred)),
    "MSE_test: {}".format(mean_squared_error(y_test, y_test_pred)),
    'R2_train: {}'.format(r2_score(y_train, y_train_pred)),
    'R2_test: {}'.format(r2_score(y_test, y_test_pred)),
    sep="\n",
)

### 新店區

In [None]:
# Xindian District (新店區) — random forest trained on df3 only.

# Features are selected by position (columns 4 through 17); the target is "Price".
X = df3.iloc[:, 4:18].values
y = df3["Price"].values

# Hold out 20% of the district's rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the held-out rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Random forest: 300 trees, each capped at 80 leaves, trained on all cores (n_jobs=-1).
forest_model = RandomForestRegressor(
    n_estimators=300,
    random_state=1,
    n_jobs=-1,
    max_leaf_nodes=80,
).fit(X_train, y_train)

# Predictions on both partitions, so train and test scores can be compared.
y_train_pred = forest_model.predict(X_train)
y_test_pred = forest_model.predict(X_test)

# Model performance: mean squared error and R^2 on train and test.
print(
    "MSE_train: {}".format(mean_squared_error(y_train, y_train_pred)),
    "MSE_test: {}".format(mean_squared_error(y_test, y_test_pred)),
    'R2_train: {}'.format(r2_score(y_train, y_train_pred)),
    'R2_test: {}'.format(r2_score(y_test, y_test_pred)),
    sep="\n",
)

### 中和區

In [None]:
# Zhonghe District (中和區) — random forest trained on df4 only.

# Features are selected by position (columns 4 through 17); the target is "Price".
X = df4.iloc[:, 4:18].values
y = df4["Price"].values

# Hold out 20% of the district's rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the held-out rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Random forest: 300 trees, each capped at 50 leaves, trained on all cores (n_jobs=-1).
forest_model = RandomForestRegressor(
    n_estimators=300,
    random_state=1,
    n_jobs=-1,
    max_leaf_nodes=50,
).fit(X_train, y_train)

# Predictions on both partitions, so train and test scores can be compared.
y_train_pred = forest_model.predict(X_train)
y_test_pred = forest_model.predict(X_test)

# Model performance: mean squared error and R^2 on train and test.
print(
    "MSE_train: {}".format(mean_squared_error(y_train, y_train_pred)),
    "MSE_test: {}".format(mean_squared_error(y_test, y_test_pred)),
    'R2_train: {}'.format(r2_score(y_train, y_train_pred)),
    'R2_test: {}'.format(r2_score(y_test, y_test_pred)),
    sep="\n",
)