In [3]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset as pandas objects
data = fetch_california_housing(as_frame=True)

# Gather the feature columns and the target (stored under 'PRICE') in one frame
df = pd.DataFrame(data=data.data, columns=data.feature_names).assign(PRICE=data.target)


# Separate the target column from the predictor columns
y = df['PRICE']
X = df.drop(columns='PRICE')


# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

def get_mse(model):
    """Fit ``model`` on the training split and return its test-set MSE.

    Uses the module-level ``X_train``/``y_train`` for fitting and
    ``X_test``/``y_test`` for scoring. ``model.fit`` is called on the
    object that was passed in, so the caller's estimator is the one trained.
    """
    model.fit(X_train, y_train)
    # Score the held-out predictions against the true targets.
    return mean_squared_error(y_test, model.predict(X_test))

# Compute the MSE for each model (fitted and scored in the order listed)
lr_mse, dt_mse, rf_mse, xgb_mse = map(
    get_mse,
    (
        LinearRegression(),
        DecisionTreeRegressor(random_state=42),
        RandomForestRegressor(random_state=42),
        XGBRegressor(objective='reg:squarederror', random_state=42),
    ),
)

# Print the results, one model per line
print(
    f"Linear Regression의 MSE는 {lr_mse:.4f} 입니다.",
    f"Decision Tree Regression의 MSE는 {dt_mse:.4f} 입니다.",
    f"Random Forest Regression의 MSE는 {rf_mse:.4f} 입니다.",
    f"XGBoost Regression의 MSE는 {xgb_mse:.4f} 입니다.",
    sep="\n",
)


Linear Regression의 MSE는 0.5559 입니다.
Decision Tree Regression의 MSE는 0.4952 입니다.
Random Forest Regression의 MSE는 0.2554 입니다.
XGBoost Regression의 MSE는 0.2246 입니다.
