## 회기 모델

In [30]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, QuantileTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt
plt.rc('font', family='D2Coding')



In [31]:
housing = fetch_california_housing()

In [32]:
# 이렇게 나눈 후 스케일을 옮겨야 함.
X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target)


In [33]:
scalers = {
    "None" : None,
    "StandardScaler" : StandardScaler(),
    "MinMaxScaler" : MinMaxScaler(),
    "QuantileTransformer" : QuantileTransformer(output_distribution="normal") ## 4분위 변환기 ## output_distribution="normal" 정규분포 균등분위
}

In [34]:
scaling_results ={}

for name, scaler in scalers.items():
    if scaler is not None:
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
    else:
        X_train_scaled = X_train
        X_test_scaled = X_test

    svr = RandomForestRegressor()
    svr.fit(X_train_scaled, y_train)

    y_pred = svr.predict(X_test_scaled)
    mae = mean_squared_error(y_test, y_pred)

    scaling_results[name] = {"mae" : mae}



In [35]:
scaling_results

{'None': {'mae': 0.25619987746623646},
 'StandardScaler': {'mae': 0.2537966615555505},
 'MinMaxScaler': {'mae': 0.25367980041626975},
 'QuantileTransformer': {'mae': 0.2545572449053433}}

In [36]:
# StandardScaler randomforest 먼저 돌려서 보라.
# 전처리 안해도 비슷하게 됨.