# Voting - Regression
- 동일한 데이터셋에 대해 여러가지 모형 이용

## 패키지 로딩

In [1]:
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.ensemble import VotingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

import numpy as np
import pandas as pd

## 데이터 로딩 및 확인

In [5]:
boston = pd.read_csv('./dataset/HousingData.csv')
boston.head()
boston.fillna(method='ffill', inplace=True)
# method='ffill': 결측치의 바로 앞의 값으로 채워넣기

x = boston.drop('MEDV', axis=1)
y = boston['MEDV']

In [8]:
from sklearn.preprocessing import StandardScaler

scaled_x = StandardScaler().fit_transform(x)

## 모델 생성 및 분할

In [9]:
lasso = Lasso(alpha=0.05) # L1 규제, 모든 항에 일정한 규제 강도값 빼기
ridge = Ridge(alpha=1) # L2규제, 각 가중치에 비례해서 규제 강도값 빼기
linear = LinearRegression()

vo_r = VotingRegressor(estimators=[('LASSO', lasso), ('RIDGE', ridge),('LINEAR', linear)])  # estimators=: 사용할 모델들 튜플값으로 넣기 (사용할이름, 모델)

x_train, x_test, y_train, y_test = train_test_split(scaled_x, y, train_size=0.8, random_state=10)

## 학습 및 평가

In [10]:
vo_r.fit(x_train, y_train)
y_hat = vo_r.predict(x_test)

print(f'RMSE:{np.sqrt(mean_squared_error(y_test, y_hat)):.3f}')
print(f'결정계수: {r2_score(y_test, y_hat):.3f}')

RMSE:5.895
결정계수: 0.668


# Voting - Classifier

## 패키지 로딩

In [13]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_curve, roc_auc_score, confusion_matrix
from sklearn.model_selection import train_test_split

import pandas as pd

## 데이터 로딩 및 확인

In [14]:
breast = load_breast_cancer()

df = pd.DataFrame(breast.data, columns=breast.feature_names)
df['target'] = breast.target

In [15]:
data_x = breast.data
data_y = breast.target