### 다중 선형 회귀

In [5]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Load the data: x1 and x2 are the predictors, y is the target.
df = pd.read_csv("./week4_dataset.csv")
X = df[['x1', 'x2']]
y = df['y']

# Fit an ordinary least-squares model; fit() returns the estimator itself.
lin_reg = LinearRegression().fit(X, y)

# Intercept first, then one coefficient per predictor.
coef_x1, coef_x2 = lin_reg.coef_
print(lin_reg.intercept_)  # 2.34123...
print(coef_x1)  # 1.61590...
print(coef_x2)  # 0.01438...

# The x2 coefficient is close to zero, which suggests x2 has little effect on y
# (presumably x1 and x2 are on comparable scales -- verify before concluding).

# Coefficient of determination (R^2), computed on the training data.
print(lin_reg.score(X, y))  # 0.959593...

2.3412311451922108
1.6159072106092527
0.014384826255548126
0.9595937494832257


### 다항 회귀

In [12]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Generate data: m points drawn uniformly from [-3, 3).
m = 100
X = 6 * np.random.rand(m, 1) - 3
# Gaussian noise with the same (m, 1) shape as X.
noise = np.random.randn(m, 1)
# y = 0.5x^2 + x + 2 + noise
y = 0.5 * X**2 + X + 2 + noise

# Expand each x into the two features (x, x^2). No bias column is added
# because LinearRegression fits its own intercept.
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly_features.fit_transform(X)

# Fit a linear model on the expanded features.
lin_reg = LinearRegression().fit(X_poly, y)

# Fitted intercept, then the coefficients of x and x^2.
print(lin_reg.intercept_)
print(lin_reg.coef_)


[2.10249927]
[[0.91465495 0.44835174]]


### 로지스틱 회귀

In [16]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Load the data: four predictors (x1..x4) and the target y.
df = pd.read_csv("./week4_log.csv")
feature_columns = ['x1', 'x2', 'x3', 'x4']
X = df[feature_columns]
y = df['y']

# Default settings; the penalty type and the C value can be passed as
# constructor arguments here.
log_reg = LogisticRegression().fit(X, y)

# Fitted intercept, then the fitted coefficients.
for fitted in (log_reg.intercept_, log_reg.coef_):
    print(fitted)

[5.72224497]
[[-0.44763344  0.89878042 -2.1136102  -0.86566425]]


### 라쏘 / 릿지

In [14]:
df = pd.read_csv("./week4_log.csv")
X = df[['x1', 'x2', 'x3', 'x4']]
y = df['y']

# Baseline: logistic regression with scikit-learn's default settings.
# NOTE: the defaults are not penalty-free -- LogisticRegression applies an L2
# penalty with C=1.0 unless told otherwise, where C is the inverse of the
# regularization strength (C = 1/λ).
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression()
log_reg.fit(X, y)
print(log_reg.intercept_)
print(log_reg.coef_)

# Ridge (L2-penalized linear regression). Ridge's alpha is the regularization
# strength λ itself, not its inverse: a small alpha barely constrains the
# coefficients and a large alpha shrinks them toward zero.
# NOTE(review): Ridge fits a linear regression on y, so its coefficients are
# not on the same scale as the logistic-regression ones printed here --
# confirm that this comparison is intended.
from sklearn.linear_model import Ridge
ridge_alphas = (0.0001, 0.001, 0.01, 0.1, 1, 10, 100)  # weakest -> strongest

# Fit one model per alpha; fit() returns the estimator itself.
ridge_models = [Ridge(alpha=alpha).fit(X, y) for alpha in ridge_alphas]

# Keep the original per-alpha names bound in case later cells refer to them.
(ridge_0_0001, ridge_0_001, ridge_0_01, ridge_0_1,
 ridge_1, ridge_10, ridge_100) = ridge_models

# Print results: logistic coefficients first, then Ridge by increasing alpha.
print(log_reg.coef_)
for ridge_model in ridge_models:
    print(ridge_model.coef_)

[5.72224497]
[[-0.44763344  0.89878042 -2.1136102  -0.86566425]]
[[-0.44763344  0.89878042 -2.1136102  -0.86566425]]
[ 0.05808584  0.24801105 -0.21519772 -0.05888311]
[ 0.05808481  0.24800307 -0.21519699 -0.0588849 ]
[ 0.0580745   0.2479233  -0.21518962 -0.05890274]
[ 0.05796968  0.24712991 -0.21511528 -0.05908003]
[ 0.0567696   0.23959379 -0.21431851 -0.06073484]
[ 0.04033076  0.18775751 -0.20570594 -0.06892535]
[-0.02258985  0.06972631 -0.1633292  -0.06504729]
