In [1]:
import numpy as np
import pandas as pd

# 선형회귀, ridge, lasso 관련 sklearn 라이브러리 불러오기

from sklearn import linear_model
from sklearn.metrics import mean_squared_error

# k-fold 교차검증 관련 라이브러리
from sklearn.model_selection import KFold

In [2]:
# Ridge regression 객체 생성 

reg = linear_model.Ridge(alpha =0.1)

# alpha : tuning parameter(lambda)

In [3]:
# 훈련

train_data = ([[0,0],[0,0],[1,1]],[0,.1,1])

X,y = train_data[0], train_data[1]

In [4]:
reg.fit(X,y)

Ridge(alpha=0.1)

In [5]:
# test data >> 예측

reg.predict([[0,1]])

array([0.51395349])

In [6]:
pred_test = reg.predict([[0,1]])

In [7]:
reg.coef_ # 회귀계수 

array([0.44186047, 0.44186047])

In [8]:
# Ridge regression 객체 생성 

reg = linear_model.Ridge(alpha =1.0)

# alpha : tuning parameter(lambda)

# 훈련

train_data = ([[0,0],[0,0],[1,1]],[0,.1,1])

X,y = train_data[0], train_data[1]

reg.fit(X,y)

reg.coef_ # 회귀계수 # 관심사 

# alpha(=규제, 조절변수, lambda이다.) 값이 커지면 coefficient 값 작아진다. 

array([0.27142857, 0.27142857])

In [9]:
# 라쏘 L1 맛보기

reg = linear_model.Lasso(alpha=0.1)

In [10]:
# 훈련 

train_data = ([[0,0],[0,0],[1,1]],[0,.1,1])

X,y = train_data[0], train_data[1]

reg.fit(X,y)


Lasso(alpha=0.1)

In [11]:
# test 데이터로 예측 

pred_test = reg.predict([[0,1]])
pred_test


array([0.2])

In [12]:
# coeffient

reg.alpha

# 두번째 coefficient 값이 0이 되는 것을 확인 

0.1

In [13]:
# 라쏘 L1 맛보기

reg = linear_model.Lasso(alpha=1.0)

# 훈련 

train_data = ([[0,0],[0,0],[1,1]],[0,.1,1])

X,y = train_data[0], train_data[1]

reg.fit(X,y)

# test 데이터로 예측 

pred_test = reg.predict([[0,1]])
pred_test

array([0.36666667])

In [14]:
reg.coef_

array([0., 0.])

In [17]:
# advertising 

path = '/content/drive/MyDrive/새싹sw-인공지능 서비스 개발자/CLASSROOM/CODE PDF/머신러닝/Advertising.csv'
ad = pd.read_csv(path, index_col =0)
ad = ad.drop(ad.columns[0],axis=1)
ad

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,9.7
197,177.0,9.3,6.4,12.8
198,283.6,42.0,66.2,25.5


In [21]:
# 라쏘 적용 >> 최적의 람다 선정 

# k-fold CV의 fold 수 지정

n_fold=5

for t_param in (0.01,1,1000):
  # lambda 값 설정 : 0.01,1,1000
  print('Lambda: ', t_param)
  kf = KFold(n_splits=n_fold)

  idx = 1

  sum_val_mse =0 
  for train, val in kf.split(ad):
    print('Fold: # ',idx)

    # training set >> features, response 분리
    train_X = ad.iloc[train][['TV', 'Radio', 'Newspaper']]
    train_y = ad.iloc[train][['Sales']]

    # validation set >> features, response 분리
    val_X = ad.iloc[val][['TV', 'Radio', 'Newspaper']]
    val_y = ad.iloc[val][['Sales']]

    # Lasso 객체 생성
    regr = linear_model.Lasso(alpha=t_param)

    # train data 활용, model 적합 
    regr.fit(train_X[['TV', 'Radio', 'Newspaper']], train_y)

    # validation data, 예측
    val_y_pred = regr.predict(val_X[['TV', 'Radio', 'Newspaper']],)

    # coefficients 출력

    print('coefficients: ', regr.coef_)

    # validation MSE
    val_mse = mean_squared_error(val_y, val_y_pred)

    # validation MSE 합계 
    sum_val_mse += val_mse

    print('--------------------------------------------')

    idx +=1

  print('average validation mse: %.3f'%(sum_val_mse/n_fold))
  print('*************************************************')

Lambda:  0.01
Fold: #  1
coefficients:  [0.04585721 0.18786616 0.0035984 ]
--------------------------------------------
Fold: #  2
coefficients:  [0.04513011 0.18790415 0.00140072]
--------------------------------------------
Fold: #  3
coefficients:  [ 0.04697919  0.18866641 -0.00232364]
--------------------------------------------
Fold: #  4
coefficients:  [ 0.0431589   0.20006652 -0.00754594]
--------------------------------------------
Fold: #  5
coefficients:  [ 0.04725115  0.17985369 -0.00090631]
--------------------------------------------
average validation mse: 3.073
*************************************************
Lambda:  1
Fold: #  1
coefficients:  [0.04582866 0.18396308 0.00216856]
--------------------------------------------
Fold: #  2
coefficients:  [0.04501995 0.1840591  0.00038057]
--------------------------------------------
Fold: #  3
coefficients:  [ 0.04686138  0.18301965 -0.        ]
--------------------------------------------
Fold: #  4
coefficients:  [ 0.04313