# **1. 단순 선형 회귀 분석**
- 전복의 나이를 예측하는 선형회귀모델을 생성하세요.
- 전복의 ‘성별’, ‘키’, ‘지름’, ‘높이’, ‘전체무게’, ‘몸통무게’, ‘내장무게’, ‘껍질무게’를 이용해 ‘껍질의 고리 수’를 예측한 뒤, **예측된 ‘껍질의 고리 수’에 1.5를 더하면 전복의 나이**가 됩니다.

In [1]:
# 기본 모듈 불러오기
import numpy as np
import pandas as pd

**1) 데이터 load 및 변형**

In [2]:
# 데이터 로드
data = pd.read_csv("abalone.csv")
data.head()
print(data.shape)

# 성별 M은 Male, F는 Female, I는 Infant 이므로 따로 열 만들기
for label in "MFI":
    data[label] = data["Sex"] == label
data.drop('Sex', axis=1, inplace=True)

(4177, 9)


**2) X, y 선택**
: y는 Rings열, X는 Rings열을 제외한 나머지를 선택하되 전부 실수가 되도록 한다.

In [3]:
# X,y 데이터 선택
y = data['Rings']

X = data.drop('Rings', axis=1)

 **3) train/test set 분리**

In [6]:
# 필요한 모듈 불러오기
from sklearn.model_selection import train_test_split

In [7]:
# train과 test set 분리 (train:test = 7:3 비율로)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

**4) 선형회귀모델 생성, 모델 예측치 구하기**

In [9]:
#필요한 모듈 불러오기
from sklearn.linear_model import LinearRegression

In [10]:
#선형회귀모델 생성 및 훈련
model = LinearRegression()
model.fit(X_train, y_train)

In [11]:
# 모델 예측치 구하기
lr_pred = model.predict(X_test)
# 모델 예측치를 활용해 최종적으로 전복의 나이를 예측
lr_pred += 1.5

**5) 모델 평가: MSE, RMSE, R2 score, corr 구하기**

In [12]:
#필요한 모듈 불러오기
from sklearn.metrics import mean_squared_error, r2_score

- MSE, RMSE

In [13]:
#mse, rmse
print(mean_squared_error(y_test, lr_pred))
print(np.sqrt(mean_squared_error(y_test, lr_pred)))

7.872306153405646
2.805763025169026


- R2 score

In [14]:
#R2 score 측정
print(r2_score(y_test, lr_pred))

0.29237505364615357


- 회귀 절편값

In [15]:
#회귀 절편 값
model.intercept_

3.3979828916929202

- 회귀 계수 값

In [16]:
#회귀 계수 값
print(model.coef_)

[ -1.17604848   9.64952577  20.44954095   7.93073669 -17.69219144
 -11.23356766   9.04855799   0.29731528   0.25810675  -0.55542202]


- 상관계수

Hint: corr 함수 이용.

In [17]:
# 상관계수 구하기
data.corr()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings,M,F,I
Length,1.0,0.986812,0.827554,0.925261,0.897914,0.903018,0.897706,0.55672,0.236543,0.309666,-0.551465
Diameter,0.986812,1.0,0.833684,0.925452,0.893162,0.899724,0.90533,0.57466,0.240376,0.318626,-0.564315
Height,0.827554,0.833684,1.0,0.819221,0.774972,0.798319,0.817338,0.557467,0.215459,0.298421,-0.518552
Whole weight,0.925261,0.925452,0.819221,1.0,0.969405,0.966375,0.955355,0.54039,0.252038,0.299741,-0.557592
Shucked weight,0.897914,0.893162,0.774972,0.969405,1.0,0.931961,0.882617,0.420884,0.251793,0.263991,-0.521842
Viscera weight,0.903018,0.899724,0.798319,0.966375,0.931961,1.0,0.907656,0.503819,0.242194,0.308444,-0.556081
Shell weight,0.897706,0.90533,0.817338,0.955355,0.882617,0.907656,1.0,0.627574,0.235391,0.306319,-0.546953
Rings,0.55672,0.57466,0.557467,0.54039,0.420884,0.503819,0.627574,1.0,0.181831,0.250279,-0.436063
M,0.236543,0.240376,0.215459,0.252038,0.251793,0.242194,0.235391,0.181831,1.0,-0.512528,-0.522541
F,0.309666,0.318626,0.298421,0.299741,0.263991,0.308444,0.306319,0.250279,-0.512528,1.0,-0.464298


# **2. Polynomial features**

In [18]:
# PolynomialFeatures 라이브러리 호출
from sklearn.preprocessing import PolynomialFeatures

In [19]:
# 임의 데이터 생성

X = np.arange(6).reshape(3, 2)

df =  pd.DataFrame(X)
df.columns = ['x_1','x_2']
df

Unnamed: 0,x_1,x_2
0,0,1
1,2,3
2,4,5


In [20]:
# 차원은 2로 설정
# fit_transform 메소드를 통해 데이터 변환
# PolynomialFeatures로 변환 된 데이터를 데이터 프레임 형태로 변환
poly_features = PolynomialFeatures(degree=2)
poly = poly_features.fit_transform(df)
poly = pd.DataFrame(poly)

In [21]:
# df_poly의 컬럼을 1,x1,x2,x1^2,x1*x2,x2^2 로 변경
poly.columns = ['1','x1','x2','x1^2','x1*x2','x2^2']