In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Raw sample: nine penguin measurements, all labelled 'Adelie' / 'Torgersen'.
# Each key becomes one DataFrame column; every list has nine entries.
data = {
    'species': ['Adelie'] * 9,
    'island': ['Torgersen'] * 9,
    'culmen_length_mm': [39.1, 39.5, 40.3, 43.9, 36.7, 39.3, 38.9, 39.2, 34.1],
    'culmen_depth_mm': [18.7, 17.4, 18, 17.2, 19.3, 20.6, 17.8, 19.6, 18.1],
    'flipper_length_mm': [181, 186, 195, 200.9, 193, 190, 181, 195, 193],
    'body_mass_g': [3750, 3800, 3250, 4202, 3450, 3650, 3625, 4675, 3475],
    'sex': ['MALE', 'FEMALE', 'FEMALE', 'FEMALE', 'FEMALE', 'MALE', 'FEMALE', 'MALE', 'MALE'],
}

# Build the frame and encode sex as a number in one step (MALE -> 1, FEMALE -> 0).
# The 'sex' column keeps its original position; only its values are replaced.
df = pd.DataFrame(data).assign(
    sex=lambda frame: frame['sex'].map({'MALE': 1, 'FEMALE': 0})
)

# Feature matrix (three body measurements plus encoded sex) and regression target.
# 'species' and 'island' are constant in this sample and are not used as features.
X = df.loc[:, ['culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'sex']]
y = df['body_mass_g']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): with 9 rows, test_size=0.2 leaves only 2 test rows, so the
# metrics below are very noisy and should not be read as a reliable estimate.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit ordinary least squares on the training rows (fit() returns the estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Predict body mass for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the held-out targets.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# Learned parameters: one coefficient per feature column (in the order of X), then the bias term.
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)


Mean Squared Error: 459961.5607839721
R^2 Score: -1.403064480830548
Coefficients: [ 73.90786433 -73.18235952   1.94751693 260.49073638]
Intercept: 1627.0536471871574
