In [None]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn.linear_model import LinearRegression

import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Download the dataset and preprocess it.
# The download is parsed as header-less, comma-separated text after skipping
# its first 43 lines (presumably a metadata preamble — TODO confirm against
# the downloaded file), so the column names are assigned by hand below.
data_url = "https://www.openml.org/data/download/21831215/dataset"
data = pd.read_csv(
    data_url,
    sep=",",
    skiprows=43,
    header=None,
)
data.columns = [
    'Survived (target)',
    'Pclass',
    'Sex',
    'Age',
    'Fare',
    'Embarked',
    'relatives',
    'Title',
]

# Show the loaded frame as the cell output.
data

In [None]:
# Pick the explanatory variables (X) and the dependent variable (y).
X = data.loc[:, ['Age', 'Fare']]
y = data.loc[:, 'Survived (target)']

# Split into train and test sets at a 9:1 ratio.
# The split is positional: the first 90% of the rows become the training set
# and the remaining rows the test set; no shuffling is applied.
train_size = int(0.9 * len(data))
train_data, test_data = data.iloc[:train_size], data.iloc[train_size:]

# Separate the training rows into features and target.
X_train = train_data.loc[:, ['Age', 'Fare']]
y_train = train_data.loc[:, 'Survived (target)']

# Show both training objects as the cell output.
X_train, y_train

In [None]:
# Separate the held-out rows into features and target.
X_test = test_data.loc[:, ['Age', 'Fare']]
y_test = test_data.loc[:, 'Survived (target)']

# Show both test objects as the cell output.
X_test, y_test

In [None]:
# Train the model: fit a linear regression of the target on the two
# training features.
# NOTE(review): the target is plotted elsewhere in this notebook as "1 vs.
# anything else", so it looks binary — confirm a linear regression (rather
# than a classifier) is what is intended here.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Intercept of the fitted model
print("절편값 : ", model.intercept_, sep=" ")

# Weights (coefficients) of the fitted model
print("가중치 : ", model.coef_, sep=" ")

In [None]:
# Evaluate the model: coefficient of determination (R^2) on the training
# split first, then on the test split.
train_score, test_score = (
    model.score(X_train, y_train),
    model.score(X_test, y_test),
)
print("Train R^2 Score:", train_score)
print("Test R^2 Score:", test_score)

In [None]:
# Print the regression equation using the fitted parameters.
# coefficients[0] is paired with Age and coefficients[1] with Fare, matching
# the ['Age', 'Fare'] column order the model was trained on.
coefficients, intercept = model.coef_, model.intercept_
print(
    "회귀식: y =",
    coefficients[0], "* Age +",
    coefficients[1], "* Fare +",
    intercept,
)

In [None]:
from sympy import symbols, exp, Function, Max, Piecewise, Matrix
from sympy.plotting import plot, plot3d, PlotGrid
from sympy.functions.elementary.hyperbolic import tanh
from sympy.printing.latex import LatexPrinter, print_latex

# Create the symbolic independent variables.
# NOTE: this rebinds the notebook-level name `y`, which an earlier cell
# assigned to the target column data['Survived (target)'].
x, y = symbols('x y')

# Regression equation
def regression_eq(x, y, coef_x=0.07836937701405286, coef_y=0.17987508076489225, intercept=0.54339068503426):
    """Evaluate the regression plane ``coef_x * x + coef_y * y + intercept``.

    In this notebook ``x`` is drawn on the axis labelled 'Age' and ``y`` on the
    axis labelled 'Fare'.

    Args:
        x: Value (number or symbolic expression) multiplied by ``coef_x``.
        y: Value (number or symbolic expression) multiplied by ``coef_y``.
        coef_x: Weight applied to ``x``.
        coef_y: Weight applied to ``y``.
        intercept: Constant term.

    Returns:
        The value of the linear expression, in whatever type the arithmetic
        on the inputs produces.

    Note:
        The defaults are the constants that used to be hard-coded in the body
        (presumably copied from a previous run's printed equation — verify).
        Pass ``model.coef_[0]``, ``model.coef_[1]`` and ``model.intercept_``
        to keep the surface in sync with a freshly fitted model.
    """
    return coef_x * x + coef_y * y + intercept

# Build the 3D plot of the regression surface; show=False defers rendering.
p1 = plot3d(
    regression_eq(x, y),
    (x, 0, 6),
    (y, 0, 4),
    xlabel='Age',
    ylabel='Fare',
    zlabel='Survived (target)',
    show=False,
)

# Display the 3D plot in a 1x1 grid.
PlotGrid(1, 1, p1)

In [None]:
# single plot
def g(x, slope=0.17987508076489225, intercept=0.54339068503426):
    """Evaluate the line ``slope * x + intercept``.

    With the default constants this equals ``regression_eq(0, x)``: the
    regression plane restricted to a first argument of zero.

    Args:
        x: Value (number or symbolic expression) multiplied by ``slope``.
        slope: Weight applied to ``x``.
        intercept: Constant term.

    Returns:
        The value of the linear expression, in whatever type the arithmetic
        on the inputs produces.

    Note:
        The defaults are the constants that used to be hard-coded in the body;
        pass fitted values explicitly to plot a different model.
    """
    return slope * x + intercept

# Plot g over 0..4, the same range used for the 'Fare' axis of the 3D plot.
# g's constants are the 'Fare' weight and the intercept of regression_eq, so
# the axes are labelled accordingly to let the figure be read on its own.
plot(g(x), (x, 0, 4), xlabel='Fare', ylabel='Survived (target)')


In [None]:
from mpl_toolkits.mplot3d import Axes3D

# Create the 3D figure and its single set of axes.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(1, 1, 1, projection='3d')

# Pull out the fare, age, and survived columns.
fare, age, survived = data['Fare'], data['Age'], data['Survived (target)']

# Colour each point by outcome: blue where survived == 1, red otherwise.
colors = ['blue' if is_one else 'red' for is_one in survived.eq(1)]

# Plot the observations in 3D.
ax.scatter(fare, age, survived, c=colors, alpha=0.5)

# Label the axes and title the figure.
ax.set(xlabel='Fare', ylabel='Age', zlabel='Survived')
ax.set_title('Survived vs Fare and Age')

# Render the figure.
plt.show()