In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Load the Titanic training set and preview the first three rows.
titanic_csv_path = '/content/titanic_train.csv'
titanic_df = pd.read_csv(titanic_csv_path)
titanic_df.head(n=3)

In [None]:
# Bar chart of the Survived column grouped by Sex.
sns.barplot(data=titanic_df, x='Sex', y='Survived')

# Row counts for every (Sex, Survived) combination; this is the cell's displayed output.
survived_by_sex = titanic_df.groupby(['Sex', 'Survived'])['Survived']
survived_by_sex.count()

In [None]:
# Bar chart of Survived per passenger class, split into one bar per Sex.
sns.barplot(data=titanic_df, x='Pclass', y='Survived', hue='Sex')

In [None]:
print('\n ### 데이터 정보 ###  \n')
# DataFrame.info() writes its report straight to stdout and returns None, so it
# must not be wrapped in print() -- that would append a stray "None" line.
titanic_df.info()

In [None]:
# Summary statistics produced by describe() with its default settings.
summary_stats = titanic_df.describe()
summary_stats

In [None]:
# Same summary statistics, transposed so that each described column becomes a row.
titanic_df.describe().T

In [None]:
# Fill missing values: Age with the column mean, Cabin and Embarked with the
# placeholder 'N'.
# The filled Series is assigned back to the column instead of calling
# fillna(..., inplace=True) on titanic_df[col]: that chained in-place form
# operates on an intermediate object, emits a FutureWarning on pandas 2.x and
# no longer updates titanic_df once Copy-on-Write is enabled.
titanic_df['Age'] = titanic_df['Age'].fillna(titanic_df['Age'].mean())
titanic_df['Cabin'] = titanic_df['Cabin'].fillna('N')
titanic_df['Embarked'] = titanic_df['Embarked'].fillna('N')

# Re-check the non-null counts after filling.
titanic_df.info()

In [None]:
# Names of the columns whose dtype compares equal to 'object'.
column_dtypes = titanic_df.dtypes
is_object_dtype = column_dtypes == 'object'
column_dtypes[is_object_dtype].index.tolist()

In [None]:
# Frequency of each distinct value in the three columns of interest.
sex_counts = titanic_df['Sex'].value_counts()
cabin_counts = titanic_df['Cabin'].value_counts()
embarked_counts = titanic_df['Embarked'].value_counts()

print(' Sex 값 분포 :\n', sex_counts)
print('\n Cabin 값 분포 :\n', cabin_counts)
print('\n Embarked 값 분포 :\n', embarked_counts)

In [None]:
from sklearn.preprocessing import LabelEncoder
# Columns whose values are replaced by integer codes.
features = ['Cabin', 'Sex', 'Embarked']

# One encoder instance is re-fitted for every column, so after the loop `le`
# only holds the classes of the last column in `features`.
le = LabelEncoder()
for column_name in features:
    titanic_df[column_name] = le.fit_transform(titanic_df[column_name])

titanic_df.head()

In [None]:
# Visualization for outlier handling: one box plot per listed column.
numeric_features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Cabin']

# 2 x 3 grid, filled row by row in the order of `numeric_features`.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
for ax, feature in zip(axes.flat, numeric_features):
    sns.boxplot(x=titanic_df[feature], ax=ax)
    ax.set_title(f'Boxplot of {feature}')

fig.tight_layout()
plt.show()

In [None]:
# Remove the PassengerId, Name and Ticket columns.
# The result is rebound instead of using inplace=True, and errors='ignore'
# skips labels that are already gone, so re-running this cell does not raise
# KeyError the way the in-place drop did on a second execution.
titanic_df = titanic_df.drop(columns=['PassengerId', 'Name', 'Ticket'], errors='ignore')
titanic_df.head(3)

In [None]:
# Target: the Survived column.
y_titanic_df = titanic_df.loc[:, 'Survived']
y_titanic_df.head(5)

In [None]:
# Features: a new frame holding every column except Survived (titanic_df is left untouched).
X_titanic_df = titanic_df.drop(columns='Survived')
X_titanic_df.head(n=3)

In [None]:
from sklearn.metrics import r2_score

# Actual values and the corresponding predicted values.
y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]

# Compute R² for the toy example and report it.
r2 = r2_score(y_true=y_true, y_pred=y_pred)
print("R²:", r2)

In [None]:
# Linear regression example: salary (x) and job satisfaction (y).
x = [3000, 4200, 4000, 5000, 6000, 3800, 3500, 6200, 3900, 4500]
y = [60, 75, 70, 85, 90, 70, 65, 95, 70, 80]
data = dict(x=x, y=y)
df = pd.DataFrame(data)

# Raw scatter of the ten observations.
salary, satisfaction = df['x'], df['y']
plt.scatter(salary, satisfaction)
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression

X = df[['x']]  # independent variable (selected with a list, so it stays a DataFrame)
y = df['y']    # dependent variable
model = LinearRegression().fit(X, y)  # fit() returns the fitted estimator itself

slope = model.coef_[0]
print("Slope (Coefficient):", slope)
print("Intercept:", model.intercept_)

In [None]:
# Observed points in blue, with the fitted regression line drawn in red on top.
fitted_values = model.predict(X)
plt.scatter(df['x'], df['y'], color='blue')
plt.plot(df['x'], fitted_values, color='red')
plt.title('Linear Regression: Salary vs Job Satisfaction')
plt.xlabel('Salary')
plt.ylabel('Job Satisfaction')
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X_titanic_df,
    y_titanic_df,
    test_size=0.2,
    random_state=10,
)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Fit a liblinear logistic regression on the training split and score it on the test split.
lr_clf = LogisticRegression(solver='liblinear').fit(X_train, y_train)
lr_pred = lr_clf.predict(X_test)

lr_accuracy = accuracy_score(y_test, lr_pred)
print('LogisticRegression 정확도: {0:.4f}'.format(lr_accuracy))

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report

# Re-fit the same liblinear logistic regression and predict on the test split.
lr_clf = LogisticRegression(solver='liblinear').fit(X_train, y_train)
lr_pred = lr_clf.predict(X_test)

# Precision, recall and F1 use support-weighted averaging over the classes.
# NOTE(review): support-weighted recall is arithmetically equal to accuracy, so
# those two printed numbers will always match.
accuracy = accuracy_score(y_test, lr_pred)
precision = precision_score(y_test, lr_pred, average='weighted')
recall = recall_score(y_test, lr_pred, average='weighted')
f1 = f1_score(y_test, lr_pred, average='weighted')

print('accuracy: {:.4f}'.format(accuracy))
print('precision: {:.4f}'.format(precision))
print('recall: {:.4f}'.format(recall))
print('f1-score: {:.4f}'.format(f1))

# Confusion matrix as an annotated heatmap (integer cell labels).
conf_matrix = confusion_matrix(y_test, lr_pred)

plt.figure(figsize=(3, 2))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()