In [1]:
import seaborn as sns

passengers = sns.load_dataset('titanic')
print(passengers.shape)
print(passengers.head())

(891, 15)
   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  


In [2]:
# Data pre-processing
passengers['sex'] = passengers['sex'].map({'female':1,'male':0})

In [3]:
# 결측치 처리
passengers['age'].fillna(value=passengers['age'].mean(), inplace=True)

In [4]:
# Feature 분리하기
passengers['FirstClass'] = passengers['pclass'].apply(lambda x: 1 if x == 1 else 0)
passengers['SecondClass'] = passengers['pclass'].apply(lambda x: 1 if x == 2 else 0)

features = passengers[['sex', 'age', 'FirstClass', 'SecondClass']]
survival = passengers['survived']

In [5]:
# Train/Test set 분리하기
from sklearn.model_selection import train_test_split

train_features, test_features, train_labels, test_labels = train_test_split(features, survival)

In [6]:
# 데이터 정규화(Scaling) 하기
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

train_features = scaler.fit_transform(train_features)
test_features = scaler.transform(test_features)

In [7]:
# 모델 생성 및 평가하기
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(train_features, train_labels)

print(model.score(train_features, train_labels))
print(model.score(test_features, test_labels))

0.8158682634730539
0.7488789237668162


In [8]:
# Feature들의 계수(Coefficients) 확인
# sex, age, firstclass, secondclass 순
print(model.coef_)

[[ 1.35508091 -0.47137038  1.03569715  0.4973982 ]]


In [9]:
# 새로운 임의의 데이터를 넣어서 예측
import numpy as np
Kim = np.array([0.0, 20.0, 0.0, 0.0])
Park = np.array([1.0, 17.0, 1.0, 0.0])
ME = np.array([1.0, 32.0, 1.0, 0.0])

sample_passengers = np.array([Kim, Park, ME])

In [10]:
# scaling
sample_passengers = scaler.transform(sample_passengers)



In [11]:
# 새로운 데이터에 대한 예측
print(model.predict(sample_passengers))

print(model.predict_proba(sample_passengers))

[0 1 1]
[[0.89519531 0.10480469]
 [0.04017465 0.95982535]
 [0.06682402 0.93317598]]


*** 결과: Kim은 죽고, Park과 Me만 생존함

In [12]:
# 붓꽃 데이터
# 필요한 라이브러리 임포트
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 데이터 로드
iris = load_iris()
X = iris.data
y = iris.target

# 데이터 분할 (훈련 세트와 테스트 세트)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 로지스틱 회귀 모델 생성 및 훈련
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# 테스트 세트에서 예측
y_pred = log_reg.predict(X_test)

# 정확도 측정
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy}")

Model accuracy: 1.0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
