# 📘 Лабораторная работа №3 (часть 2)
## Логистическая регрессия: анализ данных Titanic

In [3]:
# Импорт библиотек
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

## 🔹 Загрузка и первичный анализ данных

In [4]:
# Read the Titanic passenger table from CSV. The path is relative to the
# current working directory; edit DATA_PATH if the file lives elsewhere.
DATA_PATH = "Titanic.csv"
df = pd.read_csv(DATA_PATH)

# Preview the first rows of the raw frame.
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## 🔹 Шаг 1. Предобработка данных

In [5]:
# Integer codes used for the two categorical columns.
SEX_CODES = {"male": 0, "female": 1}
EMBARKED_CODES = {"S": 0, "C": 1, "Q": 2}

# Build the modelling table from the raw frame without modifying `df`:
# drop the columns that are not used as features, discard every row that
# still has a missing value, then replace text categories with integer codes.
# NOTE(review): Embarked is a nominal category but is fed to the model as one
# 0/1/2 number, which implies an ordering of ports. One-hot encoding may fit
# better, but a later cell drops a single "Embarked" column by name.
data = (
    df.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"])
    .dropna()
    .assign(
        Sex=lambda frame: frame["Sex"].map(SEX_CODES),
        Embarked=lambda frame: frame["Embarked"].map(EMBARKED_CODES),
    )
)

# Series.map turns any label that is absent from the dict into NaN. Since the
# mapping runs after dropna(), such NaNs would otherwise go unnoticed until
# model fitting, so stop here and report the affected columns.
unmapped_counts = data[["Sex", "Embarked"]].isna().sum()
if unmapped_counts.any():
    raise ValueError(
        "Unmapped category labels found: "
        f"{unmapped_counts[unmapped_counts > 0].to_dict()}"
    )

# Sanity check of the encoded table.
data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,0,22.0,1,0,7.25,0
1,1,1,1,38.0,1,0,71.2833,1
2,1,3,1,26.0,0,0,7.925,0
3,1,1,1,35.0,1,0,53.1,0
4,0,3,0,35.0,0,0,8.05,0


### Процент потерянных данных

In [6]:
# Percentage of the raw rows that were removed during preprocessing.
rows_total = len(df)
rows_kept = len(data)
lost_percent = 100 * (1 - rows_kept / rows_total)
print(f"Потеряно данных: {lost_percent:.2f}%")

Потеряно данных: 20.09%


## 🔹 Шаг 2. Обучение логистической регрессии

In [7]:
# The target is the "Survived" column; every other column of `data` is a feature.
y = data["Survived"]
X = data.drop(columns="Survived")

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit logistic regression with the iteration limit raised to 1000.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predict on the held-out rows and report the accuracy.
y_pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Точность модели: {acc:.4f}")

Точность модели: 0.7897


### Влияние признака `Embarked`

In [8]:
# Repeat the experiment with the "Embarked" column removed from the features.
# test_size and random_state match the values used for the full-feature split.
# NOTE(review): this compares two accuracies from a single train/test split;
# a small gap may be within split-to-split noise — confirm with cross-validation.
X_wo_embarked = X.drop("Embarked", axis=1)
split_wo_embarked = train_test_split(
    X_wo_embarked, y, test_size=0.3, random_state=42
)
X_train2, X_test2, y_train2, y_test2 = split_wo_embarked

model2 = LogisticRegression(max_iter=1000).fit(X_train2, y_train2)

y_pred2 = model2.predict(X_test2)
acc2 = accuracy_score(y_true=y_test2, y_pred=y_pred2)
print(f"Точность без Embarked: {acc2:.4f}")

Точность без Embarked: 0.8037
