In [9]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings

# Отключение предупреждений
warnings.filterwarnings("ignore")

# Шаг 1: Загрузка данных
file_path = "train.csv"
df = pd.read_csv(file_path)

print("Данные успешно загружены.")

# Шаг 2: Исключение признаков
df = df.drop(['PassengerId'], axis=1)

# Шаг 3: Разделение выборки
X = df.drop('Survived', axis=1)
y = df['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Шаг 4: Преобразование признаков
numeric_features = ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare']
categorical_features = ['Sex', 'Embarked']

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(drop='first'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Шаг 5: Обучение моделей
# Логистическая регрессия
logreg = LogisticRegression(random_state=42)
logreg_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                   ('classifier', logreg)])
logreg_pipeline.fit(X_train, y_train)
y_pred_logreg = logreg_pipeline.predict(X_test)

# KNN
knn = KNeighborsClassifier()
knn_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                ('classifier', knn)])
knn_pipeline.fit(X_train, y_train)
y_pred_knn = knn_pipeline.predict(X_test)

# Шаг 6: Подбор гиперпараметров
# Подбор параметров для логистической регрессии
param_dist_logreg = {'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                     'classifier__penalty': ['l1', 'l2']}
random_search_logreg = RandomizedSearchCV(logreg_pipeline, param_distributions=param_dist_logreg, n_iter=10, random_state=42)
random_search_logreg.fit(X_train, y_train)

# Подбор параметров для KNN
param_dist_knn = {'classifier__n_neighbors': [3, 5, 7, 9, 11],
                  'classifier__weights': ['uniform', 'distance'],
                  'classifier__metric': ['euclidean', 'manhattan']}
random_search_knn = RandomizedSearchCV(knn_pipeline, param_distributions=param_dist_knn, n_iter=10, random_state=42)
random_search_knn.fit(X_train, y_train)

# Шаг 7: Оценка новых моделей
y_pred_logreg_tuned = random_search_logreg.predict(X_test)
y_pred_knn_tuned = random_search_knn.predict(X_test)

# Метрики для логистической регрессии
print("Logistic Regression Metrics:")
print("Accuracy:", accuracy_score(y_test, y_pred_logreg_tuned))
print("Classification Report:\n", classification_report(y_test, y_pred_logreg_tuned))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_logreg_tuned))

# Метрики для KNN
print("\nKNN Metrics:")
print("Accuracy:", accuracy_score(y_test, y_pred_knn_tuned))
print("Classification Report:\n", classification_report(y_test, y_pred_knn_tuned))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_knn_tuned))

# Вывод о завершении операций
print("Операции успешно выполнены.")


Данные успешно загружены.
Logistic Regression Metrics:
Accuracy: 0.8044692737430168
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.86      0.84       105
           1       0.78      0.73      0.76        74

    accuracy                           0.80       179
   macro avg       0.80      0.79      0.80       179
weighted avg       0.80      0.80      0.80       179

Confusion Matrix:
 [[90 15]
 [20 54]]

KNN Metrics:
Accuracy: 0.7988826815642458
Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.89      0.84       105
           1       0.81      0.68      0.74        74

    accuracy                           0.80       179
   macro avg       0.80      0.78      0.79       179
weighted avg       0.80      0.80      0.80       179

Confusion Matrix:
 [[93 12]
 [24 50]]
Операции успешно выполнены.
