In [None]:
import pandas as pd
import pathlib as path
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
# Hard-coded absolute Windows paths to the TITANIC train/test CSV files.
train_path, test_path = (
    r'C:\Users\user\Documents\GitHub\Notebooks-on-ml\TITANIC\Data\train.csv',
    r'C:\Users\user\Documents\GitHub\Notebooks-on-ml\TITANIC\Data\test.csv',
)

# Load both files into DataFrames; the rest of the script reads these names.
df_train, df_test = pd.read_csv(train_path), pd.read_csv(test_path)

def _peer_outcome(peers_survived):
    """Summarise the fate of a passenger's group peers.

    Args:
        peers_survived: Series of ``Survived`` values of the *other* members
            of the passenger's group.

    Returns:
        1.0 if the largest value is 1, 0.0 if it is 0, and ``None`` when
        nothing can be concluded (no peers, or only missing values).
    """
    best = peers_survived.max()
    if best == 1.0:
        return 1.0
    if best == 0.0:
        return 0.0
    return None


def _fill_from_groups(train_fs, keys, only_unknown):
    """Write ``Family_Survival`` in place for training rows that share *keys*.

    Each row is scored from the other members of its group only, so a
    passenger's own label never feeds its own feature.  When *only_unknown*
    is true, rows that already left the neutral 0.5 value are not touched.
    """
    for _, members in train_fs.groupby(keys):
        if len(members) < 2:
            continue  # a lone passenger has no peers to learn from
        for idx in members.index:
            if only_unknown and train_fs.loc[idx, 'Family_Survival'] != 0.5:
                continue
            outcome = _peer_outcome(members['Survived'].drop(idx))
            if outcome is not None:
                train_fs.loc[idx, 'Family_Survival'] = outcome


def get_safe_survival_mapping(df_train, df_test):
    """Build a per-passenger ``Family_Survival`` feature.

    Passengers are linked either by (surname, fare) or by ticket.  The
    feature is 1 when a linked training passenger survived, 0 when linked
    training passengers exist but none survived, and 0.5 when nothing is
    known.  Training rows are scored from their peers only (their own
    ``Survived`` value is excluded); test rows are scored from training rows
    only.

    Args:
        df_train: frame with ``PassengerId``, ``Name``, ``Fare``, ``Ticket``
            and ``Survived`` columns.  Training rows are updated by index
            label, so the index is assumed to be unique.
        df_test: frame with ``PassengerId``, ``Name``, ``Fare`` and
            ``Ticket`` columns.

    Returns:
        DataFrame with columns ``PassengerId`` and ``Family_Survival``:
        the training rows followed by the test rows.  The inputs are not
        modified.
    """
    train_fs = df_train.copy()
    test_fs = df_test.copy()
    for frame in (train_fs, test_fs):
        # The surname is everything before the first comma of the name.
        frame['Surname'] = frame['Name'].apply(lambda full_name: full_name.split(',')[0])
        frame['Family_Survival'] = 0.5

    # Family pass first, then the ticket pass fills only still-unknown rows.
    # NOTE(review): this gives the family pass precedence in training (a row
    # set to 0 by its family stays 0 even if a ticket peer survived), whereas
    # test rows below take the maximum over both sources.  Kept as-is.
    _fill_from_groups(train_fs, ['Surname', 'Fare'], only_unknown=False)
    _fill_from_groups(train_fs, 'Ticket', only_unknown=True)

    # Best known outcome per family / per ticket, computed once instead of
    # rescanning the whole training frame for every test passenger.
    family_best = train_fs.groupby(['Surname', 'Fare'])['Survived'].max().to_dict()
    ticket_best = train_fs.groupby('Ticket')['Survived'].max().to_dict()

    test_values = []
    for surname, fare, ticket in zip(test_fs['Surname'], test_fs['Fare'], test_fs['Ticket']):
        candidates = (family_best.get((surname, fare)), ticket_best.get(ticket))
        known = [value for value in candidates if value is not None and pd.notna(value)]
        best = max(known) if known else None
        if best == 1.0:
            test_values.append(1.0)
        elif best == 0.0:
            test_values.append(0.0)
        else:
            test_values.append(0.5)
    # Positional assignment: independent of the test frame's index labels.
    test_fs['Family_Survival'] = pd.Series(test_values, dtype=float).to_numpy()

    result_columns = ['PassengerId', 'Family_Survival']
    return pd.concat([train_fs[result_columns], test_fs[result_columns]])

# Raw honorific -> coarse title group.  The parser below keeps everything
# between the comma and the first period of the name, so a multi-word
# honorific such as "the Countess" must be listed verbatim to be grouped.
_TITLE_GROUPS = {
    'Rev': 'Service', 'Dr': 'Service',
    'Jonkheer': 'Noble', 'Don': 'Noble', 'Sir': 'Noble', 'Lady': 'Noble',
    'Countess': 'Noble', 'the Countess': 'Noble', 'Dona': 'Noble',
    'Capt': 'Officer', 'Col': 'Officer', 'Major': 'Officer',
    'Mlle': 'Miss', 'Ms': 'Miss',
    'Mme': 'Mrs',
}

# First cabin letter -> coarse deck group; 'M' marks a missing cabin.
_DECK_GROUPS = {
    'A': 'Top', 'B': 'Top', 'C': 'Top',
    'D': 'Middle', 'E': 'Middle',
    'F': 'Low', 'G': 'Low', 'T': 'Low', 'M': 'Low',
}


def prepare_data(df_input, survival_mapping):
    """Turn a raw passenger frame into an all-float feature matrix.

    Args:
        df_input: frame with ``PassengerId``, ``Name``, ``Sex``, ``Age``,
            ``SibSp``, ``Parch``, ``Ticket``, ``Fare``, ``Cabin`` and
            ``Embarked`` columns.  Any other column (e.g. ``Survived``) is
            passed through and cast to float.
        survival_mapping: frame with ``PassengerId`` and ``Family_Survival``
            columns; left-joined onto the passengers.

    Returns:
        A new DataFrame of floats with engineered features
        (``Ticket_Group_Size``, ``Family_Survival``, ``Has_Cabin``,
        ``FamilySize``) and one-hot columns for ``Title``, ``Deck`` and
        ``Embarked``.  ``df_input`` is not modified.
    """
    df = df_input.copy()

    # Number of passengers in this frame travelling on the same ticket.
    df['Ticket_Group_Size'] = df.groupby('Ticket')['Ticket'].transform('count')
    df = df.merge(survival_mapping, on='PassengerId', how='left')

    # Honorific = text between the comma and the first period of the name.
    raw_titles = df['Name'].apply(
        lambda full_name: full_name.split(',')[1].split('.')[0].strip()
    )
    df['Title'] = raw_titles.replace(_TITLE_GROUPS)

    df['Has_Cabin'] = df['Cabin'].notna().astype(int)
    df['Deck'] = df['Cabin'].str[0].fillna('M').replace(_DECK_GROUPS)

    df['FamilySize'] = df['SibSp'] + df['Parch'] + 1

    # Impute age with the median of the passenger's title group; if a whole
    # group has no known age, fall back to the overall median so no NaN is
    # left behind for the estimators.
    title_median_age = df.groupby('Title')['Age'].transform('median')
    df['Age'] = df['Age'].fillna(title_median_age).fillna(df['Age'].median())
    df['Fare'] = df['Fare'].fillna(df['Fare'].median())
    df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

    # Raw columns that have been replaced by engineered features.
    cols_to_drop = ['Name', 'Ticket', 'Cabin', 'PassengerId', 'SibSp', 'Parch']
    df = df.drop(columns=cols_to_drop, errors='ignore')

    df = pd.get_dummies(df, columns=['Title', 'Deck', 'Embarked'])

    return df.astype(float)

# Family/ticket survival feature for every passenger in both files.
full_survival_map = get_safe_survival_mapping(df_train, df_test)

# Training matrix (label column removed) and label vector.
X = prepare_data(df_train, full_survival_map).drop(columns=['Survived'])
y = df_train['Survived']

# Test matrix, forced onto the training column layout: dummy columns that
# only occur in training are added as 0, columns unseen in training are
# dropped.
X_test_final = prepare_data(df_test, full_survival_map)
X_test_final = X_test_final.reindex(columns=X.columns, fill_value=0)

model = RandomForestClassifier(
    n_estimators=370,
    max_depth=6,
    min_samples_split=12,
    min_samples_leaf=4,
    random_state=42
)

# `use_label_encoder` is no longer passed: it is deprecated in current
# XGBoost releases and only triggers an "unused parameter" warning there.
xgb_model = XGBClassifier(
    n_estimators=284,
    max_depth=8,
    learning_rate=0.010129089335270519,
    subsample=0.7450299265656695,
    colsample_bytree=0.7041909842845945,
    random_state=42,
    eval_metric='logloss'
)

# Soft voting: weighted average of both models' predicted probabilities.
ensemble_model = VotingClassifier(
    estimators=[
        ('rf', model),
        ('xgb', xgb_model)
    ],
    voting='soft',
    weights=[0.26644968728053153, 0.880537546649935]
).fit(X, y)

# Probability of the positive class, thresholded at 0.5.
probs = ensemble_model.predict_proba(X_test_final)[:, 1]
predictions = (probs >= 0.5).astype(int)
submission = pd.DataFrame({
    "PassengerId": df_test["PassengerId"],
    "Survived": predictions
})

submission.to_csv('submission_final.csv', index=False)
print("Файл готов к отправке!")

Файл готов к отправке!
