In [1]:
from sklearn.ensemble import RandomForestClassifier,StackingClassifier,GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV

In [2]:
# Load the training data, discard the columns that are not used as features,
# and one-hot encode the two categorical columns -- all in one chain so the
# cell produces `df` in a single, re-runnable expression.
df = (
    pd.read_csv('train.csv')
    .drop(columns=["Ticket", 'Cabin', 'PassengerId', "Name"])
    .pipe(pd.get_dummies, columns=['Embarked', "Sex"])
)


In [4]:
# Separate the target column from the feature matrix.
y = df['Survived']
X = df.drop(columns=['Survived'])

# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
# NOTE(review): the cells visible here fit on the full X / y, so this
# hold-out split does not appear to be used -- confirm whether it is needed.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=42
)

In [8]:

# Seed for the estimators that were previously unseeded, so that the grid
# search (and therefore best_params_ / best_score_) is repeatable after a
# kernel restart.
MODEL_SEED = 42

# Level-0 learners; the stacking model feeds their predictions to the
# final estimator.
base_estimators = [
    ("rf", RandomForestClassifier(random_state=MODEL_SEED)),
    ("dt", DecisionTreeClassifier(random_state=41)),
]
# Level-1 (meta) learner trained on the base estimators' predictions.
final_estimator = GradientBoostingClassifier(random_state=MODEL_SEED)

model = StackingClassifier(
    estimators=base_estimators,
    final_estimator=final_estimator,
)

# Nested parameters are addressed through the stacking model:
# "dt__..." targets the estimator registered under the name "dt",
# "final_estimator__..." targets the meta-learner.
param_grid = {
    "dt__max_depth": [2, 4, 6],
    "final_estimator__n_estimators": [70, 80, 90, 100],
}

# 5-fold cross-validated search over the 3 x 4 grid, scored by accuracy and
# run on all available cores.
grid = GridSearchCV(model, param_grid=param_grid, scoring="accuracy", cv=5, n_jobs=-1)
grid.fit(X, y)

In [9]:

# Report the winning hyper-parameter combination and its mean
# cross-validated score from the grid search.
for label, value in (
    ("Best Parameters:", grid.best_params_),
    ("Best Score:", grid.best_score_),
):
    print(label, value)

Best Parameters: {'dt__max_depth': 2, 'final_estimator__n_estimators': 100}
Best Score: 0.8136902893729209


In [10]:
# Keep a handle on the estimator selected by the grid search. GridSearchCV
# refits the best parameter combination by default, and `refit` is not
# overridden where `grid` is constructed.
best_model = grid.best_estimator_


In [15]:
# Keep the untouched test frame around: its PassengerId column is needed
# when the submission frame is assembled.
df_test_ = pd.read_csv('./test.csv')

# Apply the same preprocessing as for the training data.
df_test = df_test_.drop(columns=["Ticket", 'Cabin', 'PassengerId', "Name"])
df_test = pd.get_dummies(df_test, columns=['Embarked', "Sex"])

# Train and test are one-hot encoded independently, so a category that is
# absent from one file would produce a different set (or order) of columns
# and make predict() fail. Align the test features to the training feature
# matrix: missing dummy columns are added as all-zero, unseen ones dropped.
df_test = df_test.reindex(columns=X.columns, fill_value=0)

In [16]:
# Predict a label for every row of the preprocessed test features.
y_pred = best_model.predict(df_test)

In [17]:
# Name the prediction column after the training target; an unnamed Series
# would be written out under the header "0". The test frame's index is
# passed explicitly so the predictions line up row-for-row with PassengerId
# when concatenated.
y_pred = pd.Series(y_pred, index=df_test_.index, name="Survived")

# Two-column submission frame: passenger id next to its predicted label.
df_final = pd.concat([df_test_.PassengerId, y_pred], axis=1)

In [None]:
# Write the submission file; index=False keeps the row index out of the CSV
# so only the frame's own columns are written.
df_final.to_csv('submission.csv',index=False)