In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for inputFilePath in (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
):
    print(inputFilePath)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
# Read the training CSV into a DataFrame; the bare name on the last line
# makes the notebook render the frame as the cell output.
inputData = pd.read_csv(filepath_or_buffer='/kaggle/input/titanic/train.csv')
inputData

In [4]:
import missingno
# Graphical overview of where values are missing in the training frame.
missingno.matrix(inputData)

# Textual overview: number of missing values per column, largest first.
print(40 * '-')
print(inputData.isnull().sum().sort_values(ascending=False))

In [5]:
import seaborn as sns
# Independent working copy holding only the two columns used by the age heatmap.
tempInput = inputData[["Age", "Survived"]].copy()

def roundAge(age):
    """Round a positive age to the nearest integer; return 0 for anything else.

    Every value for which ``age > 0`` is false is mapped to 0. That includes
    zero, negative numbers and NaN (a comparison with NaN is always false).
    """
    return round(age) if age > 0 else 0
    
# Replace every age by its rounded value. roundAge returns 0 for any age that
# is not greater than zero (NaN included), so the Age == 0 column of the
# heatmap also contains the passengers whose age is missing.
updatedAge = tempInput['Age'].map(roundAge)
tempInput['Age'] = updatedAge
# Helper column: counting it per group gives the number of rows per (Age, Survived).
tempInput['count'] = 1

# Aggregate once and reuse the result (the original computed this group-by
# twice and threw the first result away).
ageSurvivalCounts = tempInput.groupby(["Age", "Survived"], as_index=False).count()

sns.set(rc={'figure.figsize': (30, 5)})
# index/columns are passed by keyword: positional arguments to
# DataFrame.pivot were deprecated in pandas 1.x and raise TypeError in 2.0+.
sns.heatmap(
    ageSurvivalCounts.pivot(index="Survived", columns="Age", values="count"),
    cmap='coolwarm',
    annot=True,
)

In [6]:
from sklearn.impute import SimpleImputer
# Shared imputer for the feature matrices; "mean" is SimpleImputer's default
# strategy, spelled out here so the choice is visible.
my_imputer = SimpleImputer(strategy="mean")

def sexChange(data):
    """Encode a sex label as a number.

    Returns 1 for "female" and 0 for "male". Any other value is returned
    unchanged.
    """
    if data == "female":
        return 1
    if data == "male":
        return 0
    return data
    

# Remove the target and the columns that are not used as features.
not_encoded_X = inputData.drop(
    ['PassengerId', 'Survived', 'Name', 'Ticket', 'Cabin'],
    axis=1,
)

# Swap the textual Sex labels for their numeric codes (see sexChange).
updatedSex = not_encoded_X['Sex'].map(sexChange)
not_encoded_X['Sex'] = updatedSex

# One-hot encode whatever non-numeric columns remain, then let my_imputer
# fill the missing values; the result X is the training feature matrix.
encoded_X = pd.get_dummies(data=not_encoded_X)
X = my_imputer.fit_transform(encoded_X)

X

In [7]:
# Target vector: the Survived column of the training frame.
y = inputData.loc[:, 'Survived']
y

In [8]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier

def developing(models=None, weights=None, retryTimes=10, randomState=None):
    """Score a voting ensemble on repeated random train/validation splits.

    The notebook-level feature matrix ``X`` and target ``y`` are split
    80/20 ``retryTimes`` times. For every split a fresh ``VotingClassifier``
    is fitted on the training part and its accuracy on the held-out part is
    printed, followed by the mean, maximum and minimum accuracy of all runs.

    Parameters
    ----------
    models : list of dict
        One description per base model with the keys ``"decisionMaker"``
        (the estimator class), ``"options"`` (keyword arguments for that
        class) and ``"modelName"`` (name used inside the ensemble).
    weights : list of numbers, optional
        Passed to ``VotingClassifier`` as ``weights``.
    retryTimes : int
        Number of random splits to evaluate; must be at least 1.
    randomState : int, optional
        When given, split ``i`` is seeded with ``randomState + i`` so that the
        whole evaluation can be reproduced. ``None`` (the default) keeps the
        splits unseeded.

    Raises
    ------
    ValueError
        If ``models`` is empty/``None`` or ``retryTimes`` is smaller than 1.
    """
    # Fail early with a clear message instead of an error from deep inside
    # scikit-learn (no models) or numpy (max/min of an empty score array).
    if not models:
        raise ValueError("models must contain at least one model description")
    if retryTimes < 1:
        raise ValueError("retryTimes must be at least 1")

    estimators = [
        (spec["modelName"], spec["decisionMaker"](**spec["options"]))
        for spec in models
    ]

    scores = []
    for i in range(retryTimes):
        model = VotingClassifier(estimators=estimators, weights=weights)

        splitSeed = None if randomState is None else randomState + i
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=splitSeed
        )
        model.fit(X_train, y_train)
        # predict() yields class labels, so no rounding is needed before scoring.
        predictions = model.predict(X_test)
        score = accuracy_score(y_test, predictions)
        scores.append(score)
        print(i, ".score: ", score)

    np_arr_scores = np.array(scores)
    print("weights : ", weights, " mean score: ", np.mean(np_arr_scores) ," max s: ", np.max(np_arr_scores) ," min s: ", np.min(np_arr_scores))

# Base models of the voting ensemble. Each entry gives the estimator class
# ("decisionMaker"), its constructor keyword arguments ("options") and the
# name it gets inside the ensemble ("modelName").
models = [
    dict(
        decisionMaker=DecisionTreeClassifier,
        options=dict(max_leaf_nodes=100),
        modelName="dtc",
    ),
    dict(
        decisionMaker=RandomForestClassifier,
        options=dict(
            max_depth=8,
            n_estimators=905,
            min_samples_split=14,
            min_samples_leaf=1,
            max_features=0.3964,
            oob_score=True,
            random_state=42,
            n_jobs=-1,
        ),
        modelName="rfc1",
    ),
    dict(
        decisionMaker=MLPClassifier,
        options=dict(random_state=1, max_iter=400),
        modelName="clpc",
    ),
]


# Evaluate the ensemble on ten random splits; one weight per model, in list order.
developing(models=models, weights=[1, 100, 10], retryTimes=10)

In [11]:
def finalTest(models=None, weights=None,
              testPath='/kaggle/input/titanic/test.csv',
              outputPath='submission.csv'):
    """Fit the voting ensemble on all training data and write a submission file.

    The ensemble is fitted on the notebook-level ``X`` and ``y``. The test CSV
    is then pushed through the same steps as the training data (drop unused
    columns, encode ``Sex`` with ``sexChange``, one-hot encode, impute), the
    predictions are written to ``outputPath`` and a confirmation is printed.

    Parameters
    ----------
    models : list of dict
        One description per base model with the keys ``"decisionMaker"``
        (the estimator class), ``"options"`` (keyword arguments for that
        class) and ``"modelName"`` (name used inside the ensemble).
    weights : list of numbers, optional
        Passed to ``VotingClassifier`` as ``weights``.
    testPath : str
        CSV file with the rows to predict.
    outputPath : str
        Destination of the CSV with the ``PassengerId`` and ``Survived`` columns.

    Raises
    ------
    ValueError
        If ``models`` is empty or ``None``.
    """
    if not models:
        raise ValueError("models must contain at least one model description")

    estimators = [
        (spec["modelName"], spec["decisionMaker"](**spec["options"]))
        for spec in models
    ]
    model = VotingClassifier(estimators=estimators, weights=weights)
    model.fit(X, y)

    testInputRaw = pd.read_csv(testPath)
    not_encoded_test_X = testInputRaw.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
    not_encoded_test_X['Sex'] = not_encoded_test_X['Sex'].map(sexChange)

    encoded_test_X = pd.get_dummies(not_encoded_test_X)
    # Give the test frame exactly the columns (and order) of the training
    # frame: a dummy column for a category absent from the test file is
    # added as all zeros, and one unknown to the training data is dropped.
    encoded_test_X = encoded_test_X.reindex(columns=encoded_X.columns, fill_value=0)

    # transform(), not fit_transform(): missing test values must be filled
    # with the statistics learned from the training data that the model was
    # fitted on, and the shared imputer must not be re-fitted on test data.
    testInput = my_imputer.transform(encoded_test_X)
    testResult = model.predict(testInput)

    output = pd.DataFrame({
        "PassengerId": testInputRaw['PassengerId'],
        "Survived": testResult,
    })

    output.to_csv(outputPath, index=False)
    print("Your submission was successfully saved!")
    
    
# Train on the full training set and write the submission, using the same
# per-model weights as in the evaluation run.
finalTest(weights=[1, 100, 10], models=models)