<img src="https://miro.medium.com/max/2500/1*Q59ONUgBl159KgMJvghShA.jpeg" width="500" height="500">

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.metrics import confusion_matrix, accuracy_score
from keras.models import Sequential
from keras.layers import Dense
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the training table, the test table and the sample submission template
# from the tabular-playground-series-apr-2021 input directory.
train = pd.read_csv("../input/tabular-playground-series-apr-2021/train.csv")
test = pd.read_csv("../input/tabular-playground-series-apr-2021/test.csv")
submission = pd.read_csv("../input/tabular-playground-series-apr-2021/sample_submission.csv")

In [None]:
# Print the shape of each of the three loaded frames.
print("Shape of train: ", train.shape)
print("Shape of test: ", test.shape)
print("Shape of sample_submission: ", submission.shape)

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Print the DataFrame.info() summary of the test frame.
test.info()

In [None]:
# Print the DataFrame.info() summary of the training frame.
train.info()

## Editing datasets

In [None]:
# Remove the 'PassengerId', 'Name', 'Ticket' and 'Cabin' columns from both
# frames, modifying each frame in place.
train.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'], inplace=True)
test.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'], inplace=True)

In [None]:
# Fill missing 'Age' and 'Fare' values with the column mean of the same frame.
cols = ['Age', 'Fare']
for col in cols:
    # Series.mean() ignores NaN in both the sum and the count. The earlier
    # sum()/size form divided a NaN-skipping sum by the total row count
    # (missing rows included), which pulled the fill value below the true mean.
    train[col] = train[col].fillna(train[col].mean())
    test[col] = test[col].fillna(test[col].mean())

In [None]:
def embarked_max_value(dataset):
    """Return the most frequent non-missing value of ``dataset['Embarked']``.

    Args:
        dataset: A frame with an 'Embarked' column.

    Returns:
        The port code with the highest count. The codes 'C', 'Q' and 'S' are
        always candidates (starting at zero), so a column with no usable
        values returns 'C'; ties go to the code seen first in that order.
    """
    embarked_dict = {'C': 0, 'Q': 0, 'S': 0}
    # dropna() skips both NaN and None, so missing entries are never counted.
    for port in dataset['Embarked'].dropna():
        # .get() tolerates a code outside the three seeded ones.
        embarked_dict[port] = embarked_dict.get(port, 0) + 1
    # Compare by count: a bare max() over the dict would compare the keys
    # themselves and always pick the alphabetically largest one.
    return max(embarked_dict, key=embarked_dict.get)

In [None]:
# Fill missing 'Embarked' entries in each frame with the value that
# embarked_max_value returns for that same frame.
train['Embarked'] = train['Embarked'].fillna(embarked_max_value(train))
test['Embarked'] = test['Embarked'].fillna(embarked_max_value(test))

## EDA (Exploratory Data Analysis) 📊

### Countplot

In [None]:
def countplot(str):  # TODO: try using fig, ax
    """Overlay count plots of column ``str`` from the test and train frames.

    The test frame is drawn first and the train frame second, each with its
    own colour and legend label; the legend is then added and the figure shown.
    Reads the module-level ``test`` and ``train`` frames.
    """
    layers = (
        (test, "#56ad74", 'test'),
        (train, "#345082", 'train'),
    )
    for frame, colour, tag in layers:
        sns.countplot(frame[str], color=colour, label=tag)
    plt.legend()
    plt.show()

In [None]:
# Compare the 'Sex' value counts of the test and train frames.
countplot('Sex')

In [None]:
# Compare the 'Pclass' value counts of the test and train frames.
countplot('Pclass')

In [None]:
# Compare the 'SibSp' value counts of the test and train frames.
countplot('SibSp')

In [None]:
# Count plot of the 'Survived' column (train frame only).
sns.countplot(train['Survived'])
plt.show()

### Distplot

In [None]:
# Draw one distribution plot per selected training column on a 2x3 grid.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(10,10))
for i, col in enumerate(train.columns[[0,1,3,4,5,6]]):
    # Map the running index to (row, column) of the 3-wide grid.
    x, y = divmod(i, 3)
    sns.distplot(train[col], ax=ax[x, y], color="blue")
plt.tight_layout()
plt.show()

### Encoding

In [None]:
def encoder(train_dataset, test_dataset):
    """Label-encode the 'Sex' and 'Embarked' columns of both frames.

    For each column a LabelEncoder is fitted on the training frame and the
    fitted encoder is then applied to the test frame. Both frames are modified
    through column assignment and the same two objects are returned.

    Args:
        train_dataset: Frame used to fit the encoders; its columns are replaced.
        test_dataset: Frame transformed with the encoders fitted on train.

    Returns:
        The tuple ``(train_dataset, test_dataset)``.
    """
    for column in ('Sex', 'Embarked'):
        label_encoder = LabelEncoder()
        train_dataset[column] = label_encoder.fit_transform(train_dataset[column])
        test_dataset[column] = label_encoder.transform(test_dataset[column])
    return train_dataset, test_dataset

In [None]:
# Encode 'Sex' and 'Embarked' in both frames and rebind the returned frames.
train, test = encoder(train, test)

In [None]:
# Show the DataFrame.describe() statistics of the training frame.
train.describe()

In [None]:
# Show the DataFrame.describe() statistics of the test frame.
test.describe()

### Correlation Matrix

In [None]:
# Compute the correlation matrix of the training frame, then plot and print
# each column's correlation with 'Survived' in ascending order.
corr_matrix = train.corr()
corr_matrix['Survived'].sort_values().plot(kind="bar")
print(corr_matrix['Survived'].sort_values())
plt.show()

In [None]:
# Clustered heatmap of the correlation matrix, annotated to three decimals.
sns.clustermap(corr_matrix, annot=True, fmt=".3f", figsize=(10,10))
# NOTE(review): plt.title targets the current axes, which after clustermap may
# not be the heatmap panel — confirm the title lands where intended.
plt.title("Correlation Between Features")
plt.show()

### Pairplot

In [None]:
# Pairwise plots of the training columns, coloured by 'Survived', with KDE
# plots on the diagonal.
sns.pairplot(train, diag_kind="kde", hue="Survived")
plt.show()

### Boxplot

In [None]:
# One horizontal box plot per listed training column, each shown as its own
# figure and titled with the column name.
cols = ['Age', 'SibSp', 'Parch', 'Fare']
for col in cols:
    sns.boxplot(x=col, data=train, orient="h")
    plt.title(col)
    plt.show()

## Train test split

In [None]:
# X holds every training column except the target; Y holds the 'Survived'
# target as a NumPy array.
X = train.drop(columns=['Survived'])
Y = train['Survived'].to_numpy()

In [None]:
# Hold out 20% of the rows as a validation set; random_state is fixed at 58 so
# the split is repeatable.
x_train, x_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=58)

In [None]:
# Print the shapes of the training and validation features and targets.
print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)

In [None]:
def create_ann_model():
    """Build and compile a small fully connected binary classifier.

    The network takes 7 input features and stacks ReLU Dense layers of 8, 4
    and 2 units, followed by a single sigmoid output unit. It is compiled with
    binary cross-entropy loss and the accuracy metric; no optimizer is passed,
    so whatever ``compile`` uses by default applies.

    Returns:
        The compiled ``Sequential`` model.
    """
    hidden_units = (4, 2)
    network = Sequential()
    # The first layer also declares the input width.
    network.add(Dense(8, activation="relu", input_dim=7))
    for units in hidden_units:
        network.add(Dense(units, activation="relu"))
    network.add(Dense(1, activation="sigmoid"))
    network.compile(loss="binary_crossentropy", metrics=['accuracy'])
    return network

# Instantiate the network and print its layer summary.
model = create_ann_model()
model.summary()

In [None]:
# Train for 5 epochs with mini-batches of 32 rows.
model.fit(x_train, y_train, epochs=5, batch_size=32) # TODO: use one-hot encoding (OHE)

In [None]:
# Score the model on the validation split and plot its confusion matrix.
# Take the single output column and threshold it at 0.5 to get hard 0/1
# labels — the same rule the submission cell applies to the test predictions.
pred = (model.predict(x_val)[:, 0] > 0.5).astype(int)
y_true = np.int64(y_val)
cm = confusion_matrix(y_true, pred)
score = accuracy_score(y_true, pred)
print("Score: ", score)
fig, ax = plt.subplots(figsize=(8,8))
# Confusion-matrix cells are integer counts, so annotate them as integers.
sns.heatmap(cm, annot=True, linewidths=0.01, cmap="Blues", linecolor="green", fmt="d", ax=ax)
plt.xlabel("Predict")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Predict on the test frame, threshold the first output column at 0.5 to get
# 0/1 labels, and write them to submission.csv without the index column.
pred = model.predict(test)
submission['Survived'] = (pred[:, 0] > 0.5).astype(int)
submission.to_csv('submission.csv', index=False)