In [9]:
import pandas as pd

dataset = pd.read_csv('titanic.csv')
display(dataset)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [10]:
import pandas as pd

def fill_age(data):
    data= data[["Sex", "Age", "Pclass", "Fare"]]

    data_copy = data.copy()
    data_copy.loc[data_copy["Age"].isnull(), "Age"] = data_copy.groupby("Pclass")["Age"].transform("mean")

    return data_copy

In [11]:
import pandas as pd

def take_label(data):
    return data["Survived"]

In [12]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def minmax_train(train_data, test_data):
    scaler = MinMaxScaler()

    columns_to_normalize = ["Age", "Fare"]

    train_data[columns_to_normalize] = scaler.fit_transform(train_data[columns_to_normalize])
    test_data[columns_to_normalize] = scaler.transform(test_data[columns_to_normalize])

In [13]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


def knn(train_data, train_labels, test_data, test_labels):
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(train_data, train_labels)
    predictions = knn.predict(test_data)
    error_ratio = 1 - accuracy_score(test_labels, predictions)
    return error_ratio

In [14]:
from sklearn.preprocessing import LabelEncoder

def label_encoder(train_data, test_data):
    label_encoder = LabelEncoder()

    train_data["Sex"] = label_encoder.fit_transform(train_data["Sex"])
    test_data["Sex"] = label_encoder.transform(test_data["Sex"])

In [15]:
train_size = 0.7

row = len(dataset)

num_train_row = int(row * train_size)

train_data = dataset.iloc[:num_train_row]
test_data = dataset.iloc[num_train_row:]

print("Train Data \n", train_data)
print("Test Data \n", test_data)

train_labels = take_label(train_data)
test_labels = take_label(test_data)

print("Train Label \n", train_labels)

train_data = fill_age(train_data)
test_data = fill_age(test_data)

print("Train Data \n", train_data)
print("Test Data \n", test_data)

minmax_train(train_data, test_data)

print("Train Data \n", train_data)
print("Test Data \n", test_data)

label_encoder(train_data, test_data)

error = knn(train_data, train_labels, test_data, test_labels)
print("Error ratio : ", error)


Train Data 
      PassengerId  Survived  Pclass  \
0              1         0       3   
1              2         1       1   
2              3         1       3   
3              4         1       1   
4              5         0       3   
..           ...       ...     ...   
618          619         1       2   
619          620         0       2   
620          621         0       3   
621          622         1       1   
622          623         1       3   

                                                  Name     Sex   Age  SibSp  \
0                              Braund, Mr. Owen Harris    male  22.0      1   
1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                               Heikkinen, Miss. Laina  female  26.0      0   
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                             Allen, Mr. William Henry    male  35.0      0   
..                                                 ...  

In [16]:
from sklearn.model_selection import LeaveOneOut

loo = LeaveOneOut()
error_ratio = 0

for train_index, test_index in loo.split(dataset):
    train_data = dataset.iloc[train_index]
    test_data = dataset.iloc[test_index]

    print("Train Data \n", train_data)
    print("Test Data \n", test_data)

    train_labels = take_label(train_data)
    test_labels = take_label(test_data)

    print("Train Label \n", train_labels)

    train_data = fill_age(train_data)
    test_data = fill_age(test_data)

    print("Train Data \n", train_data)
    print("Test Data \n", test_data)

    if test_data['Age'].isna().any():
        pclass = test_data['Pclass'].iloc[0]
        mean_age = train_data[train_data['Pclass'] == pclass]['Age'].mean()
        test_data['Age'].fillna(mean_age, inplace=True)

    minmax_train(train_data, test_data)

    print("Train Data \n", train_data)
    print("Test Data \n", test_data)

    label_encoder(train_data, test_data)
    error = knn(train_data, train_labels, test_data, test_labels)
    error_ratio += error

error_ratio /= len(dataset)
print("Error Ratio:", error_ratio)


Train Data 
      PassengerId  Survived  Pclass  \
1              2         1       1   
2              3         1       3   
3              4         1       1   
4              5         0       3   
5              6         0       3   
..           ...       ...     ...   
886          887         0       2   
887          888         1       1   
888          889         0       3   
889          890         1       1   
890          891         0       3   

                                                  Name     Sex   Age  SibSp  \
1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                               Heikkinen, Miss. Laina  female  26.0      0   
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                             Allen, Mr. William Henry    male  35.0      0   
5                                     Moran, Mr. James    male   NaN      0   
..                                                 ...  

Train Data 
      PassengerId  Survived  Pclass  \
0              1         0       3   
1              2         1       1   
2              3         1       3   
3              4         1       1   
4              5         0       3   
..           ...       ...     ...   
886          887         0       2   
887          888         1       1   
888          889         0       3   
889          890         1       1   
890          891         0       3   

                                                  Name     Sex   Age  SibSp  \
0                              Braund, Mr. Owen Harris    male  22.0      1   
1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                               Heikkinen, Miss. Laina  female  26.0      0   
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                             Allen, Mr. William Henry    male  35.0      0   
..                                                 ...  

In [17]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=10)
fold_index = 1
error_ratio = 0

for train_index, test_index in kf.split(dataset):
    train_data = dataset.iloc[train_index]
    test_data = dataset.iloc[test_index]
    fold_index += 1

    print("Train Data \n", train_data)
    print("Test Data \n", test_data)

    train_labels = take_label(train_data)
    test_labels = take_label(test_data)

    print("Train Label \n", train_labels)

    train_data = fill_age(train_data)
    test_data = fill_age(test_data)

    print("Train Data \n", train_data)
    print("Test Data \n", test_data)


    minmax_train(train_data, test_data)

    print("Train Data \n", train_data)
    print("Test Data \n", test_data)

    label_encoder(train_data, test_data)

    error = knn(train_data, train_labels, test_data, test_labels)
    error_ratio += error

fold_index -= 1
error_ratio = error_ratio / fold_index
print("Error ratio : ", error_ratio)

Train Data 
      PassengerId  Survived  Pclass                                      Name  \
90            91         0       3                      Christmann, Mr. Emil   
91            92         0       3                Andreasson, Mr. Paul Edvin   
92            93         0       1               Chaffee, Mr. Herbert Fuller   
93            94         0       3                   Dean, Mr. Bertram Frank   
94            95         0       3                         Coxon, Mr. Daniel   
..           ...       ...     ...                                       ...   
886          887         0       2                     Montvila, Rev. Juozas   
887          888         1       1              Graham, Miss. Margaret Edith   
888          889         0       3  Johnston, Miss. Catherine Helen "Carrie"   
889          890         1       1                     Behr, Mr. Karl Howell   
890          891         0       3                       Dooley, Mr. Patrick   

        Sex   Age  SibSp  