# Titanic Classification 

- Description: Classification method using QR codes and a convolutional neural network
- Author: Guilherme Righetto
- Sense: None

In [26]:
import qrcode
import pandas as pd

### Data Munging 

In [32]:
# Read the train and test CSV files and tag every row with the split it came
# from ("_data"), so the rows can still be told apart once they are stacked.
df_train = pd.read_csv("Data/train.csv", sep=",").assign(_data="train")
df_test = pd.read_csv("Data/test.csv", sep=",").assign(_data="test")

# PassengerId values of the test split, as a plain Python list.
list_test = df_test["PassengerId"].tolist()

# One frame holding train rows followed by test rows, with a fresh 0..n-1
# index, so that every feature transformation is applied to both splits.
df_data = pd.concat([df_train, df_test], ignore_index=True)

In [33]:
# Encode Sex numerically (male -> 0, female -> 1), then cast the column to int.
for label, code in (("male", 0), ("female", 1)):
    df_data.loc[df_data["Sex"] == label, "Sex"] = code
df_data["Sex"] = df_data["Sex"].astype(int)

In [34]:
# Embarked: fill missing values with "S", encode S/C/Q as 0/1/2, then replace
# the column with one-hot columns named Embarked_<code>.
#
# The filled Series is assigned back rather than calling
# fillna(..., inplace=True) on df_data["Embarked"]: in-place mutation of a
# column selection is chained assignment, which pandas deprecates and which
# does not update df_data when Copy-on-Write is enabled.
embarked_codes = {"S": 0, "C": 1, "Q": 2}
df_data["Embarked"] = (
    df_data["Embarked"]
    .fillna("S")
    .map(embarked_codes)  # any value outside S/C/Q becomes NaN ...
    .astype(int)          # ... and fails loudly here instead of passing through
)

df_tmp = pd.get_dummies(df_data["Embarked"], prefix="Embarked")
del df_data["Embarked"]
df_data = pd.concat([df_data, df_tmp], axis=1)

In [35]:
# Cabin: reduce each value to a single deck letter ("N" when missing), map the
# letter to an integer code, then replace the column with one-hot columns
# named Cabin_<code>.
#
# Two fixes relative to the earlier version of this cell:
#   * fillna() is assigned back instead of being called with inplace=True on a
#     column selection (chained assignment; a no-op under Copy-on-Write).
#   * str.replace() is given regex=True explicitly. Without it, pandas >= 2.0
#     treats the pattern as a literal string, no digits are removed, and the
#     astype(int) below fails on values that still contain cabin numbers.
df_data["Cabin"] = (
    df_data["Cabin"]
    .fillna("N")
    .str.replace(r"\d+", "", regex=True)  # drop the cabin numbers
    .str.replace(r" .*", "", regex=True)  # keep only the first listed cabin
)

cabin_codes = {
    "A": 0,
    "B": 1,
    "C": 3,
    "D": 9,
    "E": 15,
    "F": 31,
    "G": 63,
    "T": 127,
    "N": 255,
}
# Letters missing from the table become NaN, so astype(int) raises on them.
df_data["Cabin"] = df_data["Cabin"].map(cabin_codes).astype(int)

df_tmp = pd.get_dummies(df_data["Cabin"], prefix="Cabin")
del df_data["Cabin"]
df_data = pd.concat([df_data, df_tmp], axis=1)

In [36]:
# Ticket: keep only the alphabetic prefix of each ticket (non-word characters
# and digits are removed), use "N" when nothing is left, turn the prefixes
# into integer category codes, then replace the column with one-hot columns
# named Ticket_<code>.
#
# regex=True is passed explicitly: pandas >= 2.0 defaults to literal matching,
# under which r'\W' and r'\d' would match nothing.
df_data["Ticket"] = (
    df_data["Ticket"]
    .str.replace(r"\W", "", regex=True)
    .str.replace(r"\d", "", regex=True)
)
df_data.loc[df_data["Ticket"] == "", "Ticket"] = "N"
df_data["Ticket"] = df_data["Ticket"].astype("category").cat.codes

df_tmp = pd.get_dummies(df_data["Ticket"], prefix="Ticket")
del df_data["Ticket"]
df_data = pd.concat([df_data, df_tmp], axis=1)

In [37]:
# Name: collapse each full name into a title bucket, map the bucket to an
# integer code, then replace the column with one-hot columns named Name_<code>.
#
# The patterns are applied in order and each one rewrites the WHOLE string, so
# a name is claimed by the first pattern that matches anywhere inside it.
#
# Two fixes relative to the earlier version of this cell:
#   * regex=True is passed explicitly; pandas >= 2.0 defaults to literal
#     matching, under which none of these patterns would match.
#   * The "lady" bucket is applied before the "Sir" bucket. The "Sir" pattern
#     contains `.*Don.*`, which also matches "Dona"; with the old order the
#     `.*Dona.*` alternative could never fire and such names ended up as "Sir".
#
# NOTE(review): `.*Mr.*` matches both "Mr." and "Mrs.", so both land in the
# "Mrs" bucket. Left unchanged here - confirm whether that merge is intended.
title_patterns = (
    (r".*Mr.*", "Mrs"),
    (r".*Miss.*", "Miss"),
    (r".*Master.*", "Master"),
    (r".*Dr.*", "Dr"),
    (r".*Mlle.*|.*Mme.*", "Mlle"),
    (r".*Dona.*|.*Lady.*|.*the Countess.*", "lady"),
    (r".*Capt.*|.*Don.*|.*Major.*|.*Sir.*|.*Jonkheer.*", "Sir"),
    # Anything that still contains a space was not claimed by a bucket above.
    (r".* .*", "Others"),
)
titles = df_data["Name"]
for pattern, bucket in title_patterns:
    titles = titles.str.replace(pattern, bucket, regex=True)

title_codes = {
    "Miss": 127,
    "Mlle": 63,
    "lady": 31,
    "Master": 15,
    "Sir": 9,
    "Dr": 3,
    "Mrs": 1,
    "Others": 0,
}
# Values missing from the table become NaN, so astype(int) raises on them.
df_data["Name"] = titles.map(title_codes).astype(int)

df_tmp = pd.get_dummies(df_data["Name"], prefix="Name")
del df_data["Name"]
df_data = pd.concat([df_data, df_tmp], axis=1)

In [38]:
# FamilySize: siblings/spouses + parents/children + the passenger themselves.
df_data["FamilySize"] = df_data["SibSp"] + df_data["Parch"] + 1

# Fill missing ages with the mean age truncated to an integer. The result is
# assigned back instead of calling fillna(..., inplace=True) on the column
# selection, which is chained assignment: deprecated in pandas, and a no-op
# on df_data when Copy-on-Write is enabled.
df_data["Age"] = df_data["Age"].fillna(int(df_data["Age"].mean()))

# Children: 1 for passengers younger than 16, otherwise 0.
df_data["Children"] = 0
df_data.loc[df_data["Age"] < 16, "Children"] = 1

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


### Converting data to QR code

In [50]:
# Encoder settings: start from symbol version 2 with the lowest error
# correction level, draw each module as a 2x2 pixel box, and add no border.
qr_settings = {
    "version": 2,
    "error_correction": qrcode.constants.ERROR_CORRECT_L,
    "box_size": 2,
    "border": 0,
}
qr = qrcode.QRCode(**qr_settings)

# Encode only the first row of df_data: the loop exits after one iteration.
# iterrows() yields (index, row) pairs, so line[1] is the row itself.
for line in df_data.iterrows():
    row_values = list(line[1].values)
    qr.add_data(row_values)
    # fit=True asks the encoder to size the symbol to the data.
    qr.make(fit=True)
    img = qr.make_image()
    break

In [52]:
# Show the size of the QR image built in the previous cell
# (the output recorded for this cell was (106, 106)).
img.size

(106, 106)