## Imports

In [None]:
import pandas as pd
import numpy as np

## Settings and parameters

In [None]:
# Display settings for pandas.
# The canonical option names are spelled out (display.max_columns, not
# display.max.columns): pandas resolves unknown keys as regular expressions,
# which only works as long as exactly one option happens to match.
pd.set_option("display.max_columns", None)   # show every column
pd.set_option("display.max_rows", 500)       # show up to 500 rows before truncating
pd.set_option("display.precision", 2)        # default number of decimals
# Floats are rendered through this formatter, i.e. with three decimals.
pd.set_option("display.float_format", "{:.3f}".format)
pd.set_option("display.max_colwidth", None)  # do not truncate wide cell contents

# Workshop toggle: voeg_passagier_toe only adds passengers while this is True.

add_extra_passengers = True

## Load data


In [None]:
# Relative location and file name of the Titanic training set.
datapath = "../data/"
filename = "train.csv"

# Read the CSV into the DataFrame that the following cells work on.
df = pd.read_csv("{}{}".format(datapath, filename))

### VARIABLE DESCRIPTIONS  
Pclass Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd)   
survival Survival (0 = No; 1 = Yes)  
name Name   
sex Sex  
age Age  
sibsp Number of Siblings/Spouses Aboard  
parch Number of Parents/Children Aboard  
ticket Ticket Number  
fare Passenger Fare (British pound)  
cabin Cabin  
embarked Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)  
boat Lifeboat  
body Body Identification Number  
home.dest Home/Destination  

### SPECIAL NOTES
Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)  
Pclass is a proxy for socio-economic status (SES)  
1st ~ Upper; 2nd ~ Middle; 3rd ~ Lower  
Age is in Years; Fractional if Age less than One (1)  
If the Age is estimated, it is in the form xx.5  
Fare is in Pre-1970 British Pounds (£)  
Conversion Factors: £1 = 20s = 240d and 1s = 12d    
#### With respect to the family relation variables (i.e. sibsp and parch) some relations were ignored. The following are the definitions used for sibsp and parch.
Sibling: Brother, Sister, Stepbrother, or Stepsister of Passenger Aboard Titanic  
Spouse: Husband or Wife of Passenger Aboard Titanic (Mistresses and Fiances Ignored)  
Parent: Mother or Father of Passenger Aboard Titanic  
Child: Son, Daughter, Stepson, or Stepdaughter of Passenger Aboard Titanic  
Other family relatives excluded from this study include cousins, nephews/nieces,  
aunts/uncles, and in-laws. Some children travelled only with a nanny, therefore parch=0  
for them. As well, some travelled with very close friends or neighbors in a village,  
however, the definitions do not support such relations.  

In [None]:
# Mapping from the original English CSV headers to the Dutch column names
# used in the rest of the notebook.
# NOTE(review): per the variable descriptions, Parch counts parents *and*
# children and SibSp counts siblings/spouses; the Dutch names are a loose
# translation of that.
rename_dict = dict(
    PassengerId="Passagier_Id",
    Survived="Overleefd",
    Pclass="Ticket_klasse",
    Name="Naam",
    Sex="Geslacht",
    Age="Leeftijd",
    SibSp="Aantal_overige_familieleden",
    Parch="Aantal_kinderen",
    Ticket="Ticket_nummer",
    Fare="Ticket_prijs",
    Cabin="Cabine_nummer",
    Embarked="Opstapplaats",
)

In [None]:
# One pipeline: translate the headers, put the columns in presentation order,
# and add a flag column that is 0 for every passenger from the CSV
# (workshop passengers added later get 1).
df = (
    df.rename(columns=rename_dict)
    .loc[
        :,
        [
            "Passagier_Id",
            "Naam",
            "Geslacht",
            "Leeftijd",
            "Opstapplaats",
            "Aantal_kinderen",
            "Aantal_overige_familieleden",
            "Ticket_nummer",
            "Ticket_klasse",
            "Ticket_prijs",
            "Cabine_nummer",
            "Overleefd",
        ],
    ]
    .assign(Workshop_passagier=0)
)

In [None]:
# Column overview (non-null counts and dtypes).
df.info() # columns that still need attention: age (Leeftijd), port of embarkation (Opstapplaats) & cabin number (Cabine_nummer)

In [None]:
# Show the last three rows of the frame as loaded from the CSV.
df.tail(3)

In [None]:
# Workshop passenger 1 -- read by voeg_passagier_toe(1) via the "_1" suffix.

# Who
Naam_1 = "Maikel Jonkers (Kapitein)"  # free text
Geslacht_1 = "Man"                    # "Man" or "Vrouw"
Leeftijd_1 = 40                       # age in years

# Family aboard
Aantal_kinderen_1 = 2                 # whole number
Aantal_overige_familieleden_1 = 3     # whole number

# Journey
Opstapplaats_1 = "Southampton"        # Southampton (England), Queenstown (Ireland) or Cherbourg (France)
Ticket_klasse_1 = 1                   # 1, 2 or 3
Ticket_prijs_1 = 100                  # whole number

In [None]:
# Workshop passenger 2 -- read by voeg_passagier_toe(2) via the "_2" suffix.

# Who
Naam_2 = "Luuk Jans (Matroos)"        # free text
Geslacht_2 = "Man"                    # "Man" or "Vrouw"
Leeftijd_2 = 30                       # age in years

# Family aboard
Aantal_kinderen_2 = 0                 # whole number
Aantal_overige_familieleden_2 = 4     # whole number

# Journey
Opstapplaats_2 = "Southampton"        # Southampton (England), Queenstown (Ireland) or Cherbourg (France)
Ticket_klasse_2 = 2                   # 1, 2 or 3
Ticket_prijs_2 = 300                  # whole number

In [None]:
def voeg_passagier_toe(passagier_nummer):
    """Append one workshop passenger as a new row of the global ``df``.

    The passenger's details are read from the module-level variables
    ``Naam_<n>``, ``Geslacht_<n>``, ``Leeftijd_<n>``, ``Opstapplaats_<n>``,
    ``Aantal_kinderen_<n>``, ``Aantal_overige_familieleden_<n>``,
    ``Ticket_klasse_<n>`` and ``Ticket_prijs_<n>``, where ``<n>`` is
    ``passagier_nummer``. The new row gets the next index and passenger id,
    missing ticket/cabin numbers, ``Overleefd`` 0 and ``Workshop_passagier`` 1.

    Nothing happens when ``add_extra_passengers`` is falsy, or when a workshop
    passenger with the same name is already present (so re-running the
    notebook cell does not create duplicates).

    Args:
        passagier_nummer: Suffix of the variables to read (e.g. ``1``).

    Raises:
        NameError: If one of the expected variables is not defined.
    """
    if not add_extra_passengers:
        return

    achtervoegsel = str(passagier_nummer)
    module_variabelen = globals()

    def lees(veld):
        # Plain namespace lookup instead of eval(): no expression is executed.
        variabele = veld + "_" + achtervoegsel
        try:
            return module_variabelen[variabele]
        except KeyError:
            # Keep the exception type a missing variable produced before.
            raise NameError(
                "name '{}' is not defined".format(variabele)
            ) from None

    naam = lees("Naam")

    # Idempotence guard: skip passengers this function already added.
    al_aanwezig = (df["Workshop_passagier"] == 1) & (df["Naam"] == naam)
    if al_aanwezig.any():
        print(naam + " staat al op de passagiers lijst.")
        return

    # Next free row label; an empty frame starts at 0.
    nieuwe_index = int(df.index[-1]) + 1 if len(df.index) else 0

    rij = {
        "Passagier_Id": nieuwe_index + 1,
        "Naam": naam,
        "Geslacht": lees("Geslacht"),
        "Leeftijd": lees("Leeftijd"),
        "Opstapplaats": lees("Opstapplaats"),
        "Aantal_kinderen": lees("Aantal_kinderen"),
        "Aantal_overige_familieleden": lees("Aantal_overige_familieleden"),
        # Real missing values (not the string "NaN") so isna()/info() see them.
        "Ticket_nummer": np.nan,
        "Ticket_klasse": lees("Ticket_klasse"),
        "Ticket_prijs": lees("Ticket_prijs"),
        "Cabine_nummer": np.nan,
        # Integer like the rest of the column, not the string "0".
        "Overleefd": 0,
        "Workshop_passagier": 1,
    }

    # Order the values by column name so a changed column order cannot
    # silently put values in the wrong column.
    df.loc[nieuwe_index] = [rij[kolom] for kolom in df.columns]
    print(naam + " is toegevoegd aan de passagiers lijst.")

In [None]:
# Add the two workshop passengers defined in the cells above.
# NOTE: re-running this cell calls voeg_passagier_toe again for the same
# passengers, so duplicate rows appear unless that function guards against it
# (original TODO: build in such a guard, e.g. by pinning the index?).
voeg_passagier_toe(1)
voeg_passagier_toe(2)

In [None]:
# Show the last three rows again, now after the voeg_passagier_toe calls.
df.tail(3)

In [None]:
def df_replace_column_values(df,column,old_value,new_value):
    """Overwrite, in place, every ``old_value`` in ``df[column]`` with ``new_value``.

    Args:
        df: DataFrame to modify.
        column: Label of the column to edit.
        old_value: Value to look for (compared with ``==``).
        new_value: Value written to the matching rows.

    Returns:
        None; ``df`` itself is modified.
    """
    # Boolean row mask of the cells that currently hold the old value.
    te_vervangen = df[column] == old_value
    df.loc[te_vervangen, column] = new_value

In [None]:
# Spell out the port-of-embarkation codes.
df_replace_column_values(df, "Opstapplaats", "C", "Cherbourg")
df_replace_column_values(df, "Opstapplaats", "Q", "Queenstown")
df_replace_column_values(df, "Opstapplaats", "S", "Southampton")

# Translate the English gender labels to the Dutch ones used for the
# workshop passengers.
df_replace_column_values(df, "Geslacht", "male", "Man")
df_replace_column_values(df, "Geslacht", "female", "Vrouw")

In [None]:
# Display the frame to inspect the result of the value replacements.
df

In [None]:
# Print the distinct values of each column, one array per line, in this order.
for kolom in (
    "Leeftijd",
    "Geslacht",
    "Opstapplaats",
    "Aantal_kinderen",
    "Aantal_overige_familieleden",
    "Ticket_klasse",
):
    print(df[kolom].unique())

 ![Southampton](../images/southampton.jpg) 
 ![Queenstown](../images/queenstown.jpg)
 ![Southampton](../images/southampton.jpg)