In [1]:
import numpy as np
import pandas as pd

# Load the Titanic training data from the working directory.
titanic_train = pd.read_csv("train.csv")

# Cast every cabin entry to text so each one can be indexed by character;
# a missing entry is cast to text as well and is handled like any other.
char_cabin = titanic_train.loc[:, "Cabin"].astype(str)

# Keep only the leading character of each cabin string.
new_Cabin = np.array([text[0] for text in char_cabin.tolist()])

# Replace the Cabin column with the one-letter codes as a categorical.
titanic_train["Cabin"] = pd.Categorical(new_Cabin)

In [2]:
# One-way frequency table of Survived: the rows are the distinct values and
# the literal "count" becomes the label of the single count column.
my_tab = pd.crosstab(titanic_train["Survived"], "count")

my_tab

col_0,count
Survived,Unnamed: 1_level_1
0,549
1,342


In [3]:
# Check what kind of object pd.crosstab handed back.
type(my_tab)

pandas.core.frame.DataFrame

In [4]:
# One-way frequency table of passenger class, with a column labelled "count".
pd.crosstab(index=titanic_train.Pclass, columns="count")

col_0,count
Pclass,Unnamed: 1_level_1
1,216
2,184
3,491


In [5]:
# One-way frequency table of sex, with a column labelled "count".
pd.crosstab(
    index=titanic_train["Sex"],
    columns="count",
)

col_0,count
Sex,Unnamed: 1_level_1
female,314
male,577


In [6]:
# One-way frequency table of the Cabin column, with a column labelled "count".
cabin_tab = pd.crosstab(titanic_train["Cabin"], columns="count")

cabin_tab

col_0,count
Cabin,Unnamed: 1_level_1
A,15
B,47
C,59
D,33
E,32
F,13
G,4
T,1
n,687


In [7]:
# The same tally as a Series: value_counts() on the Sex column.
titanic_train["Sex"].value_counts()

Sex
male      577
female    314
Name: count, dtype: int64

In [8]:
# Print the column total, then the (rows, cols) shape; each is followed by a
# space, a newline and print's own line ending.
for summary_item in (cabin_tab.sum(), cabin_tab.shape):
    print(summary_item, "\n")

# Positional slice: rows at positions 1 through 6 (the stop index 7 is excluded).
cabin_tab.iloc[1:7]

col_0
count    891
dtype: int64 

(9, 1) 



col_0,count
Cabin,Unnamed: 1_level_1
B,47
C,59
D,33
E,32
F,13
G,4


In [9]:
# Turn counts into proportions by dividing the column by its own total.
cabin_tab.div(cabin_tab.sum(), axis="columns")

col_0,count
Cabin,Unnamed: 1_level_1
A,0.016835
B,0.05275
C,0.066218
D,0.037037
E,0.035915
F,0.01459
G,0.004489
T,0.001122
n,0.771044


In [10]:
# Two-way table: survival outcome (rows) against sex (columns).
# The row labels are replaced with readable names for the two Survived values.
survived_sex = pd.crosstab(
    index=titanic_train["Survived"],
    columns=titanic_train["Sex"],
).set_axis(["died", "survived"], axis="index")

survived_sex

Sex,female,male
died,81,468
survived,233,109


In [11]:
# Two-way table: survival outcome (rows) against passenger class (columns),
# with both axes relabelled to readable names.
survived_class = (
    pd.crosstab(
        index=titanic_train["Survived"],
        columns=titanic_train["Pclass"],
    )
    .set_axis(["class1", "class2", "class3"], axis="columns")
    .set_axis(["died", "survived"], axis="index")
)

survived_class

Unnamed: 0,class1,class2,class3
died,80,97,372
survived,136,87,119


In [12]:
# Survival vs passenger class again, this time with margins=True so the
# table gains a totals column and a totals row; both axes are then relabelled.
survived_class = (
    pd.crosstab(
        index=titanic_train["Survived"],
        columns=titanic_train["Pclass"],
        margins=True,
    )
    .set_axis(["class1", "class2", "class3", "rowtotal"], axis="columns")
    .set_axis(["died", "survived", "coltotal"], axis="index")
)

survived_class

Unnamed: 0,class1,class2,class3,rowtotal
died,80,97,372,549
survived,136,87,119,342
coltotal,216,184,491,891


In [13]:
# Divide every cell by the grand total (the coltotal/rowtotal corner cell)
# to express the whole table as proportions of all passengers.
survived_class.div(survived_class.at["coltotal", "rowtotal"])

Unnamed: 0,class1,class2,class3,rowtotal
died,0.089787,0.108866,0.417508,0.616162
survived,0.152637,0.097643,0.133558,0.383838
coltotal,0.242424,0.20651,0.551066,1.0


In [14]:
# Divide each column by its entry in the coltotal row, giving the
# proportions within each column.
survived_class.div(survived_class.loc["coltotal"], axis="columns")


Unnamed: 0,class1,class2,class3,rowtotal
died,0.37037,0.527174,0.757637,0.616162
survived,0.62963,0.472826,0.242363,0.383838
coltotal,1.0,1.0,1.0,1.0


In [15]:
# Divide each row by its entry in the rowtotal column, giving the
# proportions within each row.
survived_class.div(survived_class.loc[:, "rowtotal"], axis="index")

Unnamed: 0,class1,class2,class3,rowtotal
died,0.145719,0.176685,0.677596,1.0
survived,0.397661,0.254386,0.347953,1.0
coltotal,0.242424,0.20651,0.551066,1.0


In [16]:
# Alternative route to row proportions: after transposing, the original row
# labels are the columns, so the rowtotal Series lines up with them.
survived_class.transpose().div(survived_class["rowtotal"], axis="columns")


Unnamed: 0,died,survived,coltotal
class1,0.145719,0.397661,0.242424
class2,0.176685,0.254386,0.20651
class3,0.677596,0.347953,0.551066
rowtotal,1.0,1.0,1.0


In [17]:
# Three-way table: survival (rows) against passenger class and sex, which
# together form a two-level column index; margins=True adds the "All" totals.
surv_sex_class = pd.crosstab(
    titanic_train["Survived"],
    [titanic_train[key] for key in ("Pclass", "Sex")],
    margins=True,
)

surv_sex_class

Pclass,1,1,2,2,3,3,All
Sex,female,male,female,male,female,male,Unnamed: 7_level_1
Survived,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0,3,77,6,91,72,300,549
1,91,45,70,17,72,47,342
All,94,122,76,108,144,347,891


In [18]:
# Indexing with a label from the outer (Pclass) level of the two-level column
# index returns that label's sub-table, leaving only the Sex level as columns.
surv_sex_class[2]        # Get the subtable under Pclass 2

Sex,female,male
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1
0,6,91
1,70,17
All,76,108


In [19]:
# Select the Pclass 2 sub-table first, then pull its "female" column out as
# a Series.
surv_sex_class[2].loc[:, "female"]

Survived
0       6
1      70
All    76
Name: female, dtype: int64

In [20]:
# Divide each column by its entry in the "All" row to get the proportions
# within every class/sex column.
surv_sex_class.div(surv_sex_class.loc["All"], axis="columns")

Pclass,1,1,2,2,3,3,All
Sex,female,male,female,male,female,male,Unnamed: 7_level_1
Survived,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0,0.031915,0.631148,0.078947,0.842593,0.5,0.864553,0.616162
1,0.968085,0.368852,0.921053,0.157407,0.5,0.135447,0.383838
All,1.0,1.0,1.0,1.0,1.0,1.0,1.0
