# Objective : Indexing & Selecting Data
<hr>

1. Indexing using loc
2. Indexing using iloc
3. Accessing with [ ]
4. Selecting data using isin
5. Selecting data using where
6. Selecting data using query
7. In & not in Operator
8. set & reset index
9. Selecting columns by type
10. Accessing multiIndex data

<hr>


In [1]:
import pandas as pd

### 1. Indexing using loc
* loc is primarily label based, but may also be used with a boolean array. .loc will raise KeyError when the items are not found.

In [2]:
jagadeesh_df = pd.read_csv('C:/New new/titanic-master/titanic_train.csv')

In [3]:
jagadeesh_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
jagadeesh_df.loc[2]

PassengerId                         3
Survived                            1
Pclass                              3
Name           Heikkinen, Miss. Laina
Sex                            female
Age                                26
SibSp                               0
Parch                               0
Ticket               STON/O2. 3101282
Fare                            7.925
Cabin                             NaN
Embarked                            S
Name: 2, dtype: object

* A list or array of labels ['a', 'b', 'c'].

In [5]:
jagadeesh_df.loc[[2,3,4]]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


* A slice object with labels 'a':'f' (Note that contrary to usual python slices, both the start and the stop are included, when present in the index. See Slicing with labels and Endpoints are inclusive.)

In [6]:
jagadeesh_df.loc[2:5]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q


* A boolean array

In [7]:
a = pd.Series(False, jagadeesh_df.index)

In [8]:
a[2] = True
a[3] = True

In [9]:
a[:5]

0    False
1    False
2     True
3     True
4    False
dtype: bool

In [10]:
jagadeesh_df.loc[a]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S


* A callable function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above).

In [11]:
def func(e):
    return e.Sex == 'female'

In [12]:
#will get  only those first five rows having Sex=female

In [13]:
jagadeesh_df.loc[func].head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


### 2. Indexing using iloc
* iloc is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a boolean array. 

In [14]:
jagadeesh_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


* An integer 

In [15]:
jagadeesh_df.iloc[2]

PassengerId                         3
Survived                            1
Pclass                              3
Name           Heikkinen, Miss. Laina
Sex                            female
Age                                26
SibSp                               0
Parch                               0
Ticket               STON/O2. 3101282
Fare                            7.925
Cabin                             NaN
Embarked                            S
Name: 2, dtype: object

* A list of array of integers

In [16]:
jagadeesh_df.iloc[[1,2,3]]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S


* Slice object with 1:7

In [17]:
jagadeesh_df.iloc[1:7]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S


* A boolean array

In [18]:
a[:5]

0    False
1    False
2     True
3     True
4    False
dtype: bool

In [19]:
jagadeesh_df.iloc[a.values]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S


* A callable function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above).

In [20]:
def func(e):
    res = e.Sex == 'female'
    return res.values

jagadeesh_df.iloc[func].head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


### 3. Indexing using [ ]
* Access in a Series & DataFrame

In [21]:
jagadeesh_df['Name'][:5]

0                              Braund, Mr. Owen Harris
1    Cumings, Mrs. John Bradley (Florence Briggs Th...
2                               Heikkinen, Miss. Laina
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                             Allen, Mr. William Henry
Name: Name, dtype: object

In [22]:
jagadeesh_df.Name[5]

'Moran, Mr. James'

### 4. Selecting with isin
*  isin() method of Series, which returns a boolean vector that is true wherever the Series elements exist in the passed list. This allows you to select rows where one or more columns have values you want:

In [23]:
s = jagadeesh_df.Age

In [24]:
matches = s.isin([10,20,30])

In [25]:
jagadeesh_df[matches]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
12,13,0,3,"Saundercock, Mr. William Henry",male,20.0,0,0,A/5. 2151,8.05,,S
79,80,1,3,"Dowdell, Miss. Elizabeth",female,30.0,0,0,364516,12.475,,S
91,92,0,3,"Andreasson, Mr. Paul Edvin",male,20.0,0,0,347466,7.8542,,S
113,114,0,3,"Jussila, Miss. Katriina",female,20.0,1,0,4136,9.825,,S
131,132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20.0,0,0,SOTON/O.Q. 3101307,7.05,,S
157,158,0,3,"Corn, Mr. Harry",male,30.0,0,0,SOTON/OQ 392090,8.05,,S
178,179,0,2,"Hale, Mr. Reginald",male,30.0,0,0,250653,13.0,,S
213,214,0,2,"Givard, Mr. Hans Kristensen",male,30.0,0,0,250646,13.0,,S
219,220,0,2,"Harris, Mr. Walter",male,30.0,0,0,W/C 14208,10.5,,S
244,245,0,3,"Attalah, Mr. Sleiman",male,30.0,0,0,2694,7.225,,C


* DataFrame also has an isin() method. When calling isin, pass a set of values as either an array or dict. 
* If values is an array, isin returns a DataFrame of booleans that is the same shape as the original DataFrame, with True wherever the element is in the sequence of values.
* Just make values a dict where the key is the column, and the value is a list of items you want to check for.

In [26]:
matches = {'Pclass':[3], 'Age':[20,26]}

In [27]:
jagadeesh_df[['Pclass','Age']].isin(matches).all(axis=1)[:5]

0    False
1    False
2     True
3    False
4    False
dtype: bool

In [28]:
jagadeesh_df[jagadeesh_df[['Pclass','Age']].isin(matches).all(axis=1)]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
12,13,0,3,"Saundercock, Mr. William Henry",male,20.0,0,0,A/5. 2151,8.05,,S
69,70,0,3,"Kink, Mr. Vincenz",male,26.0,2,0,315151,8.6625,,S
73,74,0,3,"Chronopoulos, Mr. Apostolos",male,26.0,1,0,2680,14.4542,,C
91,92,0,3,"Andreasson, Mr. Paul Edvin",male,20.0,0,0,347466,7.8542,,S
93,94,0,3,"Dean, Mr. Bertram Frank",male,26.0,1,2,C.A. 2315,20.575,,S
113,114,0,3,"Jussila, Miss. Katriina",female,20.0,1,0,4136,9.825,,S
131,132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20.0,0,0,SOTON/O.Q. 3101307,7.05,,S
162,163,0,3,"Bengtsson, Mr. John Viktor",male,26.0,0,0,347068,7.775,,S
207,208,1,3,"Albimona, Mr. Nassef Cassem",male,26.0,0,0,2699,18.7875,,C


### 5. Selecting data using where method
* Selecting values from a Series with a boolean vector generally returns a subset of the data. 
* To guarantee that selection output has the same shape as the original data, you can use the where method in Series and DataFrame.

In [29]:
import numpy as np

In [30]:
jagadeesh_df2 = pd.DataFrame(np.random.randn(20).reshape(4,5))

In [31]:
jagadeesh_df2

Unnamed: 0,0,1,2,3,4
0,-0.99661,1.48137,-0.138602,0.925768,0.079288
1,-0.547327,1.079624,1.059569,-1.466464,0.798432
2,-1.310994,1.10286,0.502321,1.31459,-0.17421
3,-0.126139,1.026085,1.956733,1.434981,-2.002093


In [32]:
jagadeesh_df2.where(jagadeesh_df2 > 0,-1)

Unnamed: 0,0,1,2,3,4
0,-1.0,1.48137,-1.0,0.925768,0.079288
1,-1.0,1.079624,1.059569,-1.0,0.798432
2,-1.0,1.10286,0.502321,1.31459,-1.0
3,-1.0,1.026085,1.956733,1.434981,-1.0


### 6. Selecting Data using Query
* DataFrame objects have a query() method that allows selection using an expression.
* Same query to multiple dataframes
* Slightly faster than python way of doing things

In [33]:
jagadeesh_df3 = pd.DataFrame(np.random.randint(10, size=(10, 2)), columns=list('bc'))
jagadeesh_df4 = pd.DataFrame(np.random.randint(15, size=(15, 2)), columns=list('bc'))

In [34]:
jagadeesh_df3

Unnamed: 0,b,c
0,9,0
1,3,6
2,0,7
3,0,9
4,1,9
5,2,8
6,7,5
7,5,8
8,6,7
9,8,0


In [35]:
jagadeesh_df3.query('b < c')

Unnamed: 0,b,c
1,3,6
2,0,7
3,0,9
4,1,9
5,2,8
7,5,8
8,6,7


In [36]:
res = map(lambda f: f.query('b < c'),[jagadeesh_df3,jagadeesh_df4])

In [37]:
for elem in res:
    print (elem)

   b  c
1  3  6
2  0  7
3  0  9
4  1  9
5  2  8
7  5  8
8  6  7
     b   c
0    2  10
3    3   6
5   10  11
7    2  10
8    0   1
11   8  12
14   4   8


In [38]:
jagadeesh_df5 = pd.DataFrame({'a': list('aabbccddeeff'), 'b': list('aaaabbbbcccc'),
                   'c': np.random.randint(5, size=12),                    
                   'd': np.random.randint(9, size=12)})

In [39]:
jagadeesh_df5

Unnamed: 0,a,b,c,d
0,a,a,4,1
1,a,a,0,1
2,b,a,3,3
3,b,a,4,2
4,c,b,1,8
5,c,b,0,3
6,d,b,0,0
7,d,b,2,2
8,e,c,3,5
9,e,c,4,1


In [40]:
jagadeesh_df5.query('a in b')

Unnamed: 0,a,b,c,d
0,a,a,4,1
1,a,a,0,1
2,b,a,3,3
3,b,a,4,2
4,c,b,1,8
5,c,b,0,3


In [41]:
jagadeesh_df5.query('a not in b')

Unnamed: 0,a,b,c,d
6,d,b,0,0
7,d,b,2,2
8,e,c,3,5
9,e,c,4,1
10,f,c,4,7
11,f,c,4,2


In [42]:
jagadeesh_df5.query('b == ["b","c"]')

Unnamed: 0,a,b,c,d
4,c,b,1,8
5,c,b,0,3
6,d,b,0,0
7,d,b,2,2
8,e,c,3,5
9,e,c,4,1
10,f,c,4,7
11,f,c,4,2


### 8. Set/Reset Index
* The pandas Index class and its subclasses can be viewed as implementing an ordered multiset. Duplicates are allowed. 
* However, if you try to convert an Index object with duplicate entries into a set, an exception will be raised.
* Index also provides the infrastructure necessary for lookups, data alignment, and reindexing. The easiest way to create an Index directly is to pass a list or other sequence to Index:

In [43]:
index = pd.Index(['e', 'd', 'a', 'b'])

In [44]:
jagadeesh_df6 = pd.DataFrame([1,2,3,4])

In [45]:
jagadeesh_df6.index = index

In [46]:
jagadeesh_df6

Unnamed: 0,0
e,1
d,2
a,3
b,4


In [47]:
#if we want to write a column name

jagadeesh_df6.columns=['variable']
jagadeesh_df6

Unnamed: 0,variable
e,1
d,2
a,3
b,4


In [48]:
#if we want to change/rename the column name, by creating dictionary
df_new=jagadeesh_df6.rename(columns={'variable':'variable1'})
df_new

Unnamed: 0,variable1
e,1
d,2
a,3
b,4


In [49]:
jagadeesh_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [50]:
jagadeesh_df.set_index(['Survived','Pclass'])

Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Survived,Pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,3,1,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
1,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
1,1,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
0,3,5,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
0,...,...,...,...,...,...,...,...,...,...,...
0,2,887,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
1,1,888,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
0,3,889,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
1,1,890,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [51]:
jagadeesh_df_info=jagadeesh_df.set_index(['Survived','Pclass'])

In [52]:
jagadeesh_df_info.reset_index()

Unnamed: 0,Survived,Pclass,PassengerId,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,0,3,1,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,1,1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,1,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,1,1,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,0,3,5,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,887,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,1,1,888,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,0,3,889,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,1,1,890,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


### 9. Selecting columns by Type

In [53]:
jagadeesh_df.select_dtypes(include=['object'])

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked
0,"Braund, Mr. Owen Harris",male,A/5 21171,,S
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,PC 17599,C85,C
2,"Heikkinen, Miss. Laina",female,STON/O2. 3101282,,S
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,113803,C123,S
4,"Allen, Mr. William Henry",male,373450,,S
...,...,...,...,...,...
886,"Montvila, Rev. Juozas",male,211536,,S
887,"Graham, Miss. Margaret Edith",female,112053,B42,S
888,"Johnston, Miss. Catherine Helen ""Carrie""",female,W./C. 6607,,S
889,"Behr, Mr. Karl Howell",male,111369,C148,C


In [54]:
jagadeesh_df.select_dtypes(include=['int64'])

Unnamed: 0,PassengerId,Survived,Pclass,SibSp,Parch
0,1,0,3,1,0
1,2,1,1,1,0
2,3,1,3,0,0
3,4,1,1,1,0
4,5,0,3,0,0
...,...,...,...,...,...
886,887,0,2,0,0
887,888,1,1,0,0
888,889,0,3,1,2
889,890,1,1,0,0


### 10. Accessing Multi-Index Data
* Many a times, excel data is multi-index, u can acccess in the following way

In [55]:
jagadeesh_df1= pd.read_csv('C:/New new/titanic-master/titanic_train.csv',index_col=[0,1])

In [56]:
jagadeesh_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
