# 1. Python Machine Learning basics

## 1) Working with NumPy

In [1]:
import numpy as np

In [2]:
# A flat Python list becomes a 1-D ndarray; show its type and shape.
array1 = np.asarray([1, 2, 3])
for label, detail in (('array1 type: ', type(array1)),
                      ('array1 array shape: ', array1.shape)):
    print(label, detail)

array1 type:  <class 'numpy.ndarray'>
array1 array shape:  (3,)


In [3]:
# A list of two 3-element rows becomes a 2-D ndarray of shape (2, 3).
array2 = np.array([[1, 2, 3], [2, 3, 4]])
print('array2 type: ', type(array2))
print('array2 array shape: ', np.shape(array2))

array2 type:  <class 'numpy.ndarray'>
array2 array shape:  (2, 3)


In [4]:
# A single row wrapped in an outer dimension is still 2-D: shape (1, 3), not (3,).
# ndmin=2 prepends the extra axis, which is the same as nesting the list once more.
array3 = np.array([1, 2, 3], ndmin=2)
for label, detail in (('array3 type: ', type(array3)),
                      ('array3 array shape: ', array3.shape)):
    print(label, detail)

array3 type:  <class 'numpy.ndarray'>
array3 array shape:  (1, 3)


In [5]:
# Report the number of dimensions of each array built in the cells above.
# Positional fields are written {0}, {1}, {2}. The previous '{:0}', '{:1}', '{:2}'
# were format specs (minimum field widths), so the last value was padded with a
# stray leading space.
print('array1: {0}차원, array2: {1}차원, array3: {2}차원'.format(array1.ndim, array2.ndim, array3.ndim))

array1: 1차원, array2: 2차원, array3:  2차원


In [6]:
# all elements of an ndarray must share the same data type

In [7]:
# Convert a Python list into an ndarray and compare the two container types.
list1 = [1, 2, 3]
array1 = np.array(list1)
for container in (list1, array1):
    print(type(container))
# The ndarray also carries a single element dtype.
print(array1, array1.dtype)

<class 'list'>
<class 'numpy.ndarray'>
[1 2 3] int32


In [8]:
# An ndarray holds a single dtype, so mixing integers with a string makes
# NumPy convert every element to a string.
list2 = [1, 2, 'test']
array2 = np.asarray(list2)
print(array2, array2.dtype)

['1' '2' 'test'] <U11


In [9]:
# Mixing integers with a float upcasts the whole array to float64.
list3 = [1, 2, 3.0]
array3 = np.asarray(list3)
print(array3, array3.dtype)

[1. 2. 3.] float64


In [10]:
# astype() returns a new array converted to the requested dtype (int -> float64 here).
array_int = np.array([1, 2, 3])
array_float = array_int.astype(np.float64)
print(array_float, array_float.dtype)

[1. 2. 3.] float64


In [11]:
# Convert the float array from the previous cell back to 32-bit integers.
array_int1 = array_float.astype(np.int32)
print(array_int1, array_int1.dtype)

[1 2 3] int32


In [12]:
# Casting float -> int drops the fractional part (1.1 -> 1, 2.1 -> 2, 3.1 -> 3).
array_float1 = np.array([1.1, 2.1, 3.1])
array_int2 = array_float1.astype(np.int32)
print(array_int2, array_int2.dtype)

[1 2 3] int32


In [13]:
# ndarrays can be generated easily with arange(), zeros(), and ones()

In [14]:
# arange(0, 10) yields the integers 0..9 (the stop value is excluded).
sequence_array = np.arange(0, 10)
print(sequence_array)
print(sequence_array.dtype, np.shape(sequence_array))

[0 1 2 3 4 5 6 7 8 9]
int32 (10,)


In [15]:
# A 3x2 array filled with zeros, with an explicit integer dtype.
zero_array = np.zeros(shape=(3, 2), dtype=np.int32)
print(zero_array)
print(zero_array.dtype, np.shape(zero_array))

[[0 0]
 [0 0]
 [0 0]]
int32 (3, 2)


In [16]:
# A 3x2 array filled with ones; with no dtype given the result is float64.
one_array = np.ones(shape=(3, 2))
print(one_array)
print(one_array.dtype, np.shape(one_array))

[[1. 1.]
 [1. 1.]
 [1. 1.]]
float64 (3, 2)


In [17]:
# reshape() changes the size and number of dimensions of an ndarray.
# Start from a 1-D array holding 0..9.
array1 = np.arange(0, 10)
print('array1:\n', array1)

array1:
 [0 1 2 3 4 5 6 7 8 9]


In [18]:
# Lay the ten values out as 2 rows x 5 columns.
array2 = np.reshape(array1, (2, 5))
print('array2:\n', array2)

array2:
 [[0 1 2 3 4]
 [5 6 7 8 9]]


In [19]:
# Lay the same ten values out as 5 rows x 2 columns.
array3 = np.reshape(array1, (5, 2))
print('array3:\n', array3)

array3:
 [[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]


In [20]:
# passing -1 for one dimension in reshape() lets NumPy infer that dimension from the array's size

In [21]:
array1 = np.arange(10)
print(array1)

# -1 asks NumPy to work out that dimension: (-1, 5) -> (2, 5), (5, -1) -> (5, 2).
array2 = array1.reshape(-1, 5)
array3 = array1.reshape(5, -1)
for label, reshaped in (('array2 shape:', array2), ('array3 shape:', array3)):
    print(label, reshaped.shape)
    print(reshaped)

[0 1 2 3 4 5 6 7 8 9]
array2 shape: (2, 5)
[[0 1 2 3 4]
 [5 6 7 8 9]]
array3 shape: (5, 2)
[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]


In [22]:
# Eight values arranged as a 2x2x2 cube; tolist() shows the nesting explicitly.
array1 = np.arange(0, 8)
array3d = np.reshape(array1, (2, 2, 2))
print('array3d:\n', array3d.tolist())

array3d:
 [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]


In [23]:
# Flatten the 3-D array into a 2-D single-column array: (-1, 1) means
# "as many rows as needed, one column".
array5 = np.reshape(array3d, (-1, 1))
print('array5:\n', array5.tolist())
print('array5 shape:', array5.shape)

array5:
 [[0], [1], [2], [3], [4], [5], [6], [7]]
array5 shape: (8, 1)


In [24]:
# The same (-1, 1) reshape turns a 1-D array into a 2-D single-column array.
array6 = np.reshape(array1, (-1, 1))
print('array6:\n', array6.tolist())
print('array6 shape:', array6.shape)

array6:
 [[0], [1], [2], [3], [4], [5], [6], [7]]
array6 shape: (8, 1)


## 2) Indexing ndarray data

In [25]:
# to extract only part of the data, use indexing

In [26]:
# Select a single value by position.
array1 = np.arange(1, 10)
print('array1:', array1)

# Indexing is zero-based, so position 2 holds the third element.
value = array1[2]
print('value:', value)
print(type(value))

array1: [1 2 3 4 5 6 7 8 9]
value: 3
<class 'numpy.int32'>


In [27]:
# Negative indices count backwards from the end of the array.
last, second_last = array1[-1], array1[-2]
print('last value:', last, 'second last value:', second_last)

last value: 9 second last value: 8


In [28]:
# Indexing also allows ndarray values to be overwritten in place:
# position 0 becomes 9 and position 8 becomes 0.
array1[[0, 8]] = [9, 0]
print('array1:', array1)

array1: [9 2 3 4 5 6 7 8 0]


In [29]:
# Build 1..9 and reshape it into a 3x3 (2-D) array.
array1d = np.arange(1, 10)
array2d = np.reshape(array1d, (3, 3))
print(array2d)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [30]:
# Look up individual cells of the 2-D array by (row, column) position.
lookups = (
    ('(row=0, col=0) indexing value:', (0, 0)),
    ('(row=0, col=1) indexing value:', (0, 1)),
    ('(row=1, col=0) indexing value:', (1, 0)),
    ('(row=2, col=2) indexing value:', (2, 2)),
)
for label, position in lookups:
    print(label, array2d[position])

(row=0, col=0) indexing value: 1
(row=0, col=1) indexing value: 2
(row=1, col=0) indexing value: 4
(row=2, col=2) indexing value: 9


In [31]:
# The start:stop slice syntax selects a run of consecutive values
# (stop is excluded); the result is itself an ndarray.
array1 = np.arange(1, 10)
array3 = array1[0:3]
for shown in (array3, type(array3)):
    print(shown)

[1 2 3]
<class 'numpy.ndarray'>


In [32]:
array1 = np.arange(1, 10)

# Omitting the start means "from the beginning".
array4 = array1[:3]
# Omitting the stop means "through the last value".
array5 = array1[3:]
# Omitting both selects every value.
array6 = array1[:]

for sliced in (array4, array5, array6):
    print(sliced)

[1 2 3]
[4 5 6 7 8 9]
[1 2 3 4 5 6 7 8 9]


In [33]:
# Slicing a 2-D array takes one slice per axis: array2d[rows, columns].

array1d = np.arange(1, 10)
array2d = array1d.reshape(3, 3)
print('array2d:\n',array2d)

# Each entry pairs the printed label with the slice it describes.
views = [
    ('array2d[0:2, 0:2] \n', array2d[0:2, 0:2]),
    ('array2d[1:3, 0:3] \n', array2d[1:3, 0:3]),
    ('array2d[1:3, :] \n', array2d[1:3, :]),
    ('array2d[:, :] \n', array2d[:, :]),
    ('array2d[:2, 1:] \n', array2d[:2, 1:]),
    # An integer on one axis drops that axis, so this one is 1-D.
    ('array2d[:2, 0] \n', array2d[:2, 0]),
]
for label, view in views:
    print(label, view)

array2d:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
array2d[0:2, 0:2] 
 [[1 2]
 [4 5]]
array2d[1:3, 0:3] 
 [[4 5 6]
 [7 8 9]]
array2d[1:3, :] 
 [[4 5 6]
 [7 8 9]]
array2d[:, :] 
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
array2d[:2, 1:] 
 [[2 3]
 [5 6]]
array2d[:2, 0] 
 [1 4]


In [34]:
# Indexing a 2-D ndarray with only a row number (no column index)
# returns that whole row as a 1-D ndarray.
first_row, second_row = array2d[0], array2d[1]
print(first_row)
print(second_row)
print('array2d[0] shape:', first_row.shape,
      'array2d[1] shape:', second_row.shape)

[1 2 3]
[4 5 6]
array2d[0] shape: (3,) array2d[1] shape: (3,)


In [35]:
# Fancy indexing: pass a list (or ndarray) of positions as the index and
# get back an ndarray holding the values at those positions.

array1d = np.arange(1, 10)
array2d = np.reshape(array1d, (3, 3))

# Rows 0 and 1, column 2 only.
array3 = array2d[[0,1], 2]
# Rows 0 and 1, columns 0..1.
array4 = array2d[[0,1], 0:2]
# Rows 0 and 1, every column.
array5 = array2d[[0,1]]

for label, picked in (('array2d[[0,1],2] => ', array3),
                      ('array2d[[0,1], 0:2] => ', array4),
                      ('array2d[[0,1]] => ', array5)):
    print(label, picked.tolist())

array2d[[0,1],2] =>  [3, 6]
array2d[[0,1], 0:2] =>  [[1, 2], [4, 5]]
array2d[[0,1]] =>  [[1, 2, 3], [4, 5, 6]]


In [36]:
# Boolean indexing: put a condition inside [] to filter and search in one step.
array1d = np.arange(1, 10)
is_above_five = array1d > 5  # one True/False flag per element
array3 = array1d[is_above_five]
print('array1d > 5 Boolean indexing result: ', array3)

array1d > 5 Boolean indexing result:  [6 7 8 9]


In [37]:
# Evaluating the condition on its own (without using it as an index)
# returns an ndarray with one True/False value per element.
array1d > 5

array([False, False, False, False, False,  True,  True,  True,  True])

In [38]:
# Spell out the same mask by hand: False for the first five positions,
# True for the last four. Indexing with it keeps only the True positions.
boolean_indexes = np.array([False] * 5 + [True] * 4)
array3 = array1d[boolean_indexes]
print('Boolean index filtering result:', array3)

Boolean index filtering result: [6 7 8 9]


In [39]:
# The same selection expressed as explicit positions 5, 6, 7, 8.
indexes = np.arange(5, 9)
array4 = array1d[indexes]
print('filtering with normal indexing:', array4)

filtering with normal indexing: [6 7 8 9]


## 3) sort() and argsort()

In [40]:
org_array = np.array([3, 1, 9, 5])
print('original matrix:', org_array)

# np.sort() returns a sorted copy; the array passed in is left unchanged.
sort_array1 = np.sort(org_array)
for label, shown in (('np.sort() result matrix: ', sort_array1),
                     ('original matrix after np.sort(): ', org_array)):
    print(label, shown)

original matrix: [3 1 9 5]
np.sort() result matrix:  [1 3 5 9]
original matrix after np.sort():  [3 1 9 5]


In [41]:
# Sorting with ndarray.sort(): the array is sorted in place and the call
# returns None, so sort_array2 is None while org_array itself is now sorted.
sort_array2 = org_array.sort()
print('org_array.sort() result matrix: ', sort_array2)
print('original matrix after org_array.sort(): ', org_array)

org_array.sort() result matrix:  None
original matrix after org_array.sort():  [1 3 5 9]


In [42]:
# Descending order: sort ascending, then reverse the result with [::-1].
ascending = np.sort(org_array)
sort_array1_desc = ascending[::-1]
print('with descending order: ', sort_array1_desc)

with descending order:  [9 5 3 1]


In [43]:
# For arrays with two or more dimensions, the axis argument chooses the sort direction.
array2d = np.array([[8, 12], [7, 1]])

# axis=0 sorts down the rows, i.e. each column is ordered independently.
sort_array2d_axis0 = np.sort(array2d, axis=0)
# axis=1 sorts across the columns, i.e. each row is ordered independently.
sort_array2d_axis1 = np.sort(array2d, axis=1)

for label, ordered in (('order by row axis: \n', sort_array2d_axis0),
                       ('order by column axis: \n', sort_array2d_axis1)):
    print(label, ordered)

order by row axis: 
 [[ 7  1]
 [ 8 12]]
order by column axis: 
 [[ 8 12]
 [ 1  7]]


In [44]:
# argsort() returns the positions in the original array of the values in
# sorted order, rather than the sorted values themselves.
org_array = np.array([3, 1, 9, 5])
sort_indices = org_array.argsort()
print(type(sort_indices))
print('index of original matrix is :', sort_indices)

<class 'numpy.ndarray'>
index of original matrix is : [1 0 3 2]


In [45]:
# Reversing the ascending argsort result gives the positions in descending order.
org_array = np.array([3, 1, 9, 5])
sort_indices_desc = org_array.argsort()[::-1]
print('index of original matrix when descending ordered :', sort_indices_desc)

index of original matrix when descending ordered : [2 3 0 1]


In [46]:
import numpy as np

# Two parallel arrays: name_array[i] scored score_array[i].
name_array = np.array(['David', 'Andrew', 'Daniel', 'Sam', 'Kevin'])
score_array = np.array([78, 95, 84, 98, 88])

# argsort on the scores gives an ordering that can be used to index the names,
# listing the names from lowest to highest score.
sort_indices_asc = score_array.argsort()
names_by_score = name_array[sort_indices_asc]
print('ascending ordered by score, then the index of score_array is :', sort_indices_asc)
print('ascending ordered by score, then the name of name_array is :', names_by_score)

ascending ordered by score, then the index of score_array is : [0 2 4 1 3]
ascending ordered by score, then the name of name_array is : ['David' 'Daniel' 'Kevin' 'Andrew' 'Sam']


## 4) np.dot and the transpose of the matrix

In [48]:
# A is 2x3 and B is 3x2, so the product A.B is defined and has shape 2x2.
A = np.array([[1, 2, 3], [4, 5, 6]])
B = np.array([[7, 8], [9, 10], [11, 12]])

In [49]:
# Matrix product of A and B (method form of np.dot).
dot_product = A.dot(B)
print('result is :\n', dot_product)

result is :
 [[ 58  64]
 [139 154]]


In [50]:
# The transpose swaps rows and columns; .T is shorthand for np.transpose().
A = np.array([[1, 2], [3, 4]])
transpose_mat = A.T
print('result is :\n', transpose_mat)

result is :
 [[1 3]
 [2 4]]


## 5) data handling with pandas

In [51]:
import pandas as pd

In [55]:
titanic_df = pd.read_csv('./data/titanic_train.csv')  # load the CSV (path is relative to the notebook's working directory)
print('titanic variable type:',type(titanic_df))  # read_csv returns a pandas DataFrame
titanic_df  # last expression of the cell, so the DataFrame is displayed as the cell output

titanic variable type: <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [57]:
# head() with no argument shows the first five rows.
titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [58]:
# Count how many rows hold each distinct value of the 'Pclass' column.
pclass_column = titanic_df.loc[:, 'Pclass']
value_counts = pclass_column.value_counts()
print(value_counts)

3    491
1    216
2    184
Name: Pclass, dtype: int64


In [60]:
# Selecting a single column of a DataFrame gives a pandas Series.
titanic_pclass = titanic_df.loc[:, 'Pclass']
print(type(titanic_pclass))
titanic_pclass.head()

<class 'pandas.core.series.Series'>


0    3
1    1
2    3
3    1
4    3
Name: Pclass, dtype: int64

In [61]:
# value_counts() also returns a Series: the distinct values form the index
# and the counts are the data.
value_counts = titanic_df.loc[:, 'Pclass'].value_counts()
for shown in (type(value_counts), value_counts):
    print(shown)

<class 'pandas.core.series.Series'>
3    491
1    216
2    184
Name: Pclass, dtype: int64


In [62]:
# Converting between DataFrame, list, dictionary, and NumPy ndarray

In [64]:
## numpy ndarray, list, dictionary -> DataFrame

import numpy as np

# A 1-D input produces a single column, so exactly one column name is needed.
col_name1 = ['col1']
list1 = [1, 2, 3]
array1 = np.array(list1)
print('array1 shape: ', array1.shape)

# Build the frame from the list ...
df_list1 = pd.DataFrame(data=list1, columns=col_name1)
print('DataFrame generated from 1dimension list: \n', df_list1)

# ... and from the ndarray; both give the same single-column frame.
df_array1 = pd.DataFrame(data=array1, columns=col_name1)
print('DataFrame generated from 1dimension ndarray: \n', df_array1)

array1 shape:  (3,)
DataFrame generated from 1dimension list: 
    col1
0     1
1     2
2     3
DataFrame generated from 1dimension ndarray: 
    col1
0     1
1     2
2     3


In [65]:
# The same conversion with 2-D data.

# Three columns in the data, so three column names.
col_name2 = ['col1', 'col2', 'col3']

# A 2x3 nested list and the equivalent 2x3 ndarray.
list2 = [[1, 2, 3], [11, 12, 13]]
array2 = np.array(list2)
print('array shape: ', array2.shape)

# Each inner list / array row becomes one DataFrame row.
df_list2 = pd.DataFrame(data=list2, columns=col_name2)
print('DataFrame generated from 2 dimension list: \n', df_list2)
df_array2 = pd.DataFrame(data=array2, columns=col_name2)
print('DataFrame generated from 2 dimension ndarray: \n', df_array2)

array shape:  (2, 3)
DataFrame generated from 2 dimension list: 
    col1  col2  col3
0     1     2     3
1    11    12    13
DataFrame generated from 2 dimension ndarray: 
    col1  col2  col3
0     1     2     3
1    11    12    13


In [66]:
# Dictionary into DataFrame
# each key (a string) becomes a column name, and each value (a list or ndarray) becomes that column's data
# Named col_data rather than `dict` so the builtin dict type is not shadowed
# for the rest of the kernel session.
col_data = {'col1': [1,11], 'col2':[2,22], 'col3':[3,33]}
# Each key becomes a column; each list supplies that column's values.
df_dict=pd.DataFrame(col_data)
print('DataFrame generated from the Dictionary is :\n', df_dict)

DataFrame generated from the Dictionary is :
    col1  col2  col3
0     1     2     3
1    11    22    33


In [67]:
# DataFrame -> ndarray: the .values attribute exposes the frame's data as a NumPy array.
array3 = df_dict.values
print(
    'df_dict.values type is: ', type(array3),
    'df_dict.values shape is: ', array3.shape,
)
print(array3)

df_dict.values type is:  <class 'numpy.ndarray'> df_dict.values shape is:  (2, 3)
[[ 1  2  3]
 [11 22 33]]


In [68]:
# Converting a DataFrame back to a list or a dictionary.

# DataFrame -> list: go through the ndarray and call tolist() (one inner list per row).
list3 = df_dict.values.tolist()
print('df_dict.values.tolist() type is: ', type(list3))
print(list3)

# DataFrame -> dictionary: to_dict('list') maps each column name to a list of its values.
dict3 = df_dict.to_dict('list')
# The label previously read 'to.dict()'; the method is to_dict().
print('\n df_dict.to_dict() type is: ', type(dict3))
print(dict3)

df_dict.values.tolist() type is:  <class 'list'>
[[1, 2, 3], [11, 22, 33]]

 df_dict.to.dict() type is:  <class 'dict'>
{'col1': [1, 11], 'col2': [2, 22], 'col3': [3, 33]}


In [70]:
# Creating and modifying DataFrame columns

# Assigning a scalar to a new column name adds that column with the same value in every row.
titanic_df['Age_0'] = 0
titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Age_0
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0


In [72]:
# New columns can be computed from existing ones.

# Age scaled by 10.
titanic_df['Age_by_10'] = titanic_df['Age'].mul(10)
# Siblings/spouses + parents/children + 1.
titanic_df['Family_No'] = titanic_df['SibSp'].add(titanic_df['Parch']).add(1)
titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Age_0,Age_by_10,Family_No
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0,220.0,2
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0,380.0,2
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0,260.0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0,350.0,2
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0,350.0,1


In [73]:
# An existing column can be overwritten with a value computed from itself.
titanic_df['Age_by_10'] = titanic_df['Age_by_10'].add(100)
titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Age_0,Age_by_10,Family_No
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0,320.0,2
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0,480.0,2
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0,360.0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0,450.0,2
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0,450.0,1


In [74]:
# drop() removes data from a DataFrame.
#   axis=0 -> the labels refer to rows (index values)
#   axis=1 -> the labels refer to columns
# With the default inplace=False a new DataFrame is returned and titanic_df is unchanged.

titanic_drop_df = titanic_df.drop(labels='Age_0', axis=1)
titanic_drop_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Age_by_10,Family_No
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,320.0,2
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,480.0,2
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,360.0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,450.0,2
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,450.0,1


In [75]:
# With inplace=True, drop() removes the columns from titanic_df itself and
# returns None instead of a new DataFrame.
# NOTE: this cell is not re-runnable as-is; a second run raises KeyError
# because the columns are already gone.

drop_result = titanic_df.drop(['Age_0', 'Age_by_10', 'Family_No'], axis=1, inplace=True)
print('result return after drop with inplace=True option: ', drop_result)
titanic_df.head()

result return after drop with inplace=True option:  None


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [76]:
# With axis=0, drop() removes rows by index label instead of columns.
# Widen the printed output so each row fits on one line, and truncate long cell text.
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 15)
print('#### before axis 0 drop ####')
print(titanic_df.head())

# Remove the rows whose index labels are 0, 1 and 2, modifying titanic_df in place.
titanic_df.drop([0,1,2], axis=0, inplace=True)

print('#### after axis 0 drop ####')
print(titanic_df.head())

#### before axis 0 drop ####
   PassengerId  Survived  Pclass            Name     Sex   Age  SibSp  Parch          Ticket     Fare Cabin Embarked
0            1         0       3  Braund, Mr....    male  22.0      1      0       A/5 21171   7.2500   NaN        S
1            2         1       1  Cumings, Mr...  female  38.0      1      0        PC 17599  71.2833   C85        C
2            3         1       3  Heikkinen, ...  female  26.0      0      0  STON/O2. 31...   7.9250   NaN        S
3            4         1       1  Futrelle, M...  female  35.0      1      0          113803  53.1000  C123        S
4            5         0       3  Allen, Mr. ...    male  35.0      0      0          373450   8.0500   NaN        S
#### after axis 0 drop ####
   PassengerId  Survived  Pclass            Name     Sex   Age  SibSp  Parch  Ticket     Fare Cabin Embarked
3            4         1       1  Futrelle, M...  female  35.0      1      0  113803  53.1000  C123        S
4            5         

In [78]:
# The Index object

# Reload the original file, since earlier cells dropped rows from titanic_df in place.
titanic_df = pd.read_csv('./data/titanic_train.csv')
# DataFrame.index holds the row labels as an Index object.
indexes = titanic_df.index
print(indexes)
# .values exposes the labels held by the Index as an array.
print('Index object array value: \n', indexes.values)

RangeIndex(start=0, stop=891, step=1)
Index object array value: 
 [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 2

In [None]:
# Show the type of the object returned by Index.values.
print(type(indexes.values))