In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build the parents table from one record (tuple) per person;
# `columns` names the fields and fixes their left-to-right order.
parents = pd.DataFrame(
    [
        ('Yang', 'Jerome', 'M', 37, 178),
        ('Chen', 'Sammy', 'F', 36, 160),
    ],
    columns=['family_name', 'last_name', 'gender', 'age', 'height'],
)

parents

Unnamed: 0,family_name,last_name,gender,age,height
0,Yang,Jerome,M,37,178
1,Chen,Sammy,F,36,160


In [3]:
# Build the children table the same way: one record (tuple) per child,
# with `columns` naming the fields in display order.
children = pd.DataFrame(
    [
        ('Yang', 'Kyan', 'M', 8, 122),
        ('Yang', 'Janet', 'F', 6, 110),
        ('Yang', 'Kyan', 'M', 4, 90),
    ],
    columns=['family_name', 'last_name', 'gender', 'age', 'height'],
)
children

Unnamed: 0,family_name,last_name,gender,age,height
0,Yang,Kyan,M,8,122
1,Yang,Janet,F,6,110
2,Yang,Kyan,M,4,90


In [4]:
# append rows: stack the children below the parents
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported replacement. ignore_index=True discards the
# two original indexes and renumbers the combined rows 0..n-1.
family = pd.concat([parents, children], ignore_index=True)
family

Unnamed: 0,family_name,last_name,gender,age,height
0,Yang,Jerome,M,37,178
1,Chen,Sammy,F,36,160
2,Yang,Kyan,M,8,122
3,Yang,Janet,F,6,110
4,Yang,Kyan,M,4,90


In [5]:
# assign a new column
# assign() returns a new frame rather than modifying in place, so the result
# is bound back to `family`; the list supplies one value per existing row.
family = family.assign(weight=[72, 56, 26, 22, 19])
family

Unnamed: 0,family_name,last_name,gender,age,height,weight
0,Yang,Jerome,M,37,178,72
1,Chen,Sammy,F,36,160,56
2,Yang,Kyan,M,8,122,26
3,Yang,Janet,F,6,110,22
4,Yang,Kyan,M,4,90,19


In [6]:
# set index from given column name
# With the default drop=True the 'last_name' column moves into the index and
# disappears from the data columns. The result is only displayed here;
# `family` itself is not reassigned.
family.set_index('last_name')


Unnamed: 0_level_0,family_name,gender,age,height,weight
last_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Jerome,Yang,M,37,178,72
Sammy,Chen,F,36,160,56
Kyan,Yang,M,8,122,26
Janet,Yang,F,6,110,22
Kyan,Yang,M,4,90,19


In [7]:
# set the index from the given column and keep that column in the data
# drop=False leaves 'last_name' available both as the index and as a regular
# column; this time the result is stored back into `family`.
family = family.set_index('last_name', drop=False)
family

Unnamed: 0_level_0,family_name,last_name,gender,age,height,weight
last_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Jerome,Yang,Jerome,M,37,178,72
Sammy,Chen,Sammy,F,36,160,56
Kyan,Yang,Kyan,M,8,122,26
Janet,Yang,Janet,F,6,110,22
Kyan,Yang,Kyan,M,4,90,19


In [8]:
# select a column as a Series
# A single string key returns the column itself, carrying the frame's index
# and the column name.
family['last_name']

last_name
Jerome    Jerome
Sammy      Sammy
Kyan        Kyan
Janet      Janet
Kyan        Kyan
Name: last_name, dtype: object

In [9]:
# select a column as a DataFrame
# Wrapping the column name in a list keeps the result two-dimensional.
family[['last_name']]

Unnamed: 0_level_0,last_name
last_name,Unnamed: 1_level_1
Jerome,Jerome
Sammy,Sammy
Kyan,Kyan
Janet,Janet
Kyan,Kyan


In [10]:
# select several columns as a DataFrame
# The columns come back in the order given in the list.
family[['last_name', 'height']]

Unnamed: 0_level_0,last_name,height
last_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Jerome,Jerome,178
Sammy,Sammy,160
Kyan,Kyan,122
Janet,Janet,110
Kyan,Kyan,90


In [13]:
# select a row as a Series by its index label
# NOTE: 'Jerome' occurs once in the index, so a Series comes back. A label
# that is repeated in the index (such as 'Kyan') returns a DataFrame holding
# every matching row instead.
family.loc['Jerome']

family_name      Yang
last_name      Jerome
gender              M
age                37
height            178
weight             72
Name: Jerome, dtype: object

In [12]:
# select a row as a Series by its integer position (0 is the first row),
# regardless of what the index labels are
family.iloc[0]

family_name      Yang
last_name      Jerome
gender              M
age                37
height            178
weight             72
Name: Jerome, dtype: object

In [24]:
# select a row by its label or by its integer position
# The old .ix indexer (label lookup with a positional fallback) was deprecated
# in pandas 0.20 and removed in 1.0 because that fallback was ambiguous.
# State the intent explicitly instead: .loc for labels, .iloc for positions.
# see http://stackoverflow.com/questions/31593201/pandas-iloc-vs-ix-vs-loc-explanation
# The results of the two lookups below are equal
family.loc['Jerome']
family.iloc[0]

family_name      Yang
last_name      Jerome
gender              M
age                37
height            178
weight             72
Name: Jerome, dtype: object

In [9]:
# select several rows as a DataFrame by integer position
# (.ix was removed in pandas 1.0; .iloc is the explicit positional indexer and
# gives the first and fourth rows whatever the index labels are)
family.iloc[[0, 3]]

Unnamed: 0,family_name,last_name,gender,age,height,weight
0,Yang,Jerome,M,37,178,72
3,Yang,Janet,F,6,110,22


In [26]:
# select a single cell by row label and column label
# .at is the scalar-only counterpart of .loc and is faster for one value
family.at['Jerome', 'height']


178

In [28]:
# select a single cell by integer row and column position
# (row 0, column 4 -- 'height' in the current column order)
# .iat is the scalar-only counterpart of .iloc and is faster for one value
family.iat[0, 4]

178

In [36]:
# replace the index with a plain 0..n-1 integer index
# drop=True discards the old index. The default drop=False would instead try
# to move the old index back into a column, which fails here because a
# 'last_name' column already exists (it was kept by set_index(..., drop=False)).
family = family.reset_index(drop=True)
family 

Unnamed: 0,family_name,last_name,gender,age,height,weight
0,Yang,Jerome,M,37,178,72
1,Chen,Sammy,F,36,160,56
2,Yang,Kyan,M,8,122,26
3,Yang,Janet,F,6,110,22
4,Yang,Kyan,M,4,90,19


In [37]:
# apply a function on single column
def firstchar(string):
    """Return the first character of ``string``, or ``''`` if it is empty.

    A slice is used instead of ``string[0]`` so that an empty value does not
    raise IndexError, which would abort an entire ``Series.apply`` pass over
    a column containing one blank entry.
    """
    return string[:1]
# Series.apply calls firstchar once per value and collects the results in a
# new Series that keeps the original index and name
family['last_name'].apply(firstchar)

0    J
1    S
2    K
3    J
4    K
Name: last_name, dtype: object

In [38]:
# assign a new column for the function's output
# assign() returns a new frame; it is only displayed here and not stored
# back, so `family` itself does not gain a 'first_char' column.
firstchar_series = family['last_name'].apply(firstchar)
family.assign(first_char=firstchar_series)

Unnamed: 0,family_name,last_name,gender,age,height,weight,first_char
0,Yang,Jerome,M,37,178,72,J
1,Chen,Sammy,F,36,160,56,S
2,Yang,Kyan,M,8,122,26,K
3,Yang,Janet,F,6,110,22,J
4,Yang,Kyan,M,4,90,19,K


In [39]:
# apply a function related to several columns

def join(strings):
    """Glue the items of ``strings`` together with single spaces.

    When called through ``DataFrame.apply(..., axis=1)`` the argument is a
    row Series of strings rather than a list; ``str.join`` accepts either
    because it only needs an iterable of str.
    """
    separator = ' '
    return separator.join(strings)

# axis=1 hands each row (restricted to the two name columns) to join as a
# Series, producing one combined string per row
family[['family_name', 'last_name']].apply(join, axis=1)

0    Yang Jerome
1     Chen Sammy
2      Yang Kyan
3     Yang Janet
4      Yang Kyan
dtype: object

In [40]:
# apply a function with a row as parameter
def bmi(weight_and_height):
    """Return the body-mass index for one (weight, height) pair.

    Parameters
    ----------
    weight_and_height : pandas.Series or sequence
        Weight in first position, height in second position. The height is
        divided by 100 before squaring, i.e. it is treated as centimetres
        (weight is presumably kilograms -- confirm against the data source).

    Returns
    -------
    float
        ``weight / (height / 100) ** 2``
    """
    # A row passed by DataFrame.apply(axis=1) is a Series indexed by column
    # labels. Plain [0] / [1] on such a Series is a *label* lookup whose
    # positional fallback is deprecated, so read positions through .iloc
    # when it exists; lists and tuples are indexed directly as before.
    positional = getattr(weight_and_height, 'iloc', weight_and_height)
    weight = positional[0]
    height = positional[1]
    height_in_meter = height / 100
    return weight / (height_in_meter ** 2)

# compute one BMI per row: axis=1 passes each row of the two selected
# columns (weight first, height second) to bmi as a Series
bmiseries = family[['weight', 'height']].apply(bmi, axis=1)
bmiseries

0    22.724403
1    21.875000
2    17.468422
3    18.181818
4    23.456790
dtype: float64

In [41]:
# assign calculated bmi to a new column
# (assign() returns a new frame that is only displayed; `family` is unchanged)
family.assign(bmi=bmiseries)

Unnamed: 0,family_name,last_name,gender,age,height,weight,bmi
0,Yang,Jerome,M,37,178,72,22.724403
1,Chen,Sammy,F,36,160,56,21.875
2,Yang,Kyan,M,8,122,26,17.468422
3,Yang,Janet,F,6,110,22,18.181818
4,Yang,Kyan,M,4,90,19,23.45679


In [42]:
# goal: add blank cells in the leftmost columns
# start from an empty frame that only declares the two blank columns
blank = pd.DataFrame([], columns=['blank1', 'blank2'])
blank


Unnamed: 0,blank1,blank2


In [43]:
# combine the empty `blank` frame with `family` so the blank columns are added
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# sort=True reproduces the alphabetical column order the legacy append gave;
# leave it out to keep the columns in order of appearance instead
# (blank1, blank2 first).
pd.concat([blank, family], sort=True)

Unnamed: 0,age,blank1,blank2,family_name,gender,height,last_name,weight
0,37,,,Yang,M,178,Jerome,72
1,36,,,Chen,F,160,Sammy,56
2,8,,,Yang,M,122,Kyan,26
3,6,,,Yang,F,110,Janet,22
4,4,,,Yang,M,90,Kyan,19


In [44]:
# Swapping the operands does not change the column order while the columns
# are being sorted: sort=True always returns them alphabetically.
# (DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# Without sort=True the columns would follow order of appearance, which would
# put blank1 / blank2 last here.)
pd.concat([family, blank], sort=True)

Unnamed: 0,age,blank1,blank2,family_name,gender,height,last_name,weight
0,37,,,Yang,M,178,Jerome,72
1,36,,,Chen,F,160,Sammy,56
2,8,,,Yang,M,122,Kyan,26
3,6,,,Yang,F,110,Janet,22
4,4,,,Yang,M,90,Kyan,19
