In [2]:
# First income table: weekday -> {category: amount}.
# The categories are not uniform: 'tue' has no 'gift' and only 'wed' has 'venue'.
weekly_income1 = dict(
    sun=dict(service=2000, food=4500, gift=500),
    mon=dict(service=3000, food=5000, gift=800),
    tue=dict(service=1500, food=2200),
    wed=dict(service=1800, food=8000, gift=100, venue=3000),
)
# Second income table: same weekdays, but it carries 'bekery' and has
# neither 'food' nor 'venue'.
weekly_income2 = dict(
    sun=dict(service=1200, gift=140, bekery=1500),
    mon=dict(service=1300, gift=300, bekery=900),
    tue=dict(service=800, gift=200, bekery=1500),
    wed=dict(service=1000, gift=300, bekery=2300),
)

### Series Arithmetic

In [3]:
from pandas import Series, DataFrame
# Wednesday's income from each table, as a Series indexed by category.
s1, s2 = (Series(table['wed']) for table in (weekly_income1, weekly_income2))

# `+` aligns the two Series on their labels; a label present on only one
# side produces NaN.
display(s1 + s2)

# `add` with fill_value=0 treats a label missing from one side as 0, so
# every category keeps a numeric total.
display(s1.add(s2, fill_value=0))

bekery        NaN
food          NaN
gift        400.0
service    2800.0
venue         NaN
dtype: float64

bekery     2300.0
food       8000.0
gift        400.0
service    2800.0
venue      3000.0
dtype: float64

### DataFrame Arithmetic

In [4]:
# One DataFrame per income table: the outer keys (weekdays) become the
# columns and the inner keys (categories) become the row index.
df_weekly_income1, df_weekly_income2 = (
    DataFrame(income) for income in (weekly_income1, weekly_income2)
)

display(
    # `+` aligns on both axes; a cell missing from either frame becomes NaN.
    df_weekly_income1 + df_weekly_income2,
    # fill_value=0 stands in for a cell missing from only one frame; a cell
    # missing from both frames is still NaN.
    df_weekly_income1.add(df_weekly_income2, fill_value=0),
)

# Express the first table in thousands. With a scalar divisor, fill_value=0
# replaces the frame's own NaN cells before the division.
display(df_weekly_income1.div(1000, fill_value=0))

Unnamed: 0,sun,mon,tue,wed
bekery,,,,
food,,,,
gift,640.0,1100.0,,400.0
service,3200.0,4300.0,2300.0,2800.0
venue,,,,


Unnamed: 0,sun,mon,tue,wed
bekery,1500.0,900.0,1500.0,2300.0
food,4500.0,5000.0,2200.0,8000.0
gift,640.0,1100.0,200.0,400.0
service,3200.0,4300.0,2300.0,2800.0
venue,,,,3000.0


Unnamed: 0,sun,mon,tue,wed
food,4.5,5.0,2.2,8.0
gift,0.5,0.8,0.0,0.1
service,2.0,3.0,1.5,1.8
venue,0.0,0.0,0.0,3.0


### DataFrame And Series
- The arithmetic operation is broadcast: it is performed once for each row of the DataFrame.
- By default, the Series labels are aligned with the DataFrame's column labels.
- To align the Series labels with the row labels instead, pass `axis=0` or `axis='index'`.

In [5]:
# Wednesday's income per category is the reference every day is divided by.
s1 = Series(weekly_income1['wed'])

# After transposing, the categories are the columns, so the default
# alignment (Series labels against column labels) pairs them with s1.
# Result: each day's income relative to Wednesday's income.
display(df_weekly_income1.T.div(s1))

# The same ratios without transposing: axis='index' aligns s1 with the
# row labels instead of the column labels.
display(df_weekly_income1.div(s1, axis='index'))


Unnamed: 0,food,gift,service,venue
sun,0.5625,5.0,1.111111,
mon,0.625,8.0,1.666667,
tue,0.275,,0.833333,
wed,1.0,1.0,1.0,1.0


Unnamed: 0,sun,mon,tue,wed
food,0.5625,0.625,0.275,1.0
gift,5.0,8.0,,1.0
service,1.111111,1.666667,0.833333,1.0
venue,,,,1.0


### numpy ufuncs with DataFrame

In [6]:
import numpy as np

# Mean income per day (one value per column).
# axis=0 is passed explicitly: np.mean forwards its axis argument to
# DataFrame.mean, and from pandas 2.0 onward the default axis=None reduces
# over both axes and collapses the whole frame to a single scalar.
display(np.mean(df_weekly_income1, axis=0))

# np.sqrt is applied element-wise and returns a DataFrame with the same
# labels; NaN cells stay NaN.
display(np.sqrt(df_weekly_income1))

sun    2333.333333
mon    2933.333333
tue    1850.000000
wed    3225.000000
dtype: float64

Unnamed: 0,sun,mon,tue,wed
food,67.082039,70.710678,46.904158,89.442719
gift,22.36068,28.284271,,10.0
service,44.72136,54.772256,38.729833,42.426407
venue,,,,54.772256


### Lambda mapping

In [7]:
def fun_range(x):
    """Return the spread of `x`: its maximum minus its minimum."""
    return x.max() - x.min()

display(
    # The untouched table, for reference.
    df_weekly_income1,
    # axis='index' passes each column to fun_range: one spread per day.
    df_weekly_income1.apply(fun_range, axis='index'),
    # axis='columns' passes each row to fun_range: one spread per category.
    df_weekly_income1.apply(fun_range, axis='columns'),
)

Unnamed: 0,sun,mon,tue,wed
food,4500.0,5000.0,2200.0,8000
gift,500.0,800.0,,100
service,2000.0,3000.0,1500.0,1800
venue,,,,3000


sun    4000.0
mon    4200.0
tue     700.0
wed    7900.0
dtype: float64

food       5800.0
gift        700.0
service    1500.0
venue         0.0
dtype: float64

### Mapping a function that returns a Series

In [8]:
def max_min(x):
    """Return the largest and smallest value of `x` as a Series labelled 'max' and 'min'."""
    highest = x.max()
    lowest = x.min()
    return Series(data=[highest, lowest], index=['max', 'min'])

display(
    # Default axis ('index'): max_min receives each column, so the result
    # has one 'max'/'min' pair per day.
    df_weekly_income1.apply(max_min, axis='index'),
    # axis='columns': max_min receives each row, so the result has one
    # 'max'/'min' pair per category.
    df_weekly_income1.apply(max_min, axis='columns'),
)

def format_cur(x):
    """Format the number `x` as a dollar amount with two decimals, e.g. '$4500.00'."""
    return '$%.2f' % x
# Format every cell as currency. Each column is mapped with Series.map
# instead of calling DataFrame.applymap, which is deprecated since
# pandas 2.1; the column-wise form behaves the same on old and new pandas.
# NaN cells are passed to format_cur as well and come out as '$nan'.
display(df_weekly_income1.apply(lambda column: column.map(format_cur)))

Unnamed: 0,sun,mon,tue,wed
max,4500.0,5000.0,2200.0,8000
min,500.0,800.0,1500.0,100


Unnamed: 0,max,min
food,8000.0,2200.0
gift,800.0,100.0
service,3000.0,1500.0
venue,3000.0,3000.0


Unnamed: 0,sun,mon,tue,wed
food,$4500.00,$5000.00,$2200.00,$8000.00
gift,$500.00,$800.00,$nan,$100.00
service,$2000.00,$3000.00,$1500.00,$1800.00
venue,$nan,$nan,$nan,$3000.00


### Sorting

In [9]:
# Series: order by label, then by value.
display('sort the indexes', s1.sort_index())
display('sort the values', s1.sort_values())

# DataFrame rows: order by row label, then by the values in the 'sun' column.
display('sort index', df_weekly_income1.sort_index())
display('sort values by "sun" income', df_weekly_income1.sort_values(by='sun'))

# DataFrame columns: order by column label, then by the values in the 'food' row.
display('sort index', df_weekly_income1.sort_index(axis='columns'))
# by='food' together with axis='columns' reorders the columns according to
# the 'food' row, so the label names that key rather than "sun".
display('sort columns by "food" income', df_weekly_income1.sort_values(by='food', axis='columns'))

'sort the indexes'

food       8000
gift        100
service    1800
venue      3000
dtype: int64

'sort the values'

gift        100
service    1800
venue      3000
food       8000
dtype: int64

'sort index'

Unnamed: 0,sun,mon,tue,wed
food,4500.0,5000.0,2200.0,8000
gift,500.0,800.0,,100
service,2000.0,3000.0,1500.0,1800
venue,,,,3000


'sort values by "sun" income'

Unnamed: 0,sun,mon,tue,wed
gift,500.0,800.0,,100
service,2000.0,3000.0,1500.0,1800
food,4500.0,5000.0,2200.0,8000
venue,,,,3000


'sort index'

Unnamed: 0,mon,sun,tue,wed
food,5000.0,4500.0,2200.0,8000
gift,800.0,500.0,,100
service,3000.0,2000.0,1500.0,1800
venue,,,,3000


'sort values by "sun" income'

Unnamed: 0,tue,sun,mon,wed
food,2200.0,4500.0,5000.0,8000
gift,,500.0,800.0,100
service,1500.0,2000.0,3000.0,1800
venue,,,,3000


### Ranking

In [10]:
# Defaults spelled out: rank the values within each column, and give tied
# values the average of the ranks they span.
display(df_weekly_income1.rank(axis='index', method='average'))

# Rank within each row instead. method='first' breaks ties by order of
# appearance: the value seen first receives the lower rank.
display(df_weekly_income1.rank(axis='columns', method='first'))
                                                                

Unnamed: 0,sun,mon,tue,wed
food,3.0,3.0,2.0,4.0
gift,1.0,1.0,,1.0
service,2.0,2.0,1.0,2.0
venue,,,,3.0


Unnamed: 0,sun,mon,tue,wed
food,2.0,3.0,1.0,4.0
gift,2.0,3.0,,1.0
service,3.0,4.0,1.0,2.0
venue,,,,1.0


In [37]:
import numpy as np
# Seeded generator, so the table below is the same on every run.
rnd_state = np.random.RandomState(5)
# 4x5 table of integers drawn from range(5).
rnd_table = rnd_state.choice(5, size=(4, 5))
# The column labels contain duplicates: 'a' and 'b' each appear twice.
df_rand_table = DataFrame(rnd_table, columns=list('abbae'))
display(df_rand_table)

# axis='index' (the default): rank the values within each column.
# Tied values share the average of their ranks.
display(df_rand_table.rank(axis='index'))

# axis='columns': rank the values within each row.
# Default tie handling ('average'): tied values share the average of their ranks.
display(df_rand_table.rank(axis='columns', method='average'))
# 'min': tied values all take the lowest rank of the group, and the ranks
# they would otherwise have occupied are skipped.
display(df_rand_table.rank(axis='columns', method='min'))
# 'dense': tied values take the lowest rank of the group, and the next
# distinct value gets the very next rank (no gaps).
display(df_rand_table.rank(axis='columns', method='dense'))


Unnamed: 0,a,b,b.1,a.1,e
0,3,0,1,0,4
1,3,0,0,4,1
2,0,3,4,3,1
3,4,2,1,1,2


Unnamed: 0,a,b,b.1,a.1,e
0,2.5,1.5,2.5,1.0,4.0
1,2.5,1.5,1.0,4.0,1.5
2,1.0,4.0,4.0,3.0,1.5
3,4.0,3.0,2.5,2.0,3.0


Unnamed: 0,a,b,b.1,a.1,e
0,4.0,1.5,3.0,1.5,5.0
1,4.0,1.5,1.5,5.0,3.0
2,1.0,3.5,5.0,3.5,2.0
3,5.0,3.5,1.5,1.5,3.5


Unnamed: 0,a,b,b.1,a.1,e
0,4.0,1.0,3.0,1.0,5.0
1,4.0,1.0,1.0,5.0,3.0
2,1.0,3.0,5.0,3.0,2.0
3,5.0,3.0,1.0,1.0,3.0


Unnamed: 0,a,b,b.1,a.1,e
0,3.0,1.0,2.0,1.0,4.0
1,3.0,1.0,1.0,4.0,2.0
2,1.0,3.0,4.0,3.0,2.0
3,3.0,2.0,1.0,1.0,2.0


### Indexing with a duplicate index

In [48]:
# Row 0 of the table as a Series; it inherits the duplicated column labels
# as its index.
dup_series = df_rand_table.loc[0]

display(
    # Label that occurs once in the Series index -> a single scalar.
    dup_series['e'],
    # Label that occurs more than once -> a Series with every match.
    dup_series['a'],
)

display(
    # Column label that occurs once -> that column as a Series.
    df_rand_table['e'],
    # Column label that occurs more than once -> a DataFrame holding every
    # matching column.
    df_rand_table['a'],
)

4

a    3
a    0
Name: 0, dtype: int64

0    4
1    1
2    1
3    2
Name: e, dtype: int64

Unnamed: 0,a,a.1
0,3,0
1,3,4
2,0,3
3,4,1
