# Operations between Data Structures

In [1]:
import numpy as np
import pandas as pd

---

## Flexible Arithmetic Methods

The flexible arithmetic methods are `add()`, `sub()`, `div()`, and `mul()`.

In [2]:
# frame1 is a 4x4 grid of 0..15 and frame2 a 4x3 grid of 0..11; their row
# and column labels only partly overlap.
frame1 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)
frame2 = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=['blue', 'green', 'white', 'yellow'],
    columns=['mug', 'pen', 'ball'],
)

In [3]:
frame1

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [4]:
frame2

Unnamed: 0,mug,pen,ball
blue,0,1,2
green,3,4,5
white,6,7,8
yellow,9,10,11


In [6]:
# add() aligns on both row and column labels: only cells whose labels exist in
# both frames get a sum; every other cell in the union of labels is NaN.
frame1.add(frame2)

Unnamed: 0,ball,mug,paper,pen,pencil
blue,6.0,,,6.0,
green,,,,,
red,,,,,
white,20.0,,,20.0,
yellow,19.0,,,19.0,


---

## Operations between DataFrame and Series

In [7]:
# 4x4 frame holding 0..15, with color names as row labels and item names as
# column labels.
frame = pd.DataFrame(
    np.arange(16).reshape(4, -1),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)

In [8]:
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [9]:
frame.loc['red']

ball      0
pen       1
pencil    2
paper     3
Name: red, dtype: int32

In [10]:
# The Series index (ball, pen, pencil, paper) is matched against the frame's
# columns, and the subtraction is repeated for every row.
frame - frame.loc['red']

Unnamed: 0,ball,pen,pencil,paper
red,0,0,0,0
blue,4,4,4,4
yellow,8,8,8,8
white,12,12,12,12


---

# Function Application and Mapping

## Functions by Element

In [11]:
# Rebuild the 4x4 frame of 0..15 so this section starts from a known state.
frame = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)

In [12]:
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [13]:
np.sqrt(frame)

Unnamed: 0,ball,pen,pencil,paper
red,0.0,1.0,1.414214,1.732051
blue,2.0,2.236068,2.44949,2.645751
yellow,2.828427,3.0,3.162278,3.316625
white,3.464102,3.605551,3.741657,3.872983


## Functions by Row or Column

Function application is not limited to ufuncs; it also works with user-defined functions. The important thing is that they operate on a one-dimensional array and return a single number as the result (a function may also return a Series, as the last example in this section shows).

In [19]:
def range(x):
    """Return the spread of ``x``, i.e. ``x.max() - x.min()``.

    ``x`` is whatever ``DataFrame.apply`` passes in: one column or one row
    at a time, depending on ``axis``.

    NOTE: this name shadows Python's built-in ``range`` from this cell
    onward. It is kept because the following cells call
    ``frame.apply(range)``.
    """
    return x.max() - x.min()

In [20]:
frame.apply(range) 

ball      12
pen       12
pencil    12
paper     12
dtype: int64

In [21]:
frame.apply(range, axis=1)

red       3
blue      3
yellow    3
white     3
dtype: int64

In [22]:
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [23]:
# axis=0: the function receives one column at a time.
frame.apply(lambda col: col.mean(), axis=0)

ball      6.0
pen       7.0
pencil    8.0
paper     9.0
dtype: float64

In [24]:
# axis=1: the function receives one row at a time.
frame.apply(lambda row: row.sum(), axis=1)

red        6
blue      22
yellow    38
white     54
dtype: int64

In [26]:
# Returning a Series from the applied function yields one output row per
# Series label ('min' and 'max') for each column.
frame.apply(
    lambda col: pd.Series([col.min(), col.max()], index=['min', 'max'])
)

Unnamed: 0,ball,pen,pencil,paper
min,0,1,2,3
max,12,13,14,15


---

## Statistics Functions

In [27]:
frame.sum()

ball      24
pen       28
pencil    32
paper     36
dtype: int64

In [28]:
frame.mean()

ball      6.0
pen       7.0
pencil    8.0
paper     9.0
dtype: float64

In [31]:
frame.describe()

Unnamed: 0,ball,pen,pencil,paper
count,4.0,4.0,4.0,4.0
mean,6.0,7.0,8.0,9.0
std,5.163978,5.163978,5.163978,5.163978
min,0.0,1.0,2.0,3.0
25%,3.0,4.0,5.0,6.0
50%,6.0,7.0,8.0,9.0
75%,9.0,10.0,11.0,12.0
max,12.0,13.0,14.0,15.0


---

# Important Points

- add, sub, mul, div
- object.apply(function, axis)
- object.sum()
- object.mean()
- object.describe()
- operations between a Series and a DataFrame are possible, following the same broadcasting rules as NumPy