# Documents (Reimplementation)
#### Reference API Documents
 - https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html

In [1]:
import pandas as pd
import numpy as np

# 001. pandas.DataFrame (constructor)

#### Constructing DataFrame from a dictionary

In [6]:
# Column-oriented input: each key becomes a column label and each list
# supplies that column's values from top to bottom.
d = dict(col1=[1, 2], col2=[3, 4])
df = pd.DataFrame(d)
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [7]:
df.dtypes

col1    int64
col2    int64
dtype: object

#### To enforce a single dtype

In [8]:
# Passing dtype forces every column to one type (8-bit integers here).
df = pd.DataFrame(d, dtype='int8')
df.dtypes

col1    int8
col2    int8
dtype: object

#### Constructing DataFrame from numpy ndarray

In [11]:
# A 3x3 ndarray holding 1..9 is read row by row; the column labels are
# supplied explicitly.
df2 = pd.DataFrame(np.arange(1, 10).reshape(3, 3), columns=list('abc'))
df2

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


# 002. pandas.DataFrame.T

In [12]:
# Square 2x2 frame with a single dtype, used below to show a plain transpose.
d1 = {
    'col1': [1, 2],
    'col2': [3, 4],
}
df1 = pd.DataFrame(d1)
df1

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [13]:
# .T swaps rows and columns: the column labels become the index and the
# original row labels (0, 1) become the column labels.
df1_transposed = df1.T
df1_transposed

Unnamed: 0,0,1
col1,1,2
col2,3,4


In [14]:
df1.transpose()

Unnamed: 0,0,1
col1,1,2
col2,3,4


In [15]:
df1.dtypes

col1    int64
col2    int64
dtype: object

In [16]:
df1_transposed.dtypes

0    int64
1    int64
dtype: object

#### Non-square DataFrame with mixed dtypes

In [17]:
# Two rows by four columns, each column holding a different type
# (str, float, bool, int), so the frame is non-square and heterogeneous.
d2 = dict(
    name=['Alice', 'Bob'],
    score=[9.5, 8],
    employed=[False, True],
    kids=[0, 0],
)
df2 = pd.DataFrame(d2)
df2

Unnamed: 0,name,score,employed,kids
0,Alice,9.5,False,0
1,Bob,8.0,True,0


In [18]:
# After transposing, every new column mixes str, float, bool and int values,
# so (as the dtypes cell below shows) each column ends up with dtype object.
df2_transposed = df2.T
df2_transposed

Unnamed: 0,0,1
name,Alice,Bob
score,9.5,8
employed,False,True
kids,0,0


In [19]:
df2.dtypes

name         object
score       float64
employed       bool
kids          int64
dtype: object

In [20]:
df2_transposed.dtypes

0    object
1    object
dtype: object

# 003. pandas.DataFrame.at

In [6]:
import pandas as pd
import numpy as np

In [2]:
# Row labels are 4, 5 and 6 rather than 0..2, so the .at lookups that follow
# are clearly label-based, not positional.
df = pd.DataFrame(
    {'A': [0, 0, 10], 'B': [2, 4, 20], 'C': [3, 1, 30]},
    index=[4, 5, 6],
)
df

Unnamed: 0,A,B,C
4,0,2,3
5,0,4,1
6,10,20,30


In [3]:
df.at[4,'B']

2

In [4]:
# .at also supports assignment: overwrite the single cell at row label 4,
# column 'B', then read it back to confirm the new value.
df.at[4,'B'] = 10
df.at[4,'B']

10

In [5]:
# .loc[5] returns the row labelled 5 as a Series; .at then reads one value
# from that Series by its label.
df.loc[5].at['B']

4

# 004. pandas.DataFrame.axes

In [7]:
# Minimal 2x2 frame whose two axes (row index, column index) are inspected below.
df = pd.DataFrame(dict(col1=[1, 2], col2=[3, 4]))
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [8]:
df.axes

[RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], dtype='object')]

In [10]:
df.axes[0]

RangeIndex(start=0, stop=2, step=1)

In [11]:
df.axes[1]

Index(['col1', 'col2'], dtype='object')

# 005. pandas.DataFrame.columns

In [13]:
df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [14]:
df.columns

Index(['col1', 'col2'], dtype='object')

# 006. pandas.DataFrame.dtypes

In [15]:
# A single row whose four columns each hold a different kind of value, so
# that .dtypes reports a distinct dtype per column.
df = pd.DataFrame(
    {
        'float': [1.0],
        'int': [1],
        'datetime': [pd.Timestamp('20200101')],
        'string': ['foo'],
    }
)
df

Unnamed: 0,float,int,datetime,string
0,1.0,1,2020-01-01,foo


In [17]:
df.dtypes

float              float64
int                  int64
datetime    datetime64[ns]
string              object
dtype: object

# 007. pandas.DataFrame.empty

In [2]:
# One column, zero rows: the smallest frame for which .empty is True.
df_empty = pd.DataFrame(dict(A=[]))
df_empty

Unnamed: 0,A


In [3]:
df_empty.empty

True

In [6]:
# A frame that holds only NaN still has one row, so it does NOT count as
# empty; the rows must be dropped (see dropna() below) before .empty is True.
# numpy is already imported as np in the notebook's first cell, so it is not
# re-imported here.
df = pd.DataFrame({'A':[np.nan]})
df

Unnamed: 0,A
0,


In [7]:
df.empty

False

In [8]:
df.dropna().empty

True

# 008. pandas.DataFrame.iat

In [9]:
# Default 0..2 row labels; .iat below addresses cells purely by integer position.
df = pd.DataFrame(
    [
        [0, 2, 3],
        [0, 4, 1],
        [10, 20, 30],
    ],
    columns=list('ABC'),
)
df

Unnamed: 0,A,B,C
0,0,2,3
1,0,4,1
2,10,20,30


In [10]:
df.iat[1,2]

1

In [11]:
df.iat[1,2] = 10
df.iat[1,2] 

10

In [12]:
df.loc[0].iat[1]

2

# 009. pandas.DataFrame.iloc

In [13]:
# Three records with keys a..d; each row is the base values 1..4 multiplied
# by 1, 100 and 1000 respectively.  One record becomes one DataFrame row.
mydict = [
    {key: base * scale for key, base in zip('abcd', range(1, 5))}
    for scale in (1, 100, 1000)
]
df = pd.DataFrame(mydict)
df

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400
2,1000,2000,3000,4000


In [14]:
type(df.iloc[0])

pandas.core.series.Series

In [17]:
df.iloc[0]   # Series

a    1
b    2
c    3
d    4
Name: 0, dtype: int64

In [18]:
df.iloc[[0]]

Unnamed: 0,a,b,c,d
0,1,2,3,4


In [19]:
df.iloc[[0,1]]

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400


In [20]:
df.iloc[:3]

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400
2,1000,2000,3000,4000


In [22]:
df.iloc[[True,False,True]]

Unnamed: 0,a,b,c,d
0,1,2,3,4
2,1000,2000,3000,4000


In [25]:
# A callable indexer receives the DataFrame itself; here it returns a boolean
# mask that keeps the rows whose index value is even (rows 0 and 2).
df.iloc[lambda x: x.index % 2 ==0]

Unnamed: 0,a,b,c,d
0,1,2,3,4
2,1000,2000,3000,4000


In [26]:
df.iloc[0,1]

2

In [27]:
df.iloc[[0,2],[1,3]]

Unnamed: 0,b,d
0,2,4
2,2000,4000


In [28]:
df.iloc[1:3, 0:3]

Unnamed: 0,a,b,c
1,100,200,300
2,1000,2000,3000


In [29]:
df.iloc[:, [True, False, True, False]]

Unnamed: 0,a,c
0,1,3
1,100,300
2,1000,3000


In [30]:
df.iloc[:, lambda df: [0,2]]

Unnamed: 0,a,c
0,1,3
1,100,300
2,1000,3000


# 010. pandas.DataFrame.loc

In [31]:
# String row labels so that every .loc example below is unmistakably
# label-based.
df = pd.DataFrame(
    {'max_speed': [1, 4, 7], 'shield': [2, 5, 8]},
    index=['cobra', 'viper', 'sidewinder'],
)
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,5
sidewinder,7,8


In [32]:
df.loc['viper']

max_speed    4
shield       5
Name: viper, dtype: int64

In [33]:
df.loc[['viper','sidewinder']]

Unnamed: 0,max_speed,shield
viper,4,5
sidewinder,7,8


In [34]:
df.loc['cobra','shield']

2

In [35]:
df.loc['cobra':'viper', 'max_speed']

cobra    1
viper    4
Name: max_speed, dtype: int64

In [36]:
df.loc[[False, False, True]]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [37]:
df.loc[df['shield']>6]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [38]:
df.loc[df['shield']>6, ['max_speed']]

Unnamed: 0,max_speed
sidewinder,7


In [39]:
df.loc[lambda df: df['shield']==8]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [40]:
df.loc[['viper', 'sidewinder'], ['shield']] = 50
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,50
sidewinder,7,50


In [41]:
df.loc['cobra'] = 10
df

Unnamed: 0,max_speed,shield
cobra,10,10
viper,4,50
sidewinder,7,50


In [42]:
df.loc[:, 'max_speed'] = 30
df

Unnamed: 0,max_speed,shield
cobra,30,10
viper,30,50
sidewinder,30,50


In [44]:
# Boolean-mask assignment: for each row whose shield exceeds 35, every
# column of that row is set to 0 (viper and sidewinder in the output below).
df.loc[df['shield']>35] = 0
df

Unnamed: 0,max_speed,shield
cobra,30,10
viper,0,0
sidewinder,0,0


#### Indexing a DataFrame with integer labels

In [45]:
# Same data as before, but the row labels are the integers 7, 8, 9; they are
# labels, not positions.
df = pd.DataFrame(
    {'max_speed': [1, 4, 7], 'shield': [2, 5, 8]},
    index=[7, 8, 9],
)
df

Unnamed: 0,max_speed,shield
7,1,2
8,4,5
9,7,8


In [46]:
# With .loc an integer slice is interpreted as labels and includes BOTH
# endpoints: rows labelled 7, 8 and 9 are all returned.
df.loc[7:9]

Unnamed: 0,max_speed,shield
7,1,2
8,4,5
9,7,8


#### Getting values with a MultiIndex

In [47]:
# Two-level row index: (model, variant).  Each tuple labels one row, and the
# rows of `values` are given in the same order as `tuples`.
tuples = [
    ('cobra', 'mark i'),
    ('cobra', 'mark ii'),
    ('sidewinder', 'mark i'),
    ('sidewinder', 'mark ii'),
    ('viper', 'mark ii'),
    ('viper', 'mark iii'),
]
index = pd.MultiIndex.from_tuples(tuples)
values = [
    [12, 2],
    [0, 4],
    [10, 20],
    [1, 4],
    [7, 1],
    [16, 36],
]
df = pd.DataFrame(values, index=index, columns=['max_speed', 'shield'])
df

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,mark i,12,2
cobra,mark ii,0,4
sidewinder,mark i,10,20
sidewinder,mark ii,1,4
viper,mark ii,7,1
viper,mark iii,16,36


In [48]:
df.loc['cobra']

Unnamed: 0,max_speed,shield
mark i,12,2
mark ii,0,4


In [49]:
df.loc[('cobra', 'mark ii')]

max_speed    0
shield       4
Name: (cobra, mark ii), dtype: int64

In [50]:
df.loc['cobra','mark i']

max_speed    12
shield        2
Name: (cobra, mark i), dtype: int64

In [51]:
df.loc[('cobra','mark i'), 'shield']

2

In [52]:
# Slice from a full index tuple to a single outer-level label; the stop label
# is inclusive, so every 'viper' row appears in the result.
df.loc[('cobra', 'mark i') : 'viper']

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,mark i,12,2
cobra,mark ii,0,4
sidewinder,mark i,10,20
sidewinder,mark ii,1,4
viper,mark ii,7,1
viper,mark iii,16,36


In [53]:
df.loc[('cobra','mark i') : ('viper','mark ii')]

Unnamed: 0,Unnamed: 1,max_speed,shield
cobra,mark i,12,2
cobra,mark ii,0,4
sidewinder,mark i,10,20
sidewinder,mark ii,1,4
viper,mark ii,7,1


# 011. pandas.DataFrame.ndim

In [2]:
# A Series is one-dimensional: three values labelled 'a', 'b', 'c'.
s = pd.Series([1, 2, 3], index=list('abc'))
s

a    1
b    2
c    3
dtype: int64

In [3]:
s.ndim

1

In [4]:
df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [5]:
df.ndim

2

# 012. pandas.DataFrame.shape

In [6]:
df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [7]:
df.shape

(2, 2)

In [8]:
# Same two rows, now with a third column, so the shape becomes (2, 3).
df = pd.DataFrame(
    [[1, 3, 5], [2, 4, 6]],
    columns=['col1', 'col2', 'col3'],
)
df

Unnamed: 0,col1,col2,col3
0,1,3,5
1,2,4,6


In [9]:
df.shape

(2, 3)

# 013. pandas.DataFrame.size

In [12]:
s = pd.Series({'a':1, 'b':2, 'c':3})
s

a    1
b    2
c    3
dtype: int64

In [13]:
s.size

3

In [16]:
df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [15]:
df.size

4

# 014. pandas.DataFrame.values

In [17]:
# All-integer frame: .values on it yields a single homogeneous integer array.
df = pd.DataFrame(
    [[3, 94, 31], [29, 170, 115]],
    columns=['age', 'height', 'weight'],
)
df

Unnamed: 0,age,height,weight
0,3,94,31
1,29,170,115


In [18]:
df.dtypes

age       int64
height    int64
weight    int64
dtype: object

In [19]:
df.values

array([[  3,  94,  31],
       [ 29, 170, 115]], dtype=int64)

In [21]:
# Mixed-type records: 'rank' holds a str, an int and None, and 'max_speed'
# has a missing value, so .values has to fall back to an object array.
df2 = pd.DataFrame(
    [
        ('parrot', 24.0, 'second'),
        ('lion', 80.5, 1),
        ('monkey', np.nan, None),
    ],
    columns=('name', 'max_speed', 'rank'),
)
df2

Unnamed: 0,name,max_speed,rank
0,parrot,24.0,second
1,lion,80.5,1
2,monkey,,


In [22]:
df2.dtypes

name          object
max_speed    float64
rank          object
dtype: object

In [23]:
df2.values

array([['parrot', 24.0, 'second'],
       ['lion', 80.5, 1],
       ['monkey', nan, None]], dtype=object)

# 015. pandas.DataFrame.abs

In [3]:
s = pd.Series([-1,10,2,-3.33,4])
s

0    -1.00
1    10.00
2     2.00
3    -3.33
4     4.00
dtype: float64

In [4]:
s.abs()

0     1.00
1    10.00
2     2.00
3     3.33
4     4.00
dtype: float64

In [5]:
s = pd.Series([1.2+1j])
s

0    1.200000+1.000000j
dtype: complex128

In [6]:
s.abs()

0    1.56205
dtype: float64

In [7]:
s = pd.Series([pd.Timedelta('1 days')])
s

0   1 days
dtype: timedelta64[ns]

In [8]:
s.abs()

0   1 days
dtype: timedelta64[ns]

In [9]:
# Column c contains both positive and negative values, which the next cell
# uses to rank rows by their distance from 43.
df = pd.DataFrame(
    dict(
        a=list(range(4, 8)),
        b=list(range(10, 50, 10)),
        c=[100, 50, -30, -50],
    )
)
df

Unnamed: 0,a,b,c
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [10]:
# Order the rows by how close column c is to 43.  argsort() returns integer
# POSITIONS, so they are passed to .iloc; .loc would interpret them as index
# labels and only gives the same answer while the index is the default
# 0..n-1 range.
df.iloc[(df.c - 43).abs().argsort().to_numpy()]

Unnamed: 0,a,b,c
1,5,20,50
0,4,10,100
2,6,30,-30
3,7,40,-50


# 016. pandas.DataFrame.add

In [3]:
# One row per shape: its number of angles and the sum of its interior
# degrees as given in the data.
df = pd.DataFrame(
    [[0, 360], [3, 180], [4, 360]],
    index=['circle', 'triangle', 'rectangle'],
    columns=['angles', 'degrees'],
)
df

Unnamed: 0,angles,degrees
circle,0,360
triangle,3,180
rectangle,4,360


In [4]:
df + 1

Unnamed: 0,angles,degrees
circle,1,361
triangle,4,181
rectangle,5,361


In [5]:
df.add(1)

Unnamed: 0,angles,degrees
circle,1,361
triangle,4,181
rectangle,5,361


In [6]:
df.div(10)

Unnamed: 0,angles,degrees
circle,0.0,36.0
triangle,0.3,18.0
rectangle,0.4,36.0


In [7]:
df.rdiv(10)

Unnamed: 0,angles,degrees
circle,inf,0.027778
triangle,3.333333,0.055556
rectangle,2.5,0.027778


In [8]:
df - [1,2]

Unnamed: 0,angles,degrees
circle,-1,358
triangle,2,178
rectangle,3,358


In [9]:
df.sub([1,2], axis='columns')

Unnamed: 0,angles,degrees
circle,-1,358
triangle,2,178
rectangle,3,358


In [11]:
# axis='index' matches the Series labels against the ROW labels, so 1 is
# subtracted from every column of each named row.
df.sub(pd.Series([1,1,1],
                 index=['circle','triangle','rectangle']),
                 axis='index')

Unnamed: 0,angles,degrees
circle,-1,359
triangle,2,179
rectangle,3,359


In [12]:
# A second frame sharing df's row labels but carrying only the 'angles'
# column, used to show how arithmetic aligns on columns.
other = pd.Series(
    [0, 3, 4],
    index=['circle', 'triangle', 'rectangle'],
    name='angles',
).to_frame()
other

Unnamed: 0,angles
circle,0
triangle,3
rectangle,4


In [13]:
# The operator form aligns on both row and column labels; `other` has no
# 'degrees' column, so that whole column comes out as NaN.
df * other

Unnamed: 0,angles,degrees
circle,0,
triangle,9,
rectangle,16,


In [14]:
# fill_value=0 stands in for the 'degrees' values that `other` lacks, so the
# product is 0.0 there instead of NaN.
df.mul(other, fill_value=0)

Unnamed: 0,angles,degrees
circle,0,0.0
triangle,9,0.0
rectangle,16,0.0


In [15]:
# Six shapes under a two-level row index: group ('A' or 'B') on the outer
# level and the shape name on the inner level.
df_multindex = pd.DataFrame(
    {
        'angles': [0, 3, 4, 4, 5, 6],
        'degrees': [360, 180, 360, 360, 540, 720],
    },
    index=[
        ['A'] * 3 + ['B'] * 3,
        ['circle', 'triangle', 'rectangle', 'square', 'pentagon', 'hexagon'],
    ],
)
df_multindex

Unnamed: 0,Unnamed: 1,angles,degrees
A,circle,0,360
A,triangle,3,180
A,rectangle,4,360
B,square,4,360
B,pentagon,5,540
B,hexagon,6,720


In [17]:
# level=1 matches df's row labels against the inner (shape-name) level of
# df_multindex; fill_value=0 replaces values missing on one side, which is
# why the 'B' rows are 0.0.  circle/angles is 0 divided by 0, hence NaN.
df.div(df_multindex, level=1, fill_value=0)

Unnamed: 0,Unnamed: 1,angles,degrees
A,circle,,1.0
A,triangle,1.0,1.0
A,rectangle,1.0,1.0
B,square,0.0,0.0
B,pentagon,0.0,0.0
B,hexagon,0.0,0.0


# 017. pandas.DataFrame.add_prefix

In [3]:
s = pd.Series([1,2,3,4])
s

0    1
1    2
2    3
3    4
dtype: int64

In [4]:
s.add_prefix('item_')

item_0    1
item_1    2
item_2    3
item_3    4
dtype: int64

In [5]:
# Two integer columns; add_prefix below renames the COLUMN labels of a frame.
df = pd.DataFrame(
    {
        'A': list(range(1, 5)),
        'B': list(range(3, 7)),
    }
)
df

Unnamed: 0,A,B
0,1,3
1,2,4
2,3,5
3,4,6


In [6]:
df.add_prefix('col_')

Unnamed: 0,col_A,col_B
0,1,3
1,2,4
2,3,5
3,4,6


# 018. pandas.DataFrame.add_suffix

In [7]:
s = pd.Series([1,2,3,4])
s

0    1
1    2
2    3
3    4
dtype: int64

In [8]:
s.add_suffix('_item')

0_item    1
1_item    2
2_item    3
3_item    4
dtype: int64

In [11]:
df = pd.DataFrame({'A':[1,2,3,4],
                   'B':[3,4,5,6]})
df

Unnamed: 0,A,B
0,1,3
1,2,4
2,3,5
3,4,6


In [13]:
df.add_suffix('_col')

Unnamed: 0,A_col,B_col
0,1,3
1,2,4
2,3,5
3,4,6


# 019. pandas.DataFrame.agg

In [3]:
# Three numeric rows followed by one all-NaN row; the NaN row makes every
# column float and shows how the aggregations below treat missing values.
df = pd.DataFrame(
    [[1, 2, 3], [4, 5, 6], [7, 8, 9], [np.nan] * 3],
    columns=list('ABC'),
)
df

Unnamed: 0,A,B,C
0,1.0,2.0,3.0
1,4.0,5.0,6.0
2,7.0,8.0,9.0
3,,,


In [4]:
df.agg(['sum','min'])

Unnamed: 0,A,B,C
sum,12.0,15.0,18.0
min,1.0,2.0,3.0


In [5]:
# Per-column aggregation: each key names a column and lists the functions to
# apply to it.  A function not requested for a column shows up as NaN in
# that column of the result.
df.agg({'A':['sum','min'],
        'B':['min','max']})

Unnamed: 0,A,B
max,,8.0
min,1.0,2.0
sum,12.0,


In [6]:
df.agg('mean', axis='columns')

0    2.0
1    5.0
2    8.0
3    NaN
dtype: float64

# 020. pandas.DataFrame.aggregate

In [7]:
# Same sample data as in the agg section, written column by column: three
# numeric rows plus a trailing all-NaN row.
df = pd.DataFrame(
    {
        'A': [1, 4, 7, np.nan],
        'B': [2, 5, 8, np.nan],
        'C': [3, 6, 9, np.nan],
    }
)
df

Unnamed: 0,A,B,C
0,1.0,2.0,3.0
1,4.0,5.0,6.0
2,7.0,8.0,9.0
3,,,


In [8]:
df.agg(['sum','min'])

Unnamed: 0,A,B,C
sum,12.0,15.0,18.0
min,1.0,2.0,3.0


In [9]:
df.agg({'A':['sum','min'],
        'B':['min','max']})

Unnamed: 0,A,B
max,,8.0
min,1.0,2.0
sum,12.0,


In [10]:
df.agg("mean", axis="columns")

0    2.0
1    5.0
2    8.0
3    NaN
dtype: float64