# Multi-Index Practice

In [87]:

import pandas as pd
import numpy as np

In [88]:
outside=['Team1','Team1','Team2','Team2',]
inside=['Yuri','Arisa','Nanako','Kyo']

In [89]:
hier_index=list(zip(outside,inside))
hier_index=pd.MultiIndex.from_tuples(hier_index)

In [90]:
hier_index

MultiIndex(levels=[['Team1', 'Team2'], ['Arisa', 'Kyo', 'Nanako', 'Yuri']],
           codes=[[0, 0, 1, 1], [3, 0, 2, 1]])

In [91]:
df1=pd.DataFrame(np.random.randn(4,2),index=hier_index,columns=['A','B'])

In [92]:
df1

Unnamed: 0,Unnamed: 1,A,B
Team1,Yuri,-0.6958,0.75374
Team1,Arisa,-0.294118,-0.914386
Team2,Nanako,-0.026514,-0.138946
Team2,Kyo,-0.977555,0.239707


In [93]:
df1.loc['Team2']

Unnamed: 0,A,B
Nanako,-0.026514,-0.138946
Kyo,-0.977555,0.239707


In [94]:
df1.loc['Team2'].loc['Kyo']

A   -0.977555
B    0.239707
Name: Kyo, dtype: float64

In [95]:
df1.index.names=['Team','Member']

In [96]:
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Team,Member,Unnamed: 2_level_1,Unnamed: 3_level_1
Team1,Yuri,-0.6958,0.75374
Team1,Arisa,-0.294118,-0.914386
Team2,Nanako,-0.026514,-0.138946
Team2,Kyo,-0.977555,0.239707


In [97]:
df1.xs('Team1')

Unnamed: 0_level_0,A,B
Member,Unnamed: 1_level_1,Unnamed: 2_level_1
Yuri,-0.6958,0.75374
Arisa,-0.294118,-0.914386


In [98]:
df1.xs(['Team2','Nanako'])

A   -0.026514
B   -0.138946
Name: (Team2, Nanako), dtype: float64

In [99]:
df1.xs('Yuri',level='Member')

Unnamed: 0_level_0,A,B
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Team1,-0.6958,0.75374


# Missing Data Practice

In [100]:
df2=pd.DataFrame({'Japan':[1,2,3,np.nan],
               'China':[2,np.nan,4,np.nan],
                  'Korea':[1,2,3,4]})

In [101]:
df2

Unnamed: 0,Japan,China,Korea
0,1.0,2.0,1
1,2.0,,2
2,3.0,4.0,3
3,,,4


In [102]:
df2.dropna()

Unnamed: 0,Japan,China,Korea
0,1.0,2.0,1
2,3.0,4.0,3


In [103]:
df2.transpose()

Unnamed: 0,0,1,2,3
Japan,1.0,2.0,3.0,
China,2.0,,4.0,
Korea,1.0,2.0,3.0,4.0


In [104]:
df2.dropna(axis=1)

Unnamed: 0,Korea
0,1
1,2
2,3
3,4


In [105]:
df2.dropna(axis=0)

Unnamed: 0,Japan,China,Korea
0,1.0,2.0,1
2,3.0,4.0,3


In [106]:
df2.dropna(thresh=2)

Unnamed: 0,Japan,China,Korea
0,1.0,2.0,1
1,2.0,,2
2,3.0,4.0,3


In [107]:
df2.dropna(thresh=1)

Unnamed: 0,Japan,China,Korea
0,1.0,2.0,1
1,2.0,,2
2,3.0,4.0,3
3,,,4


In [108]:
df2.dropna(axis=1,thresh=4)

Unnamed: 0,Korea
0,1
1,2
2,3
3,4


In [109]:
df2.fillna('-')

Unnamed: 0,Japan,China,Korea
0,1,2,1
1,2,-,2
2,3,4,3
3,-,-,4


In [110]:
df2['China'].fillna(value=df2['China'].std())

0    2.000000
1    1.414214
2    4.000000
3    1.414214
Name: China, dtype: float64

In [111]:
df2['Japan'].fillna(value=df2['Japan'].mean())

0    1.0
1    2.0
2    3.0
3    2.0
Name: Japan, dtype: float64

In [112]:
df2['Japan'].fillna(value=df2['Japan'].std())

0    1.0
1    2.0
2    3.0
3    1.0
Name: Japan, dtype: float64

In [113]:
df2['China'].fillna(value=df2['China'].sum())

0    2.0
1    6.0
2    4.0
3    6.0
Name: China, dtype: float64

In [114]:
df2['China'].fillna(value=df2['China'].mean())

0    2.0
1    3.0
2    4.0
3    3.0
Name: China, dtype: float64

# Groupby Function Practice

In [125]:
data={'Bevarages':['Latte','Latte','Chai','Chai','Coffee','Coffee'],
      'Customer':['Yuri','Keiko','Lohit','Rekha','Kota','Maiko'],
      'Sales':[150,280,100,120,280,230]}
      

In [126]:
df3=pd.DataFrame(data)

In [127]:
df3

Unnamed: 0,Bevarages,Customer,Sales
0,Latte,Yuri,150
1,Latte,Keiko,280
2,Chai,Lohit,100
3,Chai,Rekha,120
4,Coffee,Kota,280
5,Coffee,Maiko,230


In [128]:
df3.groupby

<bound method NDFrame.groupby of   Bevarages Customer  Sales
0     Latte     Yuri    150
1     Latte    Keiko    280
2      Chai    Lohit    100
3      Chai    Rekha    120
4    Coffee     Kota    280
5    Coffee    Maiko    230>

In [129]:
df3

Unnamed: 0,Bevarages,Customer,Sales
0,Latte,Yuri,150
1,Latte,Keiko,280
2,Chai,Lohit,100
3,Chai,Rekha,120
4,Coffee,Kota,280
5,Coffee,Maiko,230


In [131]:
by_drink=df3.groupby('Bevarages')

In [132]:
by_drink

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001D1CDE3F2B0>

In [133]:
by_drink.mean()

Unnamed: 0_level_0,Sales
Bevarages,Unnamed: 1_level_1
Chai,110
Coffee,255
Latte,215


In [134]:
by_drink.sum()

Unnamed: 0_level_0,Sales
Bevarages,Unnamed: 1_level_1
Chai,220
Coffee,510
Latte,430


In [136]:
by_drink.min()

Unnamed: 0_level_0,Customer,Sales
Bevarages,Unnamed: 1_level_1,Unnamed: 2_level_1
Chai,Lohit,100
Coffee,Kota,230
Latte,Keiko,150


In [137]:
by_drink.max()

Unnamed: 0_level_0,Customer,Sales
Bevarages,Unnamed: 1_level_1,Unnamed: 2_level_1
Chai,Rekha,120
Coffee,Maiko,280
Latte,Yuri,280


In [138]:
by_drink.count()

Unnamed: 0_level_0,Customer,Sales
Bevarages,Unnamed: 1_level_1,Unnamed: 2_level_1
Chai,2,2
Coffee,2,2
Latte,2,2


In [139]:
by_drink.describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Bevarages,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Chai,2.0,110.0,14.142136,100.0,105.0,110.0,115.0,120.0
Coffee,2.0,255.0,35.355339,230.0,242.5,255.0,267.5,280.0
Latte,2.0,215.0,91.923882,150.0,182.5,215.0,247.5,280.0


In [142]:
by_drink.describe().transpose()

Unnamed: 0,Bevarages,Chai,Coffee,Latte
Sales,count,2.0,2.0,2.0
Sales,mean,110.0,255.0,215.0
Sales,std,14.142136,35.355339,91.923882
Sales,min,100.0,230.0,150.0
Sales,25%,105.0,242.5,182.5
Sales,50%,110.0,255.0,215.0
Sales,75%,115.0,267.5,247.5
Sales,max,120.0,280.0,280.0


In [144]:
by_drink.describe().transpose()['Coffee']

Sales  count      2.000000
       mean     255.000000
       std       35.355339
       min      230.000000
       25%      242.500000
       50%      255.000000
       75%      267.500000
       max      280.000000
Name: Coffee, dtype: float64