# Working with data frames

In [1]:
import numpy as np
import pandas as pd

## Creation

### Series

In [2]:
# Build a small integer Series whose labels are the letters 'A' through 'D'.
labels = list('ABCD')
s = pd.Series(data=np.arange(len(labels)), index=labels)
s

A    0
B    1
C    2
D    3
dtype: int64

#### Indexing

In [3]:
# Here the integer slice selects by position (the first two entries); the end
# position is excluded.
s[:2]

A    0
B    1
dtype: int64

In [4]:
# Slicing by label includes both endpoints, so 'D' is part of the result.
s['B':'D']

B    1
C    2
D    3
dtype: int64

### DataFrame

In [5]:
# A 4x5 grid holding the integers 1..20, with columns A-E and row labels 1-4.
n_rows, col_labels = 4, list('ABCDE')
data = np.arange(1, 21).reshape(n_rows, -1)
df = pd.DataFrame(data=data, index=range(1, n_rows + 1), columns=col_labels)
df

Unnamed: 0,A,B,C,D,E
1,1,2,3,4,5
2,6,7,8,9,10
3,11,12,13,14,15
4,16,17,18,19,20


#### Indexing

In [6]:
df['A']

1     1
2     6
3    11
4    16
Name: A, dtype: int64

In [7]:
df[['A', 'C']]

Unnamed: 0,A,C
1,1,3
2,6,8
3,11,13
4,16,18


#### Using the `loc` indexing operator

Note: `loc` works on labels, and a label slice includes both its start and its end.

In [8]:
df.loc[1:3]

Unnamed: 0,A,B,C,D,E
1,1,2,3,4,5
2,6,7,8,9,10
3,11,12,13,14,15


In [9]:
df.loc[1:3, 'B':'D']

Unnamed: 0,B,C,D
1,2,3,4
2,7,8,9
3,12,13,14


#### Using the `iloc` indexing operator

Note: `iloc` works on position indices (like `numpy`), so a slice excludes its end position.

In [10]:
df.iloc[1:3]

Unnamed: 0,A,B,C,D,E
2,6,7,8,9,10
3,11,12,13,14,15


In [11]:
df.iloc[1:3, 1:3]

Unnamed: 0,B,C
2,7,8
3,12,13


## Other ways to get a data frame

### Read from file

In [12]:
%%file data.csv
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12

Overwriting data.csv


In [13]:
df = pd.read_csv('data.csv')
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9
3,10,11,12


### Read from web

In [14]:
# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one, so certificate checks are switched off for later HTTPS
# requests made through the standard library in this kernel (presumably
# including the `pd.read_html` download below), not just for one call.
# It also relies on underscore-prefixed (private) names of the `ssl` module.
# Presumably a workaround for a local certificate error — TODO confirm, and
# prefer fixing the certificate store over disabling verification.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

There are many tables in the Wikipedia entry for Durham.

In [15]:
dfs = pd.read_html('https://en.wikipedia.org/wiki/Durham,_North_Carolina')

In [16]:
len(dfs)

15

In [17]:
# Pick the employers table by its header instead of a hard-coded position:
# the number and order of tables on a live Wikipedia page change as the
# article is edited, so a fixed index silently starts returning another table.
employer_tables = [table for table in dfs if 'Employer' in table.columns]
if not employer_tables:
    raise LookupError("no table with an 'Employer' column was found on the page")
employers = employer_tables[0]

In [18]:
employers.head()

Unnamed: 0,Employer,No. of employees
0,Duke University & Duke Univ. Health System,34863
1,IBM,10000
2,Durham Public Schools,4600
3,GlaxoSmithKline,3700
4,Blue Cross & Blue Shield of NC,3200


## Data manipulation

In [19]:
# NOTE(review): a shortened link hides the real data source and adds a
# dependency on a third-party redirect service. Judging by the columns loaded
# below this is presumably a Pokemon stats CSV — TODO replace with the
# canonical URL once confirmed.
url = 'https://rb.gy/u5avn'

In [20]:
df = pd.read_csv(url, index_col=0)

### Select rows

In [21]:
df.head(3)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False


In [22]:
df.tail(3)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True
721,Volcanion,Fire,Water,600,80,110,120,130,90,70,6,True


In [23]:
# Fix the seed so the same three rows are drawn on every run of the notebook.
df.sample(3, random_state=0)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
384,Rayquaza,Dragon,Flying,680,105,150,90,150,90,95,3,True
435,Skuntank,Poison,Dark,479,103,93,67,71,61,84,4,False
30,Nidorina,Poison,,365,70,62,67,55,55,56,1,False


In [24]:
# Plain [] with an integer slice picks rows by position (end excluded), so this
# returns the second and third rows — unlike the label-based `df.loc[1:3]`.
df[1:3]

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False


In [25]:
# `loc` slices by label and includes both ends; the index is not unique here,
# so every row labelled 3 is returned.
df.loc[1:3]

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False


#### Chaining operations

In [26]:
df.head(5).tail(3)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


### Select rows using conditions

In [27]:
df.loc[df.Name.str.startswith('V')].head()

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
37,Vulpix,Fire,,299,38,41,40,50,65,65,1,False
45,Vileplume,Grass,Poison,490,75,80,85,110,90,50,1,False
48,Venonat,Bug,Poison,305,60,55,50,40,55,45,1,False


In [28]:
df.loc[(df.Name.str.startswith('V')) & (df.HP > 90)].head()

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
134,Vaporeon,Water,,525,130,65,60,110,95,65,1,False
494,Victini,Psychic,Fire,600,100,100,100,100,100,100,5,True
640,Virizion,Grass,Fighting,580,91,90,72,90,129,108,5,True


In [29]:
df.query('Attack >= 100 and Defense >= 100').head()

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False
9,BlastoiseMega Blastoise,Water,,630,79,103,120,135,115,78,1,False
28,Sandslash,Ground,,450,75,100,110,45,55,65,1,False
76,Golem,Rock,Ground,495,80,120,130,55,65,45,1,False


In [30]:
df.query('Attack > 100 and Attack > Defense').head()

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False
6,CharizardMega Charizard Y,Fire,Flying,634,78,104,78,159,115,100,1,False
15,BeedrillMega Beedrill,Bug,Poison,495,65,150,40,15,80,145,1,False
34,Nidoking,Poison,Ground,505,81,102,77,85,75,85,1,False
57,Primeape,Fighting,,455,65,105,60,60,70,95,1,False


### Select columns

In [31]:
target_cols = ['Type 1', 'Type 2', 'Total']
df[target_cols].head()

Unnamed: 0_level_0,Type 1,Type 2,Total
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Grass,Poison,318
2,Grass,Poison,405
3,Grass,Poison,525
3,Grass,Poison,625
4,Fire,,309


In [32]:
# `target_cols` is now a boolean mask over the columns (True where the name
# starts with 'T'), not a list of column names as in the previous cell.
target_cols = df.columns.str.startswith('T')
# `iloc` accepts the boolean mask on the column axis; `:3` keeps the first three rows.
df.iloc[:3, target_cols]

Unnamed: 0_level_0,Type 1,Type 2,Total
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Grass,Poison,318
2,Grass,Poison,405
3,Grass,Poison,525


In [33]:
# case=False makes the match case-insensitive, so 'Sp. Atk', 'Sp. Def' and
# 'Speed' are all selected.
target_cols = df.columns.str.contains('sp', case=False)
df.iloc[:3, target_cols]

Unnamed: 0_level_0,Sp. Atk,Sp. Def,Speed
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,65,65,45
2,80,80,60
3,100,100,80


In [34]:
df.select_dtypes('object').head(3)

Unnamed: 0_level_0,Name,Type 1,Type 2
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Bulbasaur,Grass,Poison
2,Ivysaur,Grass,Poison
3,Venusaur,Grass,Poison


In [35]:
df.select_dtypes('number').head(3)

Unnamed: 0_level_0,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,318,45,49,49,65,65,45,1
2,405,60,62,63,80,80,60,1
3,525,80,82,83,100,100,80,1


In [36]:
df.select_dtypes('bool').head(3)

Unnamed: 0_level_0,Legendary
#,Unnamed: 1_level_1
1,False
2,False
3,False


### Sort rows

In [37]:
df.sort_values('Speed').head(3)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
446,Munchlax,Normal,,390,135,85,40,40,85,5,4,False
213,Shuckle,Bug,Rock,505,20,10,230,10,230,5,2,False
597,Ferroseed,Grass,Steel,305,44,50,91,24,86,10,5,False


In [38]:
df.sort_values('Speed', ascending=False).head(3)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
386,DeoxysSpeed Forme,Psychic,,600,50,95,90,95,90,180,3,True
291,Ninjask,Bug,Flying,456,61,90,45,50,50,160,3,False
386,DeoxysNormal Forme,Psychic,,600,50,150,50,150,50,150,3,True


In [39]:
df.sort_values(['Attack', 'Speed'], ascending=[True, False]).head(3)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
113,Chansey,Normal,,450,250,5,5,35,105,50,1,False
440,Happiny,Normal,,220,100,5,5,15,65,30,4,False
129,Magikarp,Water,,200,20,10,55,15,20,80,1,False


### Create new columns

In [40]:
df['Aggressivenes'] = df['Attack'] - df['Defense']
df.head(3)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Aggressivenes
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False,0
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False,-1
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False,-1


### Rearrange columns

In [41]:
# Move the derived column to the front by name rather than by position, so that
# re-running this cell leaves the order unchanged instead of rotating whichever
# column happens to be last to the front.
first_col = 'Aggressivenes'
cols = [first_col] + [col for col in df.columns if col != first_col]
df = df[cols]
df.head(3)

Unnamed: 0_level_0,Aggressivenes,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,0,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,-1,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,-1,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False


### Drop columns

In [42]:
# Remove the derived column again; `columns=` names the axis explicitly.
df = df.drop(columns='Aggressivenes')

In [43]:
df.head(3)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False


### Drop duplicate rows

In [44]:
df.drop_duplicates(['Type 1', 'Type 2']).head(3)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
6,Charizard,Fire,Flying,534,78,84,78,109,85,100,1,False


### Drop rows with missing data

In [45]:
df.dropna(subset=['Type 1', 'Type 2'], how='any').drop_duplicates(['Type 1', 'Type 2']).head(3)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
6,Charizard,Fire,Flying,534,78,84,78,109,85,100,1,False
6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False


Use parentheses to break a long chain into multiple lines for readability

In [46]:
(
    df
    .dropna(subset=['Type 1', 'Type 2'], how='any')
    .drop_duplicates(['Type 1', 'Type 2'])
    .head(3)
)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
6,Charizard,Fire,Flying,534,78,84,78,109,85,100,1,False
6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False


### Summary

In [47]:
df.dtypes

Name          object
Type 1        object
Type 2        object
Total          int64
HP             int64
Attack         int64
Defense        int64
Sp. Atk        int64
Sp. Def        int64
Speed          int64
Generation     int64
Legendary       bool
dtype: object

In [48]:
df.describe()

Unnamed: 0,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,435.1025,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,119.96304,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,180.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,330.0,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,450.0,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,515.0,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,780.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


In [49]:
df.sum(numeric_only=True)

Total         348082
HP             55407
Attack         63201
Defense        59074
Sp. Atk        58256
Sp. Def        57522
Speed          54622
Generation      2659
Legendary         65
dtype: int64

In [50]:
df.select_dtypes('number').agg(['mean', 'min', 'max'])

Unnamed: 0,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
mean,435.1025,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
min,180.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
max,780.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


### Grouping

In [51]:
s1 = df.groupby('Type 1').agg({
    'Attack': 'median',
    'Defense': ['min', 'max'],
})
s1.head(3)

Unnamed: 0_level_0,Attack,Defense,Defense
Unnamed: 0_level_1,median,min,max
Type 1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Bug,65.0,30,230
Dark,88.0,30,125
Dragon,113.5,35,130


In [52]:
s2 = s1.reset_index()
s2.columns = s2.columns.to_flat_index()
s2.head(3)

Unnamed: 0,"(Type 1, )","(Attack, median)","(Defense, min)","(Defense, max)"
0,Bug,65.0,30,230
1,Dark,88.0,30,125
2,Dragon,113.5,35,130


In [53]:
def my_median(x):
    """Return the median of ``x``, computed as its 0.5 quantile."""
    midpoint = 0.5
    return np.quantile(x, q=midpoint)

In [54]:
s3 = (
    df
    # Keep only the grouping keys and the numeric columns before aggregating.
    .drop(['Name', 'Legendary'], axis=1)
    # NOTE: groupby leaves out rows with a missing key by default, so Pokemon
    # without a 'Type 2' do not appear in the result.
    .groupby(['Type 1', 'Type 2'])
    # Apply the custom aggregation to every remaining column of each group.
    .agg(my_median)
)
s3.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
Type 1,Type 2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bug,Electric,395.5,60.0,62.0,55.0,77.0,55.0,86.5,5.0
Bug,Fighting,550.0,80.0,155.0,95.0,40.0,100.0,80.0,2.0
Bug,Fire,455.0,70.0,72.5,60.0,92.5,80.0,80.0,5.0


In [55]:
# Fix the seed so the sampled rows are the same on every run of the notebook.
s3.reset_index().sample(3, random_state=0)

Unnamed: 0,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
31,Electric,Grass,520.0,50.0,65.0,107.0,105.0,107.0,86.0,4.0
60,Grass,Fighting,530.0,88.0,107.0,80.0,74.0,75.0,70.0,5.0
1,Bug,Fighting,550.0,80.0,155.0,95.0,40.0,100.0,80.0,2.0
