In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Indexing, Slicing, Filtering and Transforming Data Frames

In [2]:
# Load the dataset and promote the "#" column to the row index in one chain.
data = pd.read_csv("pokemon.csv").set_index("#")
data.head()

Unnamed: 0_level_0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
5,Charmander,Fire,,39,52,43,60,50,65,1,False


In [3]:
# Bracket notation: pick the "HP" column, then the row whose index label is 1.
data["HP"].loc[1]

45

In [4]:
# Attribute notation: same lookup, with the column reached as an attribute.
data.HP.loc[1]


45

In [5]:
# .loc with a *list* of columns: the result is a Series for row 1 (not a scalar),
# holding only the requested column(s).
data.loc[1, ["HP"]]


HP    45
Name: 1, dtype: object

In [6]:
# Select several columns at once: all rows, only the listed columns.
data.loc[:, ["HP", "Attack"]]


Unnamed: 0_level_0,HP,Attack
#,Unnamed: 1_level_1,Unnamed: 2_level_1
1,45,49
2,60,62
3,80,82
4,80,100
5,39,52
...,...,...
796,50,100
797,50,160
798,80,110
799,80,160


In [7]:
# A single label yields a Series; a list of labels yields a DataFrame.
for selection in (data["HP"], data[["HP"]]):
    print(type(selection))

<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>


In [8]:
# Label-based slicing of a DataFrame: with .loc both ends of each slice are included
# (rows 1 through 10, columns "HP" through "Defense").
data.loc[1:10, "HP":"Defense"]

Unnamed: 0_level_0,HP,Attack,Defense
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,45,49,49
2,60,62,63
3,80,82,83
4,80,100,123
5,39,52,43
6,58,64,58
7,78,84,78
8,78,130,111
9,78,104,78
10,44,48,65


In [9]:
# Reverse slicing: a step of -1 walks the row labels from 10 back down to 1.
data.loc[10:1:-1, "HP":"Defense"]

Unnamed: 0_level_0,HP,Attack,Defense
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,44,48,65
9,78,104,78
8,78,130,111
7,78,84,78
6,58,64,58
5,39,52,43
4,80,100,123
3,80,82,83
2,60,62,63
1,45,49,49


In [10]:
# Open-ended column slice: from "Speed" through the last column.
data.loc[1:10, "Speed":]

Unnamed: 0_level_0,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,45,1,False
2,60,1,False
3,80,1,False
4,80,1,False
5,65,1,False
6,80,1,False
7,100,1,False
8,100,1,False
9,100,1,False
10,43,1,False


In [11]:
# Build a boolean Series (True where HP exceeds 200) and use it as a row mask.
boolean = data["HP"] > 200
data.loc[boolean]

Unnamed: 0_level_0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
122,Chansey,Normal,,250,5,5,35,105,50,1,False
262,Blissey,Normal,,255,10,10,75,135,55,2,False


In [12]:
# Combine two masks with & (element-wise AND): keep rows that satisfy both.
first_filter = data["HP"] > 150
second_filter = data["Speed"] > 35
data.loc[first_filter & second_filter]

Unnamed: 0_level_0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
122,Chansey,Normal,,250,5,5,35,105,50,1,False
262,Blissey,Normal,,255,10,10,75,135,55,2,False
352,Wailord,Water,,170,90,45,90,45,60,3,False
656,Alomomola,Water,,165,75,80,40,45,65,5,False


In [13]:
# Filter one column using a condition on another: HP of every row with Speed below 15.
data.loc[data["Speed"] < 15, "HP"]

#
231     20
360     45
487     50
496    135
659     44
Name: HP, dtype: int64

In [14]:
# Transforming values with a plain Python function
def div(n):
    """Return half of ``n`` (true division, so integers become floats)."""
    half = n / 2
    return half
# Apply the function element-wise to the HP column.
data["HP"].apply(div)

#
1      22.5
2      30.0
3      40.0
4      40.0
5      19.5
       ... 
796    25.0
797    25.0
798    40.0
799    40.0
800    40.0
Name: HP, Length: 800, dtype: float64

In [15]:
# The same transformation written inline with a lambda instead of a named function.
data["HP"].apply(lambda hp: hp / 2)

#
1      22.5
2      30.0
3      40.0
4      40.0
5      19.5
       ... 
796    25.0
797    25.0
798    40.0
799    40.0
800    40.0
Name: HP, Length: 800, dtype: float64

In [16]:
# Derive a new column as the element-wise sum of two existing columns.
data["total_power"] = data["Attack"] + data["Defense"]
data.head()

Unnamed: 0_level_0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,total_power
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,98
2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,125
3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,165
4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,223
5,Charmander,Fire,,39,52,43,60,50,65,1,False,95


## Index Objects, Hierarchical Indexing, Pivoting, Stacking-Unstacking and Melting

In [17]:
# Print the current name of the index (it is "#", the column set as index on load).
print(data.index.name)
# Rename the index by assigning to its `name` attribute. This mutates the existing
# Index object in place, so re-running the cell above will now print "index_name".
data.index.name = "index_name"
data.head()

#


Unnamed: 0_level_0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,total_power
index_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,98
2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,125
3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,165
4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,223
5,Charmander,Fire,,39,52,43,60,50,65,1,False,95


In [18]:
# Overwrite the index.
# An index is replaced as a whole: assign a new sequence with exactly one label per row.
# Work on a copy (data3) so that `data` keeps its own index.
data3 = data.copy()
# Let the labels start at 100 (an arbitrary value, purely for illustration).
# The stop is derived from the row count so the assignment stays valid for any
# dataset size; a hard-coded range(100, 900) only fits exactly 800 rows and
# raises ValueError (length mismatch) otherwise.
data3.index = range(100, 100 + len(data3))
data3.head()

Unnamed: 0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,total_power
100,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,98
101,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,125
102,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,165
103,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,223
104,Charmander,Fire,,39,52,43,60,50,65,1,False,95


In [19]:
# Reload the raw file: "#" is an ordinary column again and the frame gets the
# default integer index, discarding the changes made to `data` above.
data = pd.read_csv("pokemon.csv")
data.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


In [20]:
# Hierarchical (two-level) index: "Type 1" is the outer level, "Type 2" the inner one.
data1 = data.set_index(["Type 1", "Type 2"])
data1.head(100)

Unnamed: 0_level_0,Unnamed: 1_level_0,#,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Type 1,Type 2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Grass,Poison,1,Bulbasaur,45,49,49,65,65,45,1,False
Grass,Poison,2,Ivysaur,60,62,63,80,80,60,1,False
Grass,Poison,3,Venusaur,80,82,83,100,100,80,1,False
Grass,Poison,4,Mega Venusaur,80,100,123,122,120,80,1,False
Fire,,5,Charmander,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...
Poison,,96,Grimer,80,80,50,40,50,25,1,False
Poison,,97,Muk,105,105,75,65,100,50,1,False
Water,,98,Shellder,30,65,100,45,25,40,1,False
Water,Ice,99,Cloyster,50,95,180,85,45,70,1,False


In [21]:
# Small example frame: one row per (treatment, gender) pair with a response and an age.
dic = {
    "treatment": ["A", "A", "B", "B"],
    "gender": ["F", "M", "F", "M"],
    "response": [10, 45, 5, 9],
    "age": [15, 4, 72, 65],
}
df = pd.DataFrame(dic)
df

Unnamed: 0,treatment,gender,response,age
0,A,F,10,15
1,A,M,45,4
2,B,F,5,72
3,B,M,9,65


In [22]:
# Pivot: treatments become rows, genders become columns, cells hold the response.
df.pivot(
    index="treatment",
    columns="gender",
    values="response",
)


gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,45
B,5,9


In [23]:
# Two-level index for the stacking examples: treatment (outer), gender (inner).
df1 = df.set_index(["treatment", "gender"])
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,response,age
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,10,15
A,M,45,4
B,F,5,72
B,M,9,65


In [24]:
# Unstack the outer index level (level 0, "treatment") into the columns.
df1.unstack(level="treatment")

Unnamed: 0_level_0,response,response,age,age
treatment,A,B,A,B
gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
F,10,5,15,72
M,45,9,4,65


In [25]:
# Unstack the inner index level (level 1, "gender") into the columns.
df1.unstack(level="gender")


Unnamed: 0_level_0,response,response,age,age
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,10,45,15,4
B,5,9,72,65


In [26]:
# Swap the two index levels: gender becomes the outer level, treatment the inner one.
# Row order is unchanged; only the level order differs.
df2 = df1.swaplevel("treatment", "gender")
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,response,age
gender,treatment,Unnamed: 2_level_1,Unnamed: 3_level_1
F,A,10,15
M,A,45,4
F,B,5,72
M,B,9,65


In [27]:
# Display the original flat frame (default integer index, four columns) for reference.
df


Unnamed: 0,treatment,gender,response,age
0,A,F,10,15
1,A,M,45,4
2,B,F,5,72
3,B,M,9,65


In [28]:
# Melt wide -> long: keep "treatment" as the identifier and fold the "age" and
# "response" columns into (variable, value) pairs.
df.melt(id_vars="treatment", value_vars=["age", "response"])


Unnamed: 0,treatment,variable,value
0,A,age,15
1,A,age,4
2,B,age,72
3,B,age,65
4,A,response,10
5,A,response,45
6,B,response,5
7,B,response,9


In [29]:
# Mean of every numeric column per treatment group.
# numeric_only=True is required on pandas >= 2.0: without it the string column
# "gender" is included in the aggregation and mean() raises TypeError.
df.groupby("treatment").mean(numeric_only=True)

Unnamed: 0_level_0,response,age
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,27.5,9.5
B,7.0,68.5


In [30]:
# Aggregate a single column: maximum age within each treatment group.
df.groupby("treatment")["age"].max()


treatment
A    15
B    72
Name: age, dtype: int64

In [31]:
# Aggregate several columns at once: per-treatment minimum of age and response.
df.groupby("treatment")[["age", "response"]].min()


Unnamed: 0_level_0,age,response
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,4,10
B,65,5


In [32]:
# Print a summary of the frame: index range, per-column non-null counts and dtypes,
# and approximate memory usage.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   treatment  4 non-null      object
 1   gender     4 non-null      object
 2   response   4 non-null      int64 
 3   age        4 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 256.0+ bytes
