In [1]:
import numpy as np
import pandas as pd

## Indexing, iteration

### 1) head()

In [2]:
# Column-oriented input: each key becomes a column, each list its values.
data = dict(
    Name=['Jai', 'Princi', 'Gaurav', 'Anuj'],
    Age=[27, 24, 22, 32],
    Address=['Delhi', 'Kanpur', 'Allahabad', 'Kannauj'],
    Qualification=['Msc', 'MA', 'MCA', 'Phd'],
)

df = pd.DataFrame(data)
# Preview the top of the frame (all 4 rows fit within head()'s limit).
df.head()

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Delhi,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannauj,Phd


In [3]:
# Pass n to limit the preview; with no argument head() shows 5 rows.
df.head(2)

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Delhi,Msc
1,Princi,24,Kanpur,MA


### 2) at

In [4]:
# `at` reads or writes exactly one cell, addressed by a row label and a column label.
# Like `loc` it is label-based, but it only ever handles a single scalar.
# Reach for it when one value of a DataFrame or Series is all that is needed.

df = pd.DataFrame(
    data=[[0, 2, 3], [0, 4, 1], [10, 20, 30]],
    index=[4, 5, 6],       # row labels deliberately differ from positions 0..2
    columns=list('ABC'),
)
df

Unnamed: 0,A,B,C
4,0,2,3
5,0,4,1
6,10,20,30


In [5]:
df.at[4, 'A']

0

In [6]:
df.at[6, 'C']

30

In [7]:
# `at` also works on the left-hand side: overwrite the single cell at
# row label 5, column 'B' (this modifies df in place).
df.at[5, 'B'] = 100
df

Unnamed: 0,A,B,C
4,0,2,3
5,0,100,1
6,10,20,30


### 3) iat

In [8]:
# `iat` reads or writes exactly one cell, addressed by integer row/column position.
# Like `iloc` it is position-based, but it only ever handles a single scalar.
# Reach for it when one value of a DataFrame or Series is all that is needed.

df = pd.DataFrame(
    data=[[0, 2, 3], [0, 4, 1], [10, 20, 30]],
    columns=list('ABC'),
)

df

Unnamed: 0,A,B,C
0,0,2,3
1,0,4,1
2,10,20,30


In [9]:
df.iat[0,0]

0

In [10]:
df.iat[2,2]

30

In [11]:
# `iat` also supports assignment: overwrite the cell in the first row,
# third column (positions 0 and 2). This modifies df in place.
df.iat[0,2] = 5000
df

Unnamed: 0,A,B,C
0,0,2,5000
1,0,4,1
2,10,20,30


### 4) loc

In [12]:
# Rebuild the people table: `df` was reassigned by the at/iat examples,
# so the loc examples start again from a fresh frame with the default 0..3 index.
data = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'],
        'Age':[27, 24, 22, 32],
        'Address':['Delhi', 'Kanpur', 'Allahabad', 'Kannauj'],
        'Qualification':['Msc', 'MA', 'MCA', 'Phd']}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Delhi,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannauj,Phd


In [13]:
# A list of labels keeps the result two-dimensional: a one-row DataFrame.
df.loc[[0]]

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Delhi,Msc


In [14]:
# A single scalar label returns the row as a Series (column names become its index).
df.loc[0]

Name               Jai
Age                 27
Address          Delhi
Qualification      Msc
Name: 0, dtype: object

In [15]:
df.loc[2, 'Age']

22

In [16]:
df.loc[:, 'Name']

0       Jai
1    Princi
2    Gaurav
3      Anuj
Name: Name, dtype: object

In [17]:
df.loc[[0,1],['Name','Address','Qualification']]

Unnamed: 0,Name,Address,Qualification
0,Jai,Delhi,Msc
1,Princi,Kanpur,MA


In [18]:
# loc can be assigned to: set the 'Age' of the row labelled 0 (modifies df in place).
df.loc[0, 'Age'] = 32
df

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,32,Delhi,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannauj,Phd


In [19]:
# Boolean mask: keep only the rows where the comparison is True.
df.loc[df['Age'] > 30]

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,32,Delhi,Msc
3,Anuj,32,Kannauj,Phd


In [20]:
# A frame with string row labels, used for the label-based loc examples.
df = pd.DataFrame(
    data=[[1, 2], [4, 5], [7, 8]],
    index=['cobra', 'viper', 'sidewinder'],
    columns=['max_speed', 'shield'],
)

df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,5
sidewinder,7,8


In [21]:
# Label slices with loc include BOTH endpoints: 'viper' is part of the result.
df.loc['cobra':'viper', 'max_speed']

cobra    1
viper    4
Name: max_speed, dtype: int64

In [22]:
# A plain list of booleans, one per row: rows marked True are kept.
df.loc[[False, False, True]]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [23]:
# Boolean row mask combined with a column list; because the columns are given
# as a list, the result is a DataFrame even though only one column is selected.
df.loc[df['shield'] > 6, ['max_speed']]

Unnamed: 0,max_speed
sidewinder,7


In [24]:
# Assign one scalar to every cell selected by the row-label list and column list.
df.loc[['viper', 'sidewinder'], ['shield']] = 50
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,50
sidewinder,7,50


In [25]:
# Set value for an entire row: the scalar is written to every column of 'cobra'.

df.loc['cobra'] = 10
df

Unnamed: 0,max_speed,shield
cobra,10,10
viper,4,50
sidewinder,7,50


In [26]:
# Set value for an entire column: `:` selects all rows, so every row's
# 'max_speed' receives the scalar.
df.loc[:, 'max_speed'] = 30
df

Unnamed: 0,max_speed,shield
cobra,30,10
viper,30,50
sidewinder,30,50


In [27]:
# Set value for rows matching a boolean condition
# (df['shield'] > 35 is evaluated to a boolean mask here; it is not a callable).
# No column is given, so every column of the matching rows is overwritten.
df.loc[df['shield'] > 35] = 0
df

Unnamed: 0,max_speed,shield
cobra,30,10
viper,0,0
sidewinder,0,0


### 5) iloc --> Integer location

In [28]:
# Row-oriented input: a list of records, one dict per row, keys become columns.
mydict = [
    dict(a=1, b=2, c=3, d=4),
    dict(a=100, b=200, c=300, d=400),
    dict(a=1000, b=2000, c=3000, d=4000),
]
df5 = pd.DataFrame(mydict)
df5

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400
2,1000,2000,3000,4000


In [29]:
# A list of positions keeps the result two-dimensional: a one-row DataFrame.
df5.iloc[[0]]

Unnamed: 0,a,b,c,d
0,1,2,3,4


In [30]:
# A single integer position returns the row as a Series.
df5.iloc[0]

a    1
b    2
c    3
d    4
Name: 0, dtype: int64

In [31]:
df5.iloc[1, 3]

400

In [32]:
# Lists of positions for both axes: rows 0 and 2, columns 1 and 3 ('b' and 'd').
df5.iloc[[0, 2], [1, 3]]

Unnamed: 0,b,d
0,2,4
2,2000,4000


In [33]:
# Position slices with iloc exclude the end: rows 1-2 and columns 0-2 are returned.
df5.iloc[1:3, 0:3]

Unnamed: 0,a,b,c
1,100,200,300
2,1000,2000,3000


### 6) df.insert

#### DataFrame.insert(loc, column, value, allow_duplicates=False)
Insert column into DataFrame at specified location.

Raises a ValueError if column is already contained in the DataFrame, unless allow_duplicates is set to True.

In [34]:
df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [35]:
# insert() modifies df in place, placing 'newcol' at column position 1
# (between col1 and col2); the cell itself displays nothing.
df.insert(loc=1, column='newcol', value=[99,100])

In [36]:
df

Unnamed: 0,col1,newcol,col2
0,1,99,3
1,2,100,4


In [37]:
# 'col1' already exists in df, so allow_duplicates=True is needed here;
# without it insert() raises a ValueError for a duplicate column name.
df.insert(loc=0, column='col1', value=[559,220], allow_duplicates=True)
df

Unnamed: 0,col1,col1.1,newcol,col2
0,559,1,99,3
1,220,2,100,4


In [38]:
# Notice that pandas aligns on the index when value is a Series.

# The Series index starts at 1, so row 0 has no matching label and becomes NaN.
# Label 2 does not exist in df, so the value 6 is not inserted anywhere.
# The output shows 5.0 rather than 5: the column holds floats once NaN is present.

df.insert(loc=0, column='col0', value=pd.Series(data=[5,6], index=[1,2]))
df

Unnamed: 0,col0,col1,col1.1,newcol,col2
0,,559,1,99,3
1,5.0,220,2,100,4


### 7) df.items()

#### DataFrame.items()
Iterate over (column name, Series) pairs.

Iterates over the DataFrame columns, returning a tuple with the column name and the content as a Series.

In [39]:
# Two columns (one text, one integer) with named rows, used to show column iteration.
df = pd.DataFrame(
    data=dict(
        species=['bear', 'bear', 'marsupial'],
        population=[1864, 22000, 80000],
    ),
    index=['panda', 'polar', 'koala'],
)
df

Unnamed: 0,species,population
panda,bear,1864
polar,bear,22000
koala,marsupial,80000


In [47]:
# items() yields one (column label, column Series) pair per column.
for label, content in df.items():
    print(f'label: {label}')
    print()
    # print() receives a single f-string, so a `sep` argument would have
    # nothing to separate; none is passed and the printed text is unchanged.
    print(f'content: {content}')

label: species

content: panda         bear
polar         bear
koala    marsupial
Name: species, dtype: object
label: population

content: panda     1864
polar    22000
koala    80000
Name: population, dtype: int64


### 8) df.iteritems() (deprecated in pandas 1.5, removed in 2.0 — use df.items())

In [49]:
# DataFrame.iteritems() was deprecated in pandas 1.5 and removed in pandas 2.0,
# where calling it raises AttributeError. items() is its replacement and yields
# the same (column label, column Series) pairs, so the output is identical.
for label, content in df.items():
    print(f'label: {label}')
    print()
    # print() receives a single f-string, so a `sep` argument would have
    # nothing to separate; none is passed and the printed text is unchanged.
    print(f'content: {content}')

label: species

content: panda         bear
polar         bear
koala    marsupial
Name: species, dtype: object
label: population

content: panda     1864
polar    22000
koala    80000
Name: population, dtype: int64


### 9) df.keys()

In [50]:
# For a DataFrame, keys() returns the column labels (compare with df.columns).
df.keys()

Index(['species', 'population'], dtype='object')

In [51]:
df.columns

Index(['species', 'population'], dtype='object')

### 10) df.iterrows()

#### DataFrame.iterrows()
Iterate over DataFrame rows as (index, Series) pairs.

In [52]:
# One row mixing an integer and a float, with column names that state
# each value's type.
df = pd.DataFrame([[1, 1.5]], columns=['int', 'float'])
df

Unnamed: 0,int,float
0,1,1.5


In [53]:
# iterrows() yields (index, Series) pairs; [1] keeps only the row Series.
# The row is one Series with a single dtype, so the integer 1 shows up as 1.0
# (float64): dtypes are not preserved across a row.
row = next(df.iterrows())[1]
row

int      1.0
float    1.5
Name: 0, dtype: float64

In [54]:
# Without the [1], the full (index label, row Series) tuple is returned.
row = next(df.iterrows())
row

(0,
 int      1.0
 float    1.5
 Name: 0, dtype: float64)

### 11) df.itertuples()

#### DataFrame.itertuples(index=True, name='Pandas')
Iterate over DataFrame rows as namedtuples.

In [57]:
df = pd.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]},
                  index=['dog', 'hawk'])
df

Unnamed: 0,num_legs,num_wings
dog,4,0
hawk,2,2


In [58]:
for row in df.itertuples():
    print(row)

Pandas(Index='dog', num_legs=4, num_wings=0)
Pandas(Index='hawk', num_legs=2, num_wings=2)


In [59]:
# index=False leaves the row label out of each namedtuple (no `Index` field).
for row in df.itertuples(index=False):
    print(row)

Pandas(num_legs=4, num_wings=0)
Pandas(num_legs=2, num_wings=2)


In [60]:
# `name` sets the namedtuple's type name, replacing the default 'Pandas'.
for row in df.itertuples(name="Animal"):
    print(row)

Animal(Index='dog', num_legs=4, num_wings=0)
Animal(Index='hawk', num_legs=2, num_wings=2)


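### 12) df.lookup()

`DataFrame.lookup()` was deprecated in pandas 1.2 and removed in pandas 2.0; use `loc`-based indexing instead.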

### 13) df.pop()

#### DataFrame.pop(item)
Return item and drop from frame. Raise KeyError if not found.

In [61]:
# Row-oriented input: one tuple per animal; the last row has a missing max_speed.
df = pd.DataFrame(
    data=[
        ('falcon', 'bird', 389.0),
        ('parrot', 'bird', 24.0),
        ('lion', 'mammal', 80.5),
        ('monkey', 'mammal', np.nan),
    ],
    columns=('name', 'class', 'max_speed'),
)
df

Unnamed: 0,name,class,max_speed
0,falcon,bird,389.0
1,parrot,bird,24.0
2,lion,mammal,80.5
3,monkey,mammal,


In [62]:
# pop() returns the 'class' column as a Series and removes it from df in place.
df.pop(item='class')

0      bird
1      bird
2    mammal
3    mammal
Name: class, dtype: object

In [63]:
df

Unnamed: 0,name,max_speed
0,falcon,389.0
1,parrot,24.0
2,lion,80.5
3,monkey,


### 14) df.tail()

In [64]:
df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
                   'monkey', 'parrot', 'shark', 'whale', 'zebra']})
df

Unnamed: 0,animal
0,alligator
1,bee
2,falcon
3,lion
4,monkey
5,parrot
6,shark
7,whale
8,zebra


In [65]:
df.tail()

Unnamed: 0,animal
4,monkey
5,parrot
6,shark
7,whale
8,zebra


In [66]:
df.tail(3)

Unnamed: 0,animal
6,shark
7,whale
8,zebra


In [69]:
# A negative n returns every row except the first |n|:
# here the first 5 of the 9 rows are skipped, leaving rows 5-8.
df.tail(-5)

Unnamed: 0,animal
5,parrot
6,shark
7,whale
8,zebra


### 15) df.get()

#### DataFrame.get(key, default=None)
Get item from object for given key (ex: DataFrame column).

Returns default value if not found.

In [70]:
# Four daily weather readings indexed by consecutive dates starting 2014-02-12.
df = pd.DataFrame(
    data=[
        [24.3, 75.7, "high"],
        [31, 87.8, "high"],
        [22, 71.6, "medium"],
        [35, 95, "medium"],
    ],
    index=pd.date_range("2014-02-12", periods=4, freq="D"),
    columns=["temp_celsius", "temp_fahrenheit", "windspeed"],
)
df

Unnamed: 0,temp_celsius,temp_fahrenheit,windspeed
2014-02-12,24.3,75.7,high
2014-02-13,31.0,87.8,high
2014-02-14,22.0,71.6,medium
2014-02-15,35.0,95.0,medium


In [71]:
df.get(key=['temp_celsius', 'windspeed'])

Unnamed: 0,temp_celsius,windspeed
2014-02-12,24.3,high
2014-02-13,31.0,high
2014-02-14,22.0,medium
2014-02-15,35.0,medium


In [72]:
# 'temp_kelvin' is not a column of df, so the lookup fails as a whole and
# get() returns the supplied default instead of raising.
df.get(key=['temp_celsius', 'temp_kelvin'], default='default_value')

'default_value'

### 16) df.isin()

#### DataFrame.isin(values)
Whether each element in the DataFrame is contained in values.

In [73]:
# Two animals described by leg and wing counts, used for the isin() examples.
df = pd.DataFrame(
    data=dict(num_legs=[2, 4], num_wings=[2, 0]),
    index=['falcon', 'dog'],
)
df

Unnamed: 0,num_legs,num_wings
falcon,2,2
dog,4,0


In [74]:
# When values is a list, each cell of the DataFrame is tested for membership in
# that list (here: which animals have 0 or 2 legs or wings). The result is a
# boolean DataFrame with the same index and columns as df.

df.isin(values=[0, 2])

Unnamed: 0,num_legs,num_wings
falcon,True,True
dog,False,True


In [75]:
df.isin(values=[0, 1])

Unnamed: 0,num_legs,num_wings
falcon,False,False
dog,False,True


In [76]:
# To find the cells whose value is NOT in the list, negate the result with the ~ operator

~df.isin(values=[0, 2])

Unnamed: 0,num_legs,num_wings
falcon,False,False
dog,True,False


In [77]:
# When values is a dict we can pass values to check for each column separately.
# Columns that are not keys of the dict (num_legs here) come back all False.

df.isin(values={'num_wings' : [0, 3]})

Unnamed: 0,num_legs,num_wings
falcon,False,False
dog,False,True


In [79]:
df.isin(values={'num_wings' : [0, 2]})

Unnamed: 0,num_legs,num_wings
falcon,False,True
dog,False,True


In [80]:
# When values is a Series or DataFrame, a cell only counts as a match if its index
# label and column label line up as well as its value. Note that 'falcon' does not
# match on the number of legs in `other` (3 here).

other = pd.DataFrame(
    data=dict(num_legs=[8, 3], num_wings=[0, 2]),
    index=['spider', 'falcon'],
)

other

Unnamed: 0,num_legs,num_wings
spider,8,0
falcon,3,2


In [81]:
# Only falcon/num_wings is True: it is the one cell where the row label, the
# column label and the value all agree with `other`. 'dog' has no row in `other`.
df.isin(values=other)

Unnamed: 0,num_legs,num_wings
falcon,False,True
dog,False,False


### 17) df.where()

### 18) df.mask()

### 19) df.query()