# Pandas
- **Pandas** is a fast, powerful, and easy-to-use open-source library for data analysis and manipulation. Its two core data structures are:
1. Series
2. DataFrame

In [1]:
import numpy as np
import pandas as pd

import warnings
# NOTE(review): a blanket 'ignore' filter silences every warning for the rest of
# the session, including any deprecation warnings pandas may emit — consider
# narrowing it to a specific category once the notebook is stable.
warnings.filterwarnings('ignore')

# 1. Series

In [2]:
# Start from an ordinary Python list; with no index supplied, pandas labels
# the elements 0, 1, 2, ... automatically.
data = [10, 20, 30, 40, 50]
s1 = pd.Series(data=data)
s1

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [3]:
# Confirm that the object created above is a pandas Series.
type(s1)

pandas.core.series.Series

In [4]:
# The stored data, exposed as a NumPy array.
s1.values

array([10, 20, 30, 40, 50], dtype=int64)

In [5]:
# The labels attached to the data; a RangeIndex here because none was supplied.
s1.index

RangeIndex(start=0, stop=5, step=1)

In [6]:
# Data type of the stored values.
s1.dtype

dtype('int64')

In [7]:
# Materialise the index labels as a plain Python list.
list(s1.index)

[0, 1, 2, 3, 4]

In [8]:
# Overwrite the default 0..4 labels with custom integer labels.
s1.index = [101,102,103,104,105]
s1

101    10
102    20
103    30
104    40
105    50
dtype: int64

In [9]:
# Index labels can be strings as well.
s1.index = ['A', 'B', 'C', 'D', 'E']
s1

A    10
B    20
C    30
D    40
E    50
dtype: int64

In [10]:
# A range with as many entries as the Series has elements also works as an index.
s1.index = range(1001,1006)
s1

1001    10
1002    20
1003    30
1004    40
1005    50
dtype: int64

In [11]:
# Float labels are accepted too.
s1.index = [1.5,2.5,3.5,4.5,5.7]
s1

1.5    10
2.5    20
3.5    30
4.5    40
5.7    50
dtype: int64

In [12]:
# Switch back to the string labels that the following cells rely on.
s1.index = ['A', 'B', 'C', 'D', 'E']
s1

A    10
B    20
C    30
D    40
E    50
dtype: int64

## Accessing data in Series

In [13]:
# First element by position. Use .iloc explicitly: plain s1[0] on a
# string-labelled Series relies on a positional fallback that pandas has deprecated.
s1.iloc[0]

10

In [14]:
# Second element by position. Use .iloc explicitly: plain s1[1] on a
# string-labelled Series relies on a positional fallback that pandas has deprecated.
s1.iloc[1]

20

In [15]:
# Last element by position (negative positions count from the end). Use .iloc
# explicitly: plain s1[-1] on a string-labelled Series relies on a positional
# fallback that pandas has deprecated.
s1.iloc[-1]

50

In [16]:
# Square brackets with a label return the value stored under that label.
s1['A']

10

In [17]:
# Same label-based lookup, for label 'B'.
s1['B']

20

In [18]:
# Same label-based lookup, for label 'D'.
s1['D']

40

In [19]:
s1.iloc[0]                       # Positional indexing: the element at position 0

10

In [20]:
# Passing a list of positions returns a Series instead of a scalar.
s1.iloc[[0,1]]

A    10
B    20
dtype: int64

In [21]:
s1.loc['A']                      # Label-based indexing: the element whose index label is 'A'

10

In [22]:
# Passing a list of labels returns a Series instead of a scalar.
s1.loc[['A','B']]

A    10
B    20
dtype: int64

## Add, Update and Delete Operations

In [23]:
# Assigning to a label that does not exist yet appends a new element.
s1['F'] = 60
s1

A    10
B    20
C    30
D    40
E    50
F    60
dtype: int64

In [24]:
# Assigning to an existing label overwrites the value stored there.
s1['D'] = 80
s1

A    10
B    20
C    30
D    80
E    50
F    60
dtype: int64

In [25]:
# Remove label 'D'. drop() returns a new Series, so rebind the name instead of
# mutating the existing object with inplace=True.
s1 = s1.drop('D')

In [26]:
# Display the Series to confirm that label 'D' is no longer present.
s1

A    10
B    20
C    30
E    50
F    60
dtype: int64

## Slicing

In [27]:
# A bare slice selects every element.
s1[:]

A    10
B    20
C    30
E    50
F    60
dtype: int64

In [28]:
# Integer slice bounds act as positions here: start is inclusive, stop is exclusive.
s1[2:5]

C    30
E    50
F    60
dtype: int64

In [29]:
# A step of -1 starting at the last element walks the Series in reverse.
s1[-1::-1]

F    60
E    50
C    30
B    20
A    10
dtype: int64

In [30]:
# A step of 2 keeps every other element, starting with the first.
s1[::2]

A    10
C    30
F    60
dtype: int64

## Creating Series from Dictionary

In [31]:
# A dict maps naturally onto a Series: keys turn into index labels and
# values turn into the data, in insertion order.
data = {
    'Mohit': 85,
    'Ankit': 80,
    'Anjali': 86,
    'Varun': 81,
}
s = pd.Series(data=data)
s

Mohit     85
Ankit     80
Anjali    86
Varun     81
dtype: int64

In [32]:
# Capital city -> state lookup, kept as a plain dict for now.
data = dict(
    Jaipur='Rajasthan',
    Mumbai='Maharashtra',
    Kolkata='West Bengal',
    Chandigarh='Punjab',
    Bengaluru='Karnataka',
)
data

{'Jaipur': 'Rajasthan',
 'Mumbai': 'Maharashtra',
 'Kolkata': 'West Bengal',
 'Chandigarh': 'Punjab',
 'Bengaluru': 'Karnataka'}

In [33]:
# Dict keys become the index labels and dict values become the data.
s2 = pd.Series(data)
s2

Jaipur          Rajasthan
Mumbai        Maharashtra
Kolkata       West Bengal
Chandigarh         Punjab
Bengaluru       Karnataka
dtype: object

In [34]:
# Look up a value through its label, just like a dict key.
s2['Jaipur']

'Rajasthan'

In [35]:
# The labels that came from the dict keys.
s2.index

Index(['Jaipur', 'Mumbai', 'Kolkata', 'Chandigarh', 'Bengaluru'], dtype='object')

In [36]:
# The data that came from the dict values, as a NumPy array.
s2.values

array(['Rajasthan', 'Maharashtra', 'West Bengal', 'Punjab', 'Karnataka'],
      dtype=object)

In [37]:
# First element by position. Use .iloc explicitly: plain s2[0] on a
# string-labelled Series relies on a positional fallback that pandas has deprecated.
s2.iloc[0]

'Rajasthan'

In [38]:
# Explicit label-based lookup.
s2.loc['Jaipur']

'Rajasthan'

In [39]:
# Explicit position-based lookup.
s2.iloc[0]

'Rajasthan'

In [40]:
# Give the Series itself a name; it is shown in the last line of the repr.
s2.name = 'States and Capitals'
s2

Jaipur          Rajasthan
Mumbai        Maharashtra
Kolkata       West Bengal
Chandigarh         Punjab
Bengaluru       Karnataka
Name: States and Capitals, dtype: object

In [41]:
# Read the Series name back.
s2.name

'States and Capitals'

In [42]:
# The index can carry its own name, which is displayed above the labels.
s2.index.name = 'Capitals'
s2

Capitals
Jaipur          Rajasthan
Mumbai        Maharashtra
Kolkata       West Bengal
Chandigarh         Punjab
Bengaluru       Karnataka
Name: States and Capitals, dtype: object

In [43]:
# Print the main attributes of the Series, one per line:
# index, values, dtype, Series name, index name.
for attribute in (s2.index, s2.values, s2.dtype, s2.name, s2.index.name):
    print(attribute)

Index(['Jaipur', 'Mumbai', 'Kolkata', 'Chandigarh', 'Bengaluru'], dtype='object', name='Capitals')
['Rajasthan' 'Maharashtra' 'West Bengal' 'Punjab' 'Karnataka']
object
States and Capitals
Capitals


In [44]:
# A list of labels selects several elements at once and returns a Series.
s2[['Jaipur','Mumbai','Kolkata']]

Capitals
Jaipur       Rajasthan
Mumbai     Maharashtra
Kolkata    West Bengal
Name: States and Capitals, dtype: object

In [45]:
# Pair each roll number with its result, naming the Series and its index
# at construction time instead of assigning the attributes afterwards.
roll_no = [101, 102, 103, 104, 105]
result = [88, 89, 93, 85, 96]

s3 = pd.Series(
    result,
    index=pd.Index(roll_no, name='Roll No'),
    name='Final Result',
)
s3

Roll No
101    88
102    89
103    93
104    85
105    96
Name: Final Result, dtype: int64

In [46]:
# Show s2 again before deriving new Series from it.
s2

Capitals
Jaipur          Rajasthan
Mumbai        Maharashtra
Kolkata       West Bengal
Chandigarh         Punjab
Bengaluru       Karnataka
Name: States and Capitals, dtype: object

In [47]:
# Building a Series from an existing Series plus an index keeps only the
# listed labels, in the order they are listed.
capitals = ['Jaipur', 'Kolkata', 'Mumbai']
s4 = pd.Series(data = s2, index = capitals)
s4

Jaipur       Rajasthan
Kolkata    West Bengal
Mumbai     Maharashtra
Name: States and Capitals, dtype: object

In [48]:
# Labels that do not exist in s2 ('Delhi', 'Chennai') come out as NaN.
capitals = ['Jaipur', 'Kolkata', 'Mumbai', 'Delhi','Chennai']
s5 = pd.Series(data = s2, index = capitals)
s5

Jaipur       Rajasthan
Kolkata    West Bengal
Mumbai     Maharashtra
Delhi              NaN
Chennai            NaN
Name: States and Capitals, dtype: object

## Common Series Methods

In [49]:
s5.isnull()                               # True where the value is missing (NaN), False otherwise

Jaipur     False
Kolkata    False
Mumbai     False
Delhi       True
Chennai     True
Name: States and Capitals, dtype: bool

In [50]:
# Opposite of isnull(): True where a value is present.
s5.notnull()

Jaipur      True
Kolkata     True
Mumbai      True
Delhi      False
Chennai    False
Name: States and Capitals, dtype: bool

In [51]:
# Summing the boolean mask counts the missing values.
s5.isnull().sum()

2

In [52]:
# Append a new element by assigning to a label that is not in the index yet.
s5['Bengaluru'] = 'Karnataka'
s5

Jaipur         Rajasthan
Kolkata      West Bengal
Mumbai       Maharashtra
Delhi                NaN
Chennai              NaN
Bengaluru      Karnataka
Name: States and Capitals, dtype: object

In [53]:
# With no argument, head() returns the first five elements.
s5.head()

Jaipur       Rajasthan
Kolkata    West Bengal
Mumbai     Maharashtra
Delhi              NaN
Chennai            NaN
Name: States and Capitals, dtype: object

In [54]:
# First three elements.
s5.head(3)

Jaipur       Rajasthan
Kolkata    West Bengal
Mumbai     Maharashtra
Name: States and Capitals, dtype: object

In [55]:
# With no argument, tail() returns the last five elements.
s5.tail()

Kolkata      West Bengal
Mumbai       Maharashtra
Delhi                NaN
Chennai              NaN
Bengaluru      Karnataka
Name: States and Capitals, dtype: object

In [56]:
# Last three elements.
s5.tail(3)

Delhi              NaN
Chennai            NaN
Bengaluru    Karnataka
Name: States and Capitals, dtype: object

In [57]:
# Append another element; its value repeats one that is already in the Series.
s5['Pune'] = 'Maharashtra'
s5

Jaipur         Rajasthan
Kolkata      West Bengal
Mumbai       Maharashtra
Delhi                NaN
Chennai              NaN
Bengaluru      Karnataka
Pune         Maharashtra
Name: States and Capitals, dtype: object

In [58]:
# Swap the city labels for the integer labels 101..107 (one per element).
s5.index = range(101,108)
s5

101      Rajasthan
102    West Bengal
103    Maharashtra
104            NaN
105            NaN
106      Karnataka
107    Maharashtra
Name: States and Capitals, dtype: object

In [59]:
# s5 now has integer labels, so s5[108] addresses a label, not a position;
# assigning to a label that is not present appends a new element.
s5[108] = 'Maharashtra'
s5[109] = 'Maharashtra'
s5

101      Rajasthan
102    West Bengal
103    Maharashtra
104            NaN
105            NaN
106      Karnataka
107    Maharashtra
108    Maharashtra
109    Maharashtra
Name: States and Capitals, dtype: object

In [60]:
# Flags every value that already appeared earlier in the Series;
# a repeated NaN is flagged as a duplicate as well.
s5.duplicated()

101    False
102    False
103    False
104    False
105     True
106    False
107     True
108     True
109     True
Name: States and Capitals, dtype: bool

In [61]:
# Number of elements flagged as duplicates.
s5.duplicated().sum()

4