# Introduction to Pandas

### Installing Pandas Library

In [None]:
!pip install pandas

### Importing Pandas Library

In [1]:
import pandas as pd

In [2]:
# Show which pandas release the kernel imported.
pd.__version__

'1.2.4'

## Series

In [3]:
# A Series built from a plain list; no index is given, so pandas assigns 0..n-1.
obj = pd.Series(data=[4, 7, -5, 3])

In [6]:
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [7]:
# A Series with explicit string labels: value i is paired with label i.
obj2 = pd.Series(data=[10, 20, 30, 40], index=list('dbac'))

In [8]:
obj2

d    10
b    20
a    30
c    40
dtype: int64

### Fetching values on the basis of indexing

In [9]:
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [10]:
# obj has the default integer index (0..3), so the key 1 is looked up as a label.
obj[1]

7

In [12]:
obj2

d    10
b    20
a    30
c    40
dtype: int64

In [13]:
# Look up a single value by its string label.
obj2['a']

30

In [14]:
# Positional access: use .iloc explicitly. A bare integer key on a
# string-labelled Series (obj2[2]) relies on a positional fallback that
# newer pandas releases deprecate.
obj2.iloc[2]

30

#### Getting the index of a Series

In [15]:
# No index was passed when obj was created, so pandas supplied a RangeIndex.
obj.index

RangeIndex(start=0, stop=4, step=1)

In [16]:
# obj2 was created with explicit labels, so its index holds those strings.
obj2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

#### Accessing multiple values via indexing

In [23]:
# A list of labels selects several elements at once and returns a new Series.
obj[[0,2,3]]

0    4
2   -5
3    3
dtype: int64

In [24]:
# Same idea with string labels: the result keeps the order given in the list.
obj2[['d','a','c']]

d    10
a    30
c    40
dtype: int64

In [25]:
# Select by position with .iloc. Passing integers to obj2[...] on a
# string-labelled Series depends on a positional fallback that newer pandas
# releases deprecate.
obj2.iloc[[0, 2, 3]]

d    10
a    30
c    40
dtype: int64

#### changing the value using index

In [26]:
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [27]:
# Assignment through the index overwrites the element labelled 2 in place.
obj[2] = 15

In [28]:
obj

0     4
1     7
2    15
3     3
dtype: int64

In [29]:
obj2

d    10
b    20
a    30
c    40
dtype: int64

In [30]:
# Overwrite the element labelled 'a' in place.
obj2['a'] = 50

In [31]:
obj2

d    10
b    20
a    50
c    40
dtype: int64

#### Comparisons return a boolean Series

In [32]:
# Comparing a Series with a scalar is element-wise and yields a boolean Series.
obj > 7

0    False
1    False
2     True
3    False
dtype: bool

In [33]:
# Element-wise comparison; the result carries obj2's index labels.
obj2 > 20

d    False
b    False
a     True
c     True
dtype: bool

#### Filtering values with a boolean mask

In [34]:
# Indexing with a boolean Series keeps only the elements where it is True.
obj[obj > 7]

2    15
dtype: int64

In [35]:
# Keep only the elements of obj2 that are greater than 20.
obj2[obj2 > 20]

a    50
c    40
dtype: int64

#### arithmetic operations

In [36]:
obj2

d    10
b    20
a    50
c    40
dtype: int64

In [43]:
# Scalar arithmetic is applied element-wise; each expression builds a new Series
# (true division gives float64), and the four results are displayed as one tuple.
obj2 + 2, obj2 - 2, obj2 * 2, obj2 / 2

(d    12
 b    22
 a    52
 c    42
 dtype: int64,
 d     8
 b    18
 a    48
 c    38
 dtype: int64,
 d     20
 b     40
 a    100
 c     80
 dtype: int64,
 d     5.0
 b    10.0
 a    25.0
 c    20.0
 dtype: float64)

In [44]:
obj2

d    10
b    20
a    50
c    40
dtype: int64

### Using NumPy in Pandas

In [45]:
import numpy as np

In [48]:
# NumPy reductions accept a Series directly and return a single scalar.
np.sum(obj2)

120

In [49]:
# Largest value in the Series.
np.max(obj2)

50

In [50]:
# Smallest value in the Series.
np.min(obj2)

10

In [51]:
# Arithmetic mean of the values; the result is a float even for integer data.
np.mean(obj2)

30.0

In [52]:
# Element-wise NumPy functions return a Series that keeps obj2's index labels.
np.exp(obj2)

d    2.202647e+04
b    4.851652e+08
a    5.184706e+21
c    2.353853e+17
dtype: float64

In [53]:
# Sine of each value (NumPy interprets the inputs as radians); index labels are preserved.
np.sin(obj2)

d   -0.544021
b    0.912945
a   -0.262375
c    0.745113
dtype: float64

### Creating Series From Python Dictionary

In [54]:
# Mapping of city name -> a single numeric value per city.
sdata = dict(Mumbai=3500, Pune=7100, Nagpur=4600, Chennai=5000)
sdata

{'Mumbai': 3500, 'Pune': 7100, 'Nagpur': 4600, 'Chennai': 5000}

In [55]:
# Building a Series from a dict: keys become the index labels, values the data.
obj3 = pd.Series(sdata)
obj3

Mumbai     3500
Pune       7100
Nagpur     4600
Chennai    5000
dtype: int64

In [56]:
# Here each key maps to a whole list (several values per key) rather than to
# a single scalar.
mydict = dict(
    city=['Mumbai', 'Pune', 'Nagpur'],
    salary=[3500, 7100, 4600],
)
mydict

{'city': ['Mumbai', 'Pune', 'Nagpur'], 'salary': [3500, 7100, 4600]}

In [57]:
# Each dict value here is a list, so every Series element is an entire list
# and the dtype falls back to object.
demo = pd.Series(mydict)
demo

city      [Mumbai, Pune, Nagpur]
salary        [3500, 7100, 4600]
dtype: object

In [60]:
# demo['city'] returns the stored Python list; [1] then indexes into that list.
demo['city'][1]

'Pune'

### Identifying Null Values

In [67]:
# Passing an explicit index selects/reorders the dict entries by label:
# labels absent from sdata ('Halloween', 'Banglore') become NaN, and dict keys
# not listed in the index ('Chennai') are left out.
obj4 = pd.Series(sdata, index = ['Mumbai', 'Pune', 'Halloween', 'Nagpur', 'Banglore'])

In [68]:
obj4

Mumbai       3500.0
Pune         7100.0
Halloween       NaN
Nagpur       4600.0
Banglore        NaN
dtype: float64

In [69]:
# True where the value is missing (NaN), False elsewhere.
pd.isnull(obj4)

Mumbai       False
Pune         False
Halloween     True
Nagpur       False
Banglore      True
dtype: bool

In [70]:
# Inverse of isnull: True where a value is present.
pd.notnull(obj4)

Mumbai        True
Pune          True
Halloween    False
Nagpur        True
Banglore     False
dtype: bool

In [71]:
# Arithmetic between Series aligns on index labels: a label missing from either
# operand (or holding NaN) yields NaN in the result.
obj3 + obj4

Banglore         NaN
Chennai          NaN
Halloween        NaN
Mumbai        7000.0
Nagpur        9200.0
Pune         14200.0
dtype: float64

## DataFrame

In [73]:
# Column-oriented input for a DataFrame: one equal-length list per column.
data = dict(
    city=['Mumbai', 'Pune', 'Chennai', 'Nagpur', 'Delhi'],
    year=[2001, 2011, 2018, 2019, 2020],
    population=[1.5, 1.7, 3.6, 2.4, 3.2],
)
data

{'city': ['Mumbai', 'Pune', 'Chennai', 'Nagpur', 'Delhi'],
 'year': [2001, 2011, 2018, 2019, 2020],
 'population': [1.5, 1.7, 3.6, 2.4, 3.2]}

In [74]:
# Dict keys become column names; the equal-length lists supply the column values.
df = pd.DataFrame(data)

In [77]:
df

Unnamed: 0,city,year,population
0,Mumbai,2001,1.5
1,Pune,2011,1.7
2,Chennai,2018,3.6
3,Nagpur,2019,2.4
4,Delhi,2020,3.2


In [78]:
# First 3 rows of the frame.
df.head(3)

Unnamed: 0,city,year,population
0,Mumbai,2001,1.5
1,Pune,2011,1.7
2,Chennai,2018,3.6


In [79]:
# Last 3 rows of the frame.
df.tail(3)

Unnamed: 0,city,year,population
2,Chennai,2018,3.6
3,Nagpur,2019,2.4
4,Delhi,2020,3.2


In [85]:
# Column-oriented records: three yearly rows each for Mumbai and Pune.
# NOTE: this rebinds the name `mydict`, which an earlier cell used for a
# different dict.
mydict = dict(
    city=['Mumbai'] * 3 + ['Pune'] * 3,
    year=[2010, 2011, 2012, 2011, 2012, 2013],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)

In [86]:
# Build a frame from the column-oriented dict; rows get the default integer index.
df2 = pd.DataFrame(mydict)

In [87]:
df2

Unnamed: 0,city,year,pop
0,Mumbai,2010,1.5
1,Mumbai,2011,1.7
2,Mumbai,2012,3.6
3,Pune,2011,2.4
4,Pune,2012,2.9
5,Pune,2013,3.2


In [89]:
# columns= fixes the column order of the resulting frame (year, city, pop).
pd.DataFrame(mydict, columns =['year','city','pop'])

Unnamed: 0,year,city,pop
0,2010,Mumbai,1.5
1,2011,Mumbai,1.7
2,2012,Mumbai,3.6
3,2011,Pune,2.4
4,2012,Pune,2.9
5,2013,Pune,3.2


### Defining your own index names

In [90]:
# columns= may name a column that is absent from the dict ('debt'); it is created
# filled with missing values. index= replaces the default integer row labels.
df3 = pd.DataFrame(mydict, columns = ['year','city','pop','debt'],
                   index=['one','two','three','four','five','six'])

In [91]:
df3

Unnamed: 0,year,city,pop,debt
one,2010,Mumbai,1.5,
two,2011,Mumbai,1.7,
three,2012,Mumbai,3.6,
four,2011,Pune,2.4,
five,2012,Pune,2.9,
six,2013,Pune,3.2,


In [92]:
df3.columns   # names of the columns

Index(['year', 'city', 'pop', 'debt'], dtype='object')

In [97]:
# Row labels of the frame.
df3.index

Index(['one', 'two', 'three', 'four', 'five', 'six'], dtype='object')

### Fetching rows & columns

In [98]:
df3['city']   # fetching a single column by name returns a Series

one      Mumbai
two      Mumbai
three    Mumbai
four       Pune
five       Pune
six        Pune
Name: city, dtype: object

In [95]:
df3.loc['four']  # fetching a single row by its index label

year    2011
city    Pune
pop      2.4
debt     NaN
Name: four, dtype: object

#### assigning column values

In [99]:
# Assigning a scalar to a column broadcasts it to every row.
df3['debt'] = 16.5

In [100]:
df3

Unnamed: 0,year,city,pop,debt
one,2010,Mumbai,1.5,16.5
two,2011,Mumbai,1.7,16.5
three,2012,Mumbai,3.6,16.5
four,2011,Pune,2.4,16.5
five,2012,Pune,2.9,16.5
six,2013,Pune,3.2,16.5


In [104]:
# Assign one value per row (1..n). The length is derived from the frame so the
# array always matches the number of rows instead of hard-coding it.
df3['debt'] = np.arange(1, len(df3) + 1)

In [105]:
df3

Unnamed: 0,year,city,pop,debt
one,2010,Mumbai,1.5,1
two,2011,Mumbai,1.7,2
three,2012,Mumbai,3.6,3
four,2011,Pune,2.4,4
five,2012,Pune,2.9,5
six,2013,Pune,3.2,6


### Selecting rows with `loc` (labels) and `iloc` (positions)

In [106]:
# loc selects by index label; a single label returns that row as a Series.
df3.loc['one']

year      2010
city    Mumbai
pop        1.5
debt         1
Name: one, dtype: object

In [116]:
# A list of labels returns a DataFrame containing those rows.
df3.loc[['one','two']]

Unnamed: 0,year,city,pop,debt
one,2010,Mumbai,1.5,1
two,2011,Mumbai,1.7,2


In [117]:
df3.iloc[1]  # iloc selects by 0-based integer position, independent of the row labels

year      2011
city    Mumbai
pop        1.7
debt         2
Name: two, dtype: object

In [118]:
# A list of positions returns the rows at those positions as a DataFrame.
df3.iloc[[1,2,3]]

Unnamed: 0,year,city,pop,debt
two,2011,Mumbai,1.7,2
three,2012,Mumbai,3.6,3
four,2011,Pune,2.4,4
