# Pandas

In [1]:
pip install pandas

Collecting pandas
  Downloading pandas-1.5.3-cp311-cp311-win_amd64.whl (10.3 MB)
     --------------------------------------- 10.3/10.3 MB 13.3 MB/s eta 0:00:00
Collecting pytz>=2020.1
  Downloading pytz-2022.7.1-py2.py3-none-any.whl (499 kB)
     ------------------------------------- 499.4/499.4 kB 15.3 MB/s eta 0:00:00
Installing collected packages: pytz, pandas
Successfully installed pandas-1.5.3 pytz-2022.7.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 23.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import pandas as pd

## Data structures in pandas
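1. panel (deprecated in pandas 0.20 and removed in 0.25; use a DataFrame with a MultiIndex, or xarray, instead)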
2. dataframe
3. series

#### Series
A Series is a one-dimensional labelled array capable of holding any data type.

#### DataFrame
A DataFrame is a two-dimensional tabular structure with labelled rows and columns.

### Series
A Series can be created in many ways:
1. from a list
2. from NumPy arrays
3. from a dictionary
4. from scalar values
5. from callable objects

#### 1. from list

In [3]:
# Build a Series from a plain Python list. No index is given, so the result
# is labelled with the default integers 0..4.
x = [15,24,89,45,32]
pd.Series(x)

0    15
1    24
2    89
3    45
4    32
dtype: int64

In [4]:
# Same values, now with explicit string labels supplied through the data= / index= keywords.
pd.Series(data = x, index = ['a','b','c','d','e'])

a    15
b    24
c    89
d    45
e    32
dtype: int64

In [5]:
# Positional form: the first argument is the data, the second is the index.
pd.Series(x, ['a','b','c','d','e'])

a    15
b    24
c    89
d    45
e    32
dtype: int64

In [8]:
# Keyword arguments can be given in any order. The leading spaces are part of the label text.
pd.Series(index = ['  I',' II','III',' IV','  V'], data = x)

  I    15
 II    24
III    89
 IV    45
  V    32
dtype: int64

In [9]:
# String values with string labels; the resulting dtype is object.
# Note that x is rebound here and no longer refers to the integer list used above.
labels = ['001','002','003','004','005']
x = ['abc','pqr','def','mno','xyz']
pd.Series(x, labels)

001    abc
002    pqr
003    def
004    mno
005    xyz
dtype: object

In [11]:
pd.Series([1,2,3,4,'x']) # a Series has one dtype: mixing int and str values falls back to dtype object

0    1
1    2
2    3
3    4
4    x
dtype: object

#### 2. from NumPy arrays

In [12]:
import numpy as np
# Evenly spaced integers starting at 10, in steps of 10, stopping before 100.
a = np.arange(10,100,10)
a

array([10, 20, 30, 40, 50, 60, 70, 80, 90])

In [15]:
pd.Series(a) # values come from the NumPy array; the labels are the default 0..8

0    10
1    20
2    30
3    40
4    50
5    60
6    70
7    80
8    90
dtype: int32

In [16]:
pd.Series(a,index=np.arange(101,1001,100)) # the index labels 101, 201, ..., 901 are also built with np.arange

101    10
201    20
301    30
401    40
501    50
601    60
701    70
801    80
901    90
dtype: int32

#### 3. from dictionary

In [17]:
# From a dict: the keys become the index labels and the values become the data.
dic = {'a':20,'b':30,'c':40,'d':50}
pd.Series(dic)

a    20
b    30
c    40
d    50
dtype: int64

In [18]:
# Passing an index together with a dict picks values by label, in the order given;
# a repeated label repeats its value, and keys not listed ('c') are left out.
pd.Series(dic,["b","b","a","d"])

b    30
b    30
a    20
d    50
dtype: int64

In [23]:
# A requested label with no matching dict key ("e") becomes NaN, and the dtype
# shown changes from int64 to float64.
pd.Series(dic,["b","b","a","d","e"])

b    30.0
b    30.0
a    20.0
d    50.0
e     NaN
dtype: float64

In [21]:
# NaN is an ordinary Python float.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
type(np.nan)

float

#### 4. from scalar values

In [24]:
# A bare scalar produces a one-element Series labelled 0.
pd.Series(4)

0    4
dtype: int64

In [27]:
# With an index, the scalar is repeated once for every label.
pd.Series(4,index = np.arange(5))

0    4
1    4
2    4
3    4
4    4
dtype: int64

In [29]:
# A tuple is treated as a sequence of values, one per index label (4 values, 4 labels).
pd.Series((100,200,300,400),index = np.arange(4))

0    100
1    200
2    300
3    400
dtype: int64

In [30]:
# Four values cannot be matched to five index labels, so pandas raises ValueError.
# The error is the point of this cell: catch it and print it, so that
# "Restart Kernel -> Run All" does not stop here.
try:
    pd.Series((100,200,300,400),index = np.arange(5))
except ValueError as err:
    length_mismatch_msg = f"{type(err).__name__}: {err}"
    print(length_mismatch_msg)

ValueError: Length of values (4) does not match length of index (5)

#### 5. Callable objects 

In [31]:
# Elements can be arbitrary Python objects, including functions; the dtype is object.
pd.Series([print,max,min,sum,len,type,np.append])

0                  <built-in function print>
1                    <built-in function max>
2                    <built-in function min>
3                    <built-in function sum>
4                    <built-in function len>
5                             <class 'type'>
6    <function append at 0x000002415C75CEA0>
dtype: object

In [32]:
# Classes can be stored as elements too, again with dtype object.
pd.Series([int,float,str,list,tuple,dict,set])

0      <class 'int'>
1    <class 'float'>
2      <class 'str'>
3     <class 'list'>
4    <class 'tuple'>
5     <class 'dict'>
6      <class 'set'>
dtype: object

### Attributes of pandas series

In [34]:
# 1. dtype: the element type of the values held by the Series

# Integer values with string labels; the following cells inspect and modify s1.
l = [20,30,40,50]
i = ['a','b','c','d']
s1 = pd.Series(l,i)
s1

a    20
b    30
c    40
d    50
dtype: int64

In [35]:
# dtype reports the element type of the values (int64 here).
s1.dtype

dtype('int64')

In [37]:
# Access by position with .iloc. The index holds string labels, and a plain s1[0]
# only works through an integer-as-position fallback that pandas has deprecated.
s1.iloc[0]

20

In [38]:
# Positions 1 and 2, read explicitly by position (.iloc) because the index holds string labels.
s1.iloc[1], s1.iloc[2]

(30, 40)

In [42]:
# Overwrite the first element by position. .iloc is used because the index holds
# string labels; an integer key in plain s1[0] = ... is treated as a position only
# through a deprecated fallback.
s1.iloc[0] = 60
s1

a    60
b    30
c    40
d    50
dtype: int64

In [44]:
# Mixed int and str values with string labels: the Series is stored with dtype object.
s = pd.Series(data=[1,2,'x'],index=['a','b','c'])
s

a    1
b    2
c    x
dtype: object

In [45]:
# 'O' is the short code for the object dtype shown as "dtype: object" above.
s.dtype

dtype('O')

In [46]:
# Each element of an object Series keeps its own Python type; the first one is an int.
# .iloc reads by position, since the index holds string labels.
type(s.iloc[0])

int

In [47]:
# The third element (position 2) is a str. .iloc reads by position, since the index holds string labels.
type(s.iloc[2])

str

In [48]:
# keys() returns the index labels.
s.keys()

Index(['a', 'b', 'c'], dtype='object')

In [54]:
# Without the call parentheses this evaluates to the bound method itself, not the labels;
# write s.keys() to get the index.
s.keys

<bound method Series.keys of a    1
b    2
c    x
dtype: object>

In [50]:
# .values exposes the data as a NumPy array.
s.values

array([1, 2, 'x'], dtype=object)

In [51]:
# The underlying array has the same object dtype as the Series.
s.values.dtype

dtype('O')

In [53]:
# The index of string labels is reported with object dtype as well in this recorded run.
s.keys().dtype

dtype('O')

In [55]:
"a" in s

True

In [56]:
"p" in s

False

### Methods

In [57]:
# Recap of the mixed-type Series s that the method examples below operate on.
s

a    1
b    2
c    x
dtype: object

In [69]:
# get: label-based lookup that returns the value stored under the given label
s.get("c")

'x'

In [59]:
# An unknown label returns the supplied default instead of raising an error.
s.get("d",default = "not found")

'not found'

In [60]:
# When the label exists, the stored value is returned and the default is ignored.
s.get("b",default = "not found")

2

In [70]:
# append: Series.append was deprecated in pandas 1.4 and removed in 2.0, so the two
# Series are joined with pd.concat instead. The original labels are kept, which is
# why 0, 1, 2 appear twice in the result.
s1 = pd.Series([1,2,3])
s2 = pd.Series([4,5,6])
pd.concat([s1, s2])

  s1.append(s2)


0    1
1    2
2    3
0    4
1    5
2    6
dtype: int64

In [73]:
# pd.concat stacks the Series end to end; the original labels are kept, so 0, 1, 2 appear twice.
pd.concat([s1,s2])

0    1
1    2
2    3
0    4
1    5
2    6
dtype: int64

In [75]:
# add: element-wise sum of two Series (1+4, 2+5, 3+6)
s1.add(s2)

0    5
1    7
2    9
dtype: int64

In [77]:
# A Series with one more entry than s1, used to show what add() does when a label has no partner.
s3 = pd.Series([4,5,6,7])
s3

0    4
1    5
2    6
3    7
dtype: int64

In [78]:
# Label 3 exists only in s3, so its result is NaN and the dtype shown becomes float64.
s1.add(s3)

0    5.0
1    7.0
2    9.0
3    NaN
dtype: float64

In [80]:
# fill_value=0 stands in for the missing s1 entry, so label 3 yields 0 + 7 = 7.0.
s1.add(s3,fill_value=0)

0    5.0
1    7.0
2    9.0
3    7.0
dtype: float64

In [81]:
# pop: show s before anything is removed

s

a    1
b    2
c    x
dtype: object

In [83]:
# pop returns the value stored under 'c' and removes that entry from s in place.
s.pop('c')

'x'

In [84]:
# After the pop, label 'c' is gone from s.
s

a    1
b    2
dtype: object

In [86]:
# drop: show s3 before anything is dropped

s3

0    4
1    5
2    6
3    7
dtype: int64

In [87]:
# drop takes index labels (here 1 and 2) and returns a new Series without them.
s3.drop([1,2])

0    4
3    7
dtype: int64

In [90]:
# s3 still has all four entries: drop did not modify it.
s3

0    4
1    5
2    6
3    7
dtype: int64

In [92]:
# s3.index[3] looks up the label at position 3; that label is then dropped.
s3.drop(s3.index[3])

0    4
1    5
2    6
dtype: int64

In [93]:
# pop(2) returns the value at label 2 and, unlike drop, removes it from s3 in place.
s3.pop(2)

6

In [94]:
# Label 2 is now missing from s3; the remaining labels are 0, 1 and 3.
s3

0    4
1    5
3    7
dtype: int64

In [95]:
# Label 2 was removed by the earlier s3.pop(2), so looking it up raises KeyError.
# The error is the point of this cell: catch it and print it, so that
# "Restart Kernel -> Run All" does not stop here.
try:
    s3[2]
except KeyError as err:
    print(f"{type(err).__name__}: {err}")

KeyError: 2

In [96]:
# Mean of the remaining values: (4 + 5 + 7) / 3.
s3.mean()

5.333333333333333

In [97]:
# Element-wise comparison produces a boolean Series with the same labels.
s3>5

0    False
1    False
3     True
dtype: bool

In [98]:
# Boolean indexing keeps only the entries where the mask is True.
s3[s3>5]

3    7
dtype: int64