# Pandas (Cont.)

### Slicing in Series

In [1]:
import pandas as pd
import numpy as np

In [7]:
# NOTE: the index here is a set literal ({...}). Sets have no defined order,
# so the labels get paired with the values in an arbitrary order (see the
# shuffled output below). Use a list for the index when the order matters.
s = pd.Series([34,78,12,90,23,89,10,100,32,56],index={'a','b','c','d','e','A','B','C','D','E'})
s

b     34
D     78
a     12
e     90
B     23
c     89
C     10
A    100
E     32
d     56
dtype: int64

In [8]:
# Same values as before, but with the labels given as an ordered sequence so
# that each value is paired with its label in the order written.
s = pd.Series(
    data=[34, 78, 12, 90, 23, 89, 10, 100, 32, 56],
    index=list('abcdeABCDE'),
)
s

a     34
b     78
c     12
d     90
e     23
A     89
B     10
C    100
D     32
E     56
dtype: int64

In [9]:
# First element by position. The index holds string labels, so use .iloc to
# make the positional lookup explicit instead of relying on the deprecated
# integer fallback of plain [] indexing.
s.iloc[0]

34

In [10]:
# Look up a single value by its index label.
s.loc['a']

34

In [11]:
# Positional slice: positions 2 up to (but not including) 6.
s.iloc[2:6]

c    12
d    90
e    23
A    89
dtype: int64

In [12]:
# Label-based slice: unlike a positional slice, both end labels are included.
s.loc['a':'A']

a    34
b    78
c    12
d    90
e    23
A    89
dtype: int64

In [13]:
# Walk backwards (step -1) from label 'A' to label 'c', both included.
s.loc['A':'c':-1]

A    89
e    23
d    90
c    12
dtype: int64

In [14]:
# Everything from label 'c' through the end of the series.
s.loc['c':]

c     12
d     90
e     23
A     89
B     10
C    100
D     32
E     56
dtype: int64

In [15]:
# From position 5 through the last element (an open-ended stop means "to the end").
s.iloc[5:]

A     89
B     10
C    100
D     32
E     56
dtype: int64

In [16]:
# Negative positions count from the end. The index holds string labels, so
# use .iloc rather than the deprecated integer fallback of plain [] indexing.
s.iloc[-2], s.iloc[-1]

(32, 56)

In [17]:
# Walk backwards (step -1) from label 'E' to label 'e', both included.
s.loc['E':'e':-1]

E     56
D     32
C    100
B     10
A     89
e     23
dtype: int64

In [18]:
# The whole series in reverse order.
s.iloc[::-1]

E     56
D     32
C    100
B     10
A     89
e     23
d     90
c     12
b     78
a     34
dtype: int64

## Basic math operations in Series
### 1. Addition

In [21]:
# Values 0..9 labelled 101, 201, ..., 1001.
s1 = pd.Series(
    data=np.arange(10),
    index=np.arange(101, 1002, 100),
)
s1

101     0
201     1
301     2
401     3
501     4
601     5
701     6
801     7
901     8
1001    9
dtype: int32

In [22]:
# Values 10, 20, ..., 100 on the same labels 101, 201, ..., 1001.
s2 = pd.Series(
    data=np.arange(10, 101, 10),
    index=np.arange(101, 1002, 100),
)
s2

101      10
201      20
301      30
401      40
501      50
601      60
701      70
801      80
901      90
1001    100
dtype: int32

In [23]:
# Element-wise sum; values are matched by index label.
s1.add(s2)

101      10
201      21
301      32
401      43
501      54
601      65
701      76
801      87
901      98
1001    109
dtype: int32

In [24]:
# Values 10, 20, ..., 100 on labels shifted by 100: 201, 301, ..., 1101.
s3 = pd.Series(
    data=np.arange(10, 101, 10),
    index=np.arange(201, 1102, 100),
)
s3

201      10
301      20
401      30
501      40
601      50
701      60
801      70
901      80
1001     90
1101    100
dtype: int32

In [25]:
# Sum by label: labels present in only one of the two series (101 and 1101)
# come out as NaN, and the result is float64.
s1.add(s3)

101      NaN
201     11.0
301     22.0
401     33.0
501     44.0
601     55.0
701     66.0
801     77.0
901     88.0
1001    99.0
1101     NaN
dtype: float64

In [26]:
# Nine values 10, 20, ..., 90 on labels 101, 201, ..., 901 (one fewer than s1).
s4 = pd.Series(
    data=np.arange(10, 91, 10),
    index=np.arange(101, 902, 100),
)
s4

101    10
201    20
301    30
401    40
501    50
601    60
701    70
801    80
901    90
dtype: int32

In [27]:
# Sum by label: label 1001 exists only in s1, so its result is NaN.
s1.add(s4)

101     10.0
201     21.0
301     32.0
401     43.0
501     54.0
601     65.0
701     76.0
801     87.0
901     98.0
1001     NaN
dtype: float64

### 2. Arithmetic with a scalar value

In [28]:
# Show the original series again as the baseline for the scalar operations that follow.
s

a     34
b     78
c     12
d     90
e     23
A     89
B     10
C    100
D     32
E     56
dtype: int64

In [29]:
# Add 2 to every element.
s.add(2)

a     36
b     80
c     14
d     92
e     25
A     91
B     12
C    102
D     34
E     58
dtype: int64

In [30]:
# Subtract 20 from every element.
s.sub(20)

a    14
b    58
c    -8
d    70
e     3
A    69
B   -10
C    80
D    12
E    36
dtype: int64

In [31]:
# Double every element.
s.mul(2)

a     68
b    156
c     24
d    180
e     46
A    178
B     20
C    200
D     64
E    112
dtype: int64

In [32]:
# True division: the result is float64 even though the inputs are integers.
s.div(2)

a    17.0
b    39.0
c     6.0
d    45.0
e    11.5
A    44.5
B     5.0
C    50.0
D    16.0
E    28.0
dtype: float64

In [33]:
# Floor division: the result stays an integer series.
s.floordiv(2)

a    17
b    39
c     6
d    45
e    11
A    44
B     5
C    50
D    16
E    28
dtype: int64

In [35]:
# Remainder of each element after division by 4.
s.mod(4)

a    2
b    2
c    0
d    2
e    3
A    1
B    2
C    0
D    0
E    0
dtype: int64

The Series section ends here; the next topic is the DataFrame.

# DataFrame
1. Two-dimensional data structure
2. Tabular format, consisting of rows and columns
3. Columns can be of different types
4. Mutable
5. Both rows and columns are labelled
6. Arithmetic operations can be performed on rows and columns

To create a DataFrame, we need:
1. data 
2. index 
3. columns

DataFrame can be created from
1. list
2. array
3. dictionary
4. Series
5. Another DataFrame

#### Empty DataFrame

In [36]:
# A DataFrame built without any data has no rows and no columns.
df = pd.DataFrame(data=None)
print(repr(df))

Empty DataFrame
Columns: []
Index: []


#### From list

In [37]:
# A flat list becomes a single column (labelled 0) with one row per element.
l = [12, 45, 23, 89, 100, 43, 85, 47]
pd.DataFrame(data=l)

Unnamed: 0,0
0,12
1,45
2,23
3,89
4,100
5,43
6,85
7,47


In [39]:
# The outer list has a single element, so the frame has a single row; the
# inner lists are kept as list objects inside their cells.
l = [
    [[12, 45, 23], [89, 100], 43, 85, 47],
]
pd.DataFrame(data=l)

Unnamed: 0,0,1,2,3,4
0,"[12, 45, 23]","[89, 100]",43,85,47


In [40]:
# Each inner list is one row. The second row has seven items against eight in
# the first, so its last cell is left empty (missing) in the result.
l = [
    [12, 45, 23, 89, 100, 43, 85, 47],
    ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
]
pd.DataFrame(data=l)

Unnamed: 0,0,1,2,3,4,5,6,7
0,12,45,23,89,100,43,85,47.0
1,a,b,c,d,e,f,g,


In [41]:
# Without column names the single column is labelled 0.
pd.DataFrame([10, 20, 30, 40, 50, 60, 70, 8, 90])

Unnamed: 0,0
0,10
1,20
2,30
3,40
4,50
5,60
6,70
7,8
8,90


In [43]:
# Same data, with the single column given an explicit name.
pd.DataFrame([10, 20, 30, 40, 50, 60, 70, 8, 90], columns=['marks'])

Unnamed: 0,marks
0,10
1,20
2,30
3,40
4,50
5,60
6,70
7,8
8,90


In [44]:
# One inner list per student (row); columns and row labels are supplied separately.
students = [
    ['alex', 10],
    ['bob', 12],
    ['dave', 13],
]
pd.DataFrame(
    data=students,
    index=["001", '002', '003'],
    columns=['Name', 'Age'],
)

Unnamed: 0,Name,Age
1,alex,10
2,bob,12
3,dave,13


In [49]:
names = ['Alex','bob','dave']
age = [10,12,13]
# [names, age] is two rows of three values each, so asking for only two column
# labels cannot work. The error is the point of this cell, so catch it and show
# its message instead of letting the exception abort a "Run All".
try:
    pd.DataFrame([names, age], columns=['Name', 'Age'], index=['001', '002', '003'])
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: 2 columns passed, passed data had 3 columns

In [50]:
names = ['Alex','bob','dave']
age = [10,12,13]
# The extra pair of brackets turns [names, age] into a single row holding two
# list objects, so each cell of the result contains a whole list rather than
# one name or one age (see the output below).
# NOTE(review): that single row is shown repeated for all three index labels;
# this repetition may depend on the pandas version - confirm before relying on it.
pd.DataFrame([[names,age]],columns = ['Name','Age'],index = ['001','002','003'])

Unnamed: 0,Name,Age
1,"[Alex, bob, dave]","[10, 12, 13]"
2,"[Alex, bob, dave]","[10, 12, 13]"
3,"[Alex, bob, dave]","[10, 12, 13]"


#### 2. from ndarrays

In [52]:
# One-dimensional array holding 0..7.
n = np.arange(0, 8)
n

array([0, 1, 2, 3, 4, 5, 6, 7])

In [53]:
# A 1-d array becomes a single column.
pd.DataFrame(data=n)

Unnamed: 0,0
0,0
1,1
2,2
3,3
4,4
5,5
6,6
7,7


In [54]:
# The same eight values arranged as 4 rows x 2 columns.
n = np.arange(8).reshape((4, 2))
n

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [55]:
# A 2-d array maps directly onto the rows and columns of the frame.
pd.DataFrame(data=n)

Unnamed: 0,0,1
0,0,1
1,2,3
2,4,5
3,6,7


In [56]:
# The same eight values as a three-dimensional array of shape (4, 2, 1).
n = np.arange(8).reshape((4, 2, 1))
n

array([[[0],
        [1]],

       [[2],
        [3]],

       [[4],
        [5]],

       [[6],
        [7]]])

In [57]:

# A DataFrame cannot be built from the 3-d array created above. The error is
# the point of this cell, so catch it and show its message instead of letting
# the exception abort a "Run All".
try:
    pd.DataFrame(n)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: Must pass 2-d input. shape=(4, 2, 1)

In [58]:
# Back to the 4 x 2 array, this time with explicit column and row labels.
n = np.arange(8).reshape((4, 2))
pd.DataFrame(
    data=n,
    index=['I', 'II', 'III', 'IV'],
    columns=['col 1', 'col 2'],
)

Unnamed: 0,col 1,col 2
I,0,1
II,2,3
III,4,5
IV,6,7


### From Dictionary
#### 1. From a dictionary of lists

In [63]:
# Each key is a column name and its list holds that column's values; the rows
# are labelled 100, 101, 102.
students = {
    "Name": ['alex', 'bob', 'dave'],
    "age": [10, 12, 13],
}
pd.DataFrame(data=students, index=np.arange(100, 103))

Unnamed: 0,Name,age
100,alex,10
101,bob,12
102,dave,13


In [73]:
# Month table built from a dictionary: each key becomes a column.
months = {
    "Number": list(range(1, 13)),
    "month": [
        'jan', 'feb', 'mar', 'apr', 'may', 'jun',
        'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    ],
}
pd.DataFrame(data=months)

Unnamed: 0,Number,month
0,1,jan
1,2,feb
2,3,mar
3,4,apr
4,5,may
5,6,jun
6,7,jul
7,8,aug
8,9,sep
9,10,oct


In [76]:
# Same month table built from a list of [number, name] rows; the column names
# are passed separately.
month = [
    [number, name]
    for number, name in enumerate(
        ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
         'jul', 'aug', 'sep', 'oct', 'nov', 'dec'],
        start=1,
    )
]
pd.DataFrame(data=month, columns=["number", 'month'])

Unnamed: 0,number,month
0,1,jan
1,2,feb
2,3,mar
3,4,apr
4,5,may
5,6,jun
6,7,jul
7,8,aug
8,9,sep
9,10,oct
