### What is Pandas

Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
built on top of the Python programming language.

https://pandas.pydata.org/about/index.html

### Pandas Series

A Pandas Series is like a column in a table. It is a 1-D array holding data of any type.

### Importing Pandas

In [1]:
!pip install pandas

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [1]:
import numpy as np
import pandas as pd

## Gist Of Series

### Series using scaler number

In [3]:
s = pd.Series(100, index = list(range(0,5))) # Creating using a scalar
print(s)

0    100
1    100
2    100
3    100
4    100
dtype: int64


In [4]:
type(s)

pandas.core.series.Series

In [5]:
s = pd.Series(100, index = list(range(100,105))) # Creating using a scalar
print(s)

100    100
101    100
102    100
103    100
104    100
dtype: int64


### Series from numpy array

In [6]:
mydata = np.array(['a','b','c','d']) # Creating uing numpy array
x=pd.Series(mydata)
x

0    a
1    b
2    c
3    d
dtype: object

In [7]:
s = pd.Series(mydata, index = list(range(100, 104)))
s

100    a
101    b
102    c
103    d
dtype: object

In [8]:
s=np.random.randint(10,size=(8))
pd.Series(s)

0    9
1    8
2    4
3    0
4    9
5    4
6    2
7    8
dtype: int64

### Series from lists

In [9]:
# string
country = ['India','Pakistan','USA','Nepal','Srilanka']

pd.Series(country)

0       India
1    Pakistan
2         USA
3       Nepal
4    Srilanka
dtype: object

In [10]:
# integers
runs = [13,24,56,78,100]

runs_ser = pd.Series(runs)

In [6]:
# custom index
marks = [67,57,89,100]
subjects = ['maths','english','science','hindi']

pd.Series(marks,index=subjects)

maths       67
english     57
science     89
hindi      100
dtype: int64

In [7]:
# setting a name
marks = pd.Series(marks,index=subjects,name='marks')
marks

maths       67
english     57
science     89
hindi      100
Name: marks, dtype: int64

In [8]:
marks.values

array([ 67,  57,  89, 100])

### Series from dict

In [13]:
marks = {
    'maths':67,
    'english':57,
    'science':89,
    'hindi':100,
    'arts':np.nan
}

marks_series = pd.Series(marks,name='MARKS')
marks_series

maths       67.0
english     57.0
science     89.0
hindi      100.0
arts         NaN
Name: MARKS, dtype: float64

### Series Attributes and methods

In [14]:
# size
marks_series.size

5

In [15]:
#count returns the non nan values
marks_series.count()

4

In [16]:
marks_series.shape

(5,)

In [17]:
# dtype
marks_series.dtype

dtype('float64')

In [18]:
# name
marks_series.name

'MARKS'

In [19]:
# is_unique
marks_series.is_unique



True

In [20]:
pd.Series([1,1,2,3,4,5]).is_unique

False

In [21]:
# index
marks_series.index

Index(['maths', 'english', 'science', 'hindi', 'arts'], dtype='object')

In [22]:
# values
marks_series.values

array([ 67.,  57.,  89., 100.,  nan])

In [6]:
s=np.random.randint(20,size=70)
series=pd.Series(s)

In [7]:
series

0     14
1     18
2      4
3     10
4      8
      ..
65    11
66     4
67     4
68     3
69    17
Length: 70, dtype: int32

In [8]:
series.head()

0    14
1    18
2     4
3    10
4     8
dtype: int32

In [25]:
series.head(3)

0    15
1     2
2    18
dtype: int64

In [26]:
series.tail()

65    12
66    19
67    15
68    18
69    16
dtype: int64

In [11]:
series.sample()

63    10
dtype: int32

In [28]:
series.sample(6)

67    15
66    19
10     1
8     17
12    16
26    11
dtype: int64

In [29]:
series.tail(4)

66    19
67    15
68    18
69    16
dtype: int64

In [30]:
series.value_counts()

15    6
18    6
17    6
0     6
10    5
7     4
3     4
8     4
1     4
6     4
19    3
11    3
4     3
12    3
14    2
16    2
9     2
2     1
13    1
5     1
Name: count, dtype: int64

In [31]:
series.sort_values()

48     0
22     0
60     0
33     0
53     0
      ..
2     18
21    18
27    19
54    19
66    19
Length: 70, dtype: int64

In [32]:
series.sort_values(ascending=False)

27    19
66    19
54    19
2     18
68    18
      ..
22     0
9      0
53     0
48     0
33     0
Length: 70, dtype: int64

In [33]:
series.sort_index()

0     15
1      2
2     18
3     12
4     17
      ..
65    12
66    19
67    15
68    18
69    16
Length: 70, dtype: int64

In [34]:
series.count()

70

In [35]:
series.mean()


9.8

In [36]:
print(series.median())


10.0


In [37]:
print(series.mode())


0     0
1    15
2    17
3    18
dtype: int64


In [38]:
print(series.std())


6.1752192943516855


In [39]:
print(series.var())

38.13333333333333


In [40]:
series.max()

19

In [41]:
series.min()

0

In [42]:
series.sum()

686

In [43]:
series.prod()

0

In [44]:
series.describe()

count    70.000000
mean      9.800000
std       6.175219
min       0.000000
25%       4.250000
50%      10.000000
75%      15.000000
max      19.000000
dtype: float64

In [45]:
#indexing
series

0     15
1      2
2     18
3     12
4     17
      ..
65    12
66    19
67    15
68    18
69    16
Length: 70, dtype: int64

In [46]:
series[2]

18

In [47]:
series[3:6]

3    12
4    17
5    10
dtype: int64

In [49]:
series[-4:]

66    19
67    15
68    18
69    16
dtype: int64

In [50]:
#fancy indexing
series[[3,7,9]]

3    12
7    10
9     0
dtype: int64

# Editing a series

In [51]:
series

0     15
1      2
2     18
3     12
4     17
      ..
65    12
66    19
67    15
68    18
69    16
Length: 70, dtype: int64

In [52]:
series[0]=100

In [53]:
series

0     100
1       2
2      18
3      12
4      17
     ... 
65     12
66     19
67     15
68     18
69     16
Length: 70, dtype: int64

In [54]:
series[[1,2]]=200,300

In [55]:
series

0     100
1     200
2     300
3      12
4      17
     ... 
65     12
66     19
67     15
68     18
69     16
Length: 70, dtype: int64

In [56]:
series.duplicated().sum()

48

In [57]:
series[series.duplicated()]

7     10
8     17
11    17
15    12
16    17
18    15
22     0
24     8
25     8
26    11
28     7
29     4
31    10
32    15
33     0
34     6
36    14
37    10
38    18
39     1
40    18
41    18
42     7
43     6
44     9
45     8
46     3
47    15
48     0
49    17
50     1
51     1
53     0
54    19
56     3
57     4
58     3
59    11
60     0
61    10
62     6
63    17
64     7
65    12
66    19
67    15
68    18
69    16
dtype: int64

In [58]:
series.drop_duplicates()

0     100
1     200
2     300
3      12
4      17
5      10
6       8
9       0
10      1
12     16
13     11
14     15
17      4
19      7
20      6
21     18
23     14
27     19
30      9
35      3
52     13
55      5
dtype: int64

In [59]:
series.drop(0)

1     200
2     300
3      12
4      17
5      10
     ... 
65     12
66     19
67     15
68     18
69     16
Length: 69, dtype: int64

In [60]:
series[0]=500

In [61]:
series

0     500
1     200
2     300
3      12
4      17
     ... 
65     12
66     19
67     15
68     18
69     16
Length: 70, dtype: int64

# filtering

In [62]:
series<15

0     False
1     False
2     False
3      True
4     False
      ...  
65     True
66    False
67    False
68    False
69    False
Length: 70, dtype: bool

In [63]:
series[series<15]

3     12
5     10
6      8
7     10
9      0
10     1
13    11
15    12
17     4
19     7
20     6
22     0
23    14
24     8
25     8
26    11
28     7
29     4
30     9
31    10
33     0
34     6
35     3
36    14
37    10
39     1
42     7
43     6
44     9
45     8
46     3
48     0
50     1
51     1
52    13
53     0
55     5
56     3
57     4
58     3
59    11
60     0
61    10
62     6
64     7
65    12
dtype: int64

In [64]:
series[series<15].size

46

In [65]:
series[series.between(13,57)]

4     17
8     17
11    17
12    16
14    15
16    17
18    15
21    18
23    14
27    19
32    15
36    14
38    18
40    18
41    18
47    15
49    17
52    13
54    19
63    17
66    19
67    15
68    18
69    16
dtype: int64

In [66]:
series[series%2==0].size

37

In [67]:
series[series.isin([19,15])]

14    15
18    15
27    19
32    15
47    15
54    19
66    19
67    15
dtype: int64

In [68]:
#membership operators
8 in series.values

True

In [69]:
100 in series.index

False

In [70]:
#find how many negative numbers are present.
#find how many numbers are divisible by 4.

### Handling null values

In [12]:
temp = pd.Series([1,2,3,np.nan,5,6,np.nan,8,np.nan,10])
temp

0     1.0
1     2.0
2     3.0
3     NaN
4     5.0
5     6.0
6     NaN
7     8.0
8     NaN
9    10.0
dtype: float64

In [15]:
# isnull
temp.isnull().sum()

3

In [73]:
# dropna
temp.dropna()

0     1.0
1     2.0
2     3.0
4     5.0
5     6.0
7     8.0
9    10.0
dtype: float64

In [18]:
# fillna
temp.fillna(temp.mean())

0     1.0
1     2.0
2     3.0
3     5.0
4     5.0
5     6.0
6     5.0
7     8.0
8     5.0
9    10.0
dtype: float64