# 101 Pandas Exercises

### Source: [Selva Prabhakaran on Machine Learning Plus](https://www.machinelearningplus.com/python/101-pandas-exercises-python/)

### 001 - L1: Import Pandas and print the version number

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Print the version string of the imported pandas package.
print(pd.__version__)

1.0.3


### 002 - L1: Create a pandas series from each of the items below: a list, a numpy array and a dictionary

In [3]:
import numpy as np
# Three sources of the same 26 items: a list of letters, a numpy array of
# the integers 0..25, and a dict pairing each letter with its number.
mylist = [*'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(0, 26)
mydict = {letter: number for letter, number in zip(mylist, myarr)}

In [4]:
# One series per source, in order: the list, the numpy array, the dictionary.
s1, s2, s3 = (pd.Series(source) for source in (mylist, myarr, mydict))

### 003 - L1: Convert the series ser into a dataframe with its index as another column on the dataframe

In [35]:
# Build a series named 'nums': the letters form the index, 0..25 the values.
mylist = [*'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(0, 26)
mydict = {letter: number for letter, number in zip(mylist, myarr)}
ser = pd.Series(data=mydict, name='nums')

In [40]:
# Series.reset_index() moves the index into a regular column and returns a
# dataframe, with the series' values in the other column.
ser.reset_index()

Unnamed: 0,index,nums
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


### 004 - L1: Combine ser1 and ser2 to form a dataframe

In [7]:
# ser1 holds the 26 letters, ser2 the integers 0..25.
ser1 = pd.Series(data=[*'abcedfghijklmnopqrstuvwxyz'])
ser2 = pd.Series(data=np.arange(0, 26))

In [8]:
# Display ser1 to inspect its contents.
ser1

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [9]:
# Place ser1 and ser2 side by side as the two columns of a dataframe.
df = pd.concat([ser1,ser2], axis=1) # Alternative: pd.DataFrame({'col1':ser1, 'col2': ser2})

In [10]:
# Display df.
df

Unnamed: 0,0,1
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


### 005 - L1: Give a name to the series *ser* calling it *alphabets*

In [11]:
# A series of the 26 letters, created without a name.
ser = pd.Series(data=[*'abcedfghijklmnopqrstuvwxyz'])

In [12]:
# Name the series by setting its `name` attribute; the assignment itself
# displays nothing.
ser.name = "alphabets"

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
Name: alphabets, dtype: object

### 006 - L2: From ser1 get items not present in ser2

In [13]:
# Two integer series that have only the values 4 and 5 in common.
ser1, ser2 = pd.Series([1, 2, 3, 4, 5]), pd.Series([4, 5, 6, 7, 8])

In [14]:
# Keep only the entries of ser1 whose value does not occur in ser2.
ser1.loc[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

### 007 - L2: Get all items of ser1 and ser2 not common to both 

In [15]:
# Items found in exactly one of the two series: those only in ser1 followed
# by those only in ser2, each keeping its original index label.
# pd.concat replaces Series.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; it produces the same result on older versions.
pd.concat([ser1[~ser1.isin(ser2)], ser2[~ser2.isin(ser1)]])

0    1
1    2
2    3
2    6
3    7
4    8
dtype: int64

### 008 - L2: Compute the minimum, 25th percentile, median, 75th percentile, and maximum of ser

In [16]:
# 25 draws from a normal distribution with mean 10 and standard deviation 5.
ser = pd.Series(np.random.normal(loc=10, scale=5, size=25))

In [17]:
# describe() reports count, mean, std, min, the 25%/50%/75% quantiles and max;
# the 50% row is the median.
ser.describe()

count    25.000000
mean     10.426425
std       6.103765
min      -0.411490
25%       5.796571
50%      10.472992
75%      13.814067
max      21.724646
dtype: float64

#### Alternative:

In [18]:
# The 0th and 100th percentiles are the minimum and maximum; the 50th is the
# median.
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([-0.41148966,  5.79657077, 10.47299245, 13.81406736, 21.72464649])

### 009 - L1: Calculate the frequency counts of each unique value of ser

In [19]:
# 30 letters picked at random, with repetition, from 'a'..'h': random
# positions 0..7 are used to index into the array of letters.
ser = pd.Series(np.array(list('abcdefgh'))[np.random.randint(0, 8, size=30)])

In [20]:
# Count how many times each distinct value occurs, most frequent first.
ser.value_counts()

d    6
f    5
a    5
g    4
e    3
h    3
b    3
c    1
dtype: int64

### 010 - L2: From ser, keep the top 2 most frequent items as they are and replace everything else with *Other*

In [21]:
# Draw 12 integers from 1..4 using a generator seeded with 100, so the cell
# gives the same series on every run. Creating a RandomState without using it
# does not seed np.random, so the draws must come from the generator itself.
# NOTE: outputs recorded from an earlier unseeded run will differ; re-run to
# refresh them.
rng = np.random.RandomState(100)
ser = pd.Series(rng.randint(1, 5, [12]))
ser

0     1
1     1
2     3
3     4
4     1
5     4
6     4
7     4
8     3
9     3
10    4
11    4
dtype: int32

In [22]:
# value_counts() orders values by descending frequency, so the first two
# index entries are the two most frequent values.
ser.value_counts().index[:2]

Int64Index([4, 3], dtype='int64')

In [23]:
# Flag the entries holding one of the two most frequent values, then
# overwrite every other entry in place with the label "Other".
is_top_two = ser.isin(ser.value_counts().index[:2])
ser[~is_top_two] = "Other"

In [24]:
# Display ser after the replacement.
ser

0     Other
1     Other
2         3
3         4
4     Other
5         4
6         4
7         4
8         3
9         3
10        4
11        4
dtype: object

### 011 - L2: Bin the series ser into 10 equal deciles and replace the values with the bin name

## ?

### 012 - L1: Reshape the series ser into a dataframe with 7 rows and 5 columns

In [44]:
# 35 random integers from 1..9 (the upper bound 10 is exclusive).
ser = pd.Series(np.random.randint(low=1, high=10, size=35))

In [26]:
# Lay the values of ser out row by row in a 7-row, 5-column dataframe.
df = pd.DataFrame(ser.to_numpy().reshape((7, 5)))

In [27]:
# Display df.
df

Unnamed: 0,0,1,2,3,4
0,6,1,4,6,3
1,8,7,2,3,8
2,7,3,2,2,5
3,8,9,3,8,1
4,6,8,1,2,2
5,2,7,3,3,4
6,7,7,9,8,4


### 013 - L2: Find the positions of numbers that are multiples of 3 from ser

In [54]:
# 7 random integers from 1..9 (the upper bound 10 is exclusive).
ser = pd.Series(np.random.randint(low=1, high=10, size=7))

In [56]:
# Flag the entries divisible by 3, then read off the index labels of the
# flagged entries.
is_multiple_of_3 = ser % 3 == 0
ser[is_multiple_of_3].index

Int64Index([0], dtype='int64')

### 014 - L1: From ser, extract the items at positions in list pos

In [87]:
# The alphabet as a series, plus the five positions to pull out of it.
pos = [0, 4, 8, 14, 20]
ser = pd.Series(data=[*'abcdefghijklmnopqrstuvwxyz'])

In [88]:
# `pos` holds positions, so select with .iloc. Plain ser[pos] treats the
# integers as index labels and only gives the same answer while ser has the
# default 0..n-1 index.
ser.iloc[pos]

0     a
4     e
8     i
14    o
20    u
dtype: object

### 015 - L1: Stack ser1 and ser2 vertically and horizontally (to form a dataframe).

In [61]:
# ser1 holds the integers 0..4, ser2 the letters 'a'..'e'.
ser1 = pd.Series(data=range(5))
ser2 = pd.Series(data=[*'abcde'])

In [63]:
# vertical: ser2's entries follow ser1's, each keeping its own index label
pd.concat([ser1, ser2], axis=0)

0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object

In [64]:
# horizontal: each series becomes one column of the resulting dataframe
pd.concat([ser1, ser2], axis="columns")

Unnamed: 0,0,1
0,0,a
1,1,b
2,2,c
3,3,d
4,4,e


### 016 - L2: Get the positions of items of ser2 in ser1 as a list

In [89]:
# ser1 is the series to search in; ser2 holds the values to locate.
ser1 = pd.Series(data=[10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series(data=[1, 3, 10, 13])

In [100]:
# Build the lookup index from ser1's values once, rather than rebuilding it
# for every element of ser2, then ask it for the position of each value.
ser1_index = pd.Index(ser1)
[ser1_index.get_loc(i) for i in ser2]

[5, 4, 0, 8]