# 101 Pandas Exercises

### Source: [Selva Prabhakaran on Machine Learning Plus](https://www.machinelearningplus.com/python/101-pandas-exercises-python/)

### 001 - L1: Import Pandas and print the version number

In [3]:
import pandas as pd
import numpy as np

In [5]:
print(pd.__version__)

1.0.3


### 002 - L1: Create a pandas series from each of the items below: a list, numpy and a dictionary

In [8]:
import numpy as np
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

In [12]:
s1 = pd.Series(mylist)
s2 = pd.Series(myarr)
s3 = pd.Series(mydict)

### 003 - L1: Convert the series ser into a dataframe with its index as another column on the dataframe

In [18]:
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)

In [25]:
pd.DataFrame(ser,index=ser.index)

Unnamed: 0,0
a,0
b,1
c,2
e,3
d,4
f,5
g,6
h,7
i,8
j,9


### 004 - L1: Combine ser1 and ser2 to form a dataframe

In [13]:
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

In [14]:
ser1

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [19]:
df = pd.concat([ser1,ser2], axis=1) # Alternative: df.DataFrame({'col1':ser1, 'col2': ser2})

In [20]:
df

Unnamed: 0,0,1
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


### 005 - L1: Give a name to the series *ser* calling it *alphabets*

In [21]:
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

In [23]:
ser.name = "alphabets"

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
Name: alphabets, dtype: object

### 006 - L2: From ser1 remove items present in ser2

In [25]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [31]:
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

### 007 - L2: Get all items of ser1 and ser2 not common to both 

In [33]:
ser1[~ser1.isin(ser2)].append(ser2[~ser2.isin(ser1)])

0    1
1    2
2    3
2    6
3    7
4    8
dtype: int64

### 008 - L2: Compute the minimum, 25th percentile, median, 75th, and maximum of ser

In [34]:
ser = pd.Series(np.random.normal(10, 5, 25))

In [36]:
ser.describe()

count    25.000000
mean     10.574738
std       4.770065
min       3.674580
25%       7.340491
50%       9.111537
75%      13.643056
max      23.327007
dtype: float64

#### Alternative:

In [42]:
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([ 3.6745803 ,  7.34049053,  9.11153666, 13.64305615, 23.32700728])

### 009 - L1: Calculte the frequency counts of each unique value ser

In [44]:
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

In [52]:
ser.value_counts()

h    6
e    5
d    5
b    4
g    3
f    3
c    3
a    1
dtype: int64

### 010 - L2: From ser, keep the top 2 most frequent items as it is and replace everything else as *Other*

In [71]:
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))
ser

0     2
1     3
2     4
3     4
4     1
5     2
6     3
7     3
8     1
9     3
10    4
11    3
dtype: int32

In [72]:
ser.value_counts().index[:2]

Int64Index([3, 4], dtype='int64')

In [73]:
ser[~ser.isin(ser.value_counts().index[:2])] = "Other"

In [74]:
ser

0     Other
1         3
2         4
3         4
4     Other
5     Other
6         3
7         3
8     Other
9         3
10        4
11        3
dtype: object

### 011 - L2: Bin the series ser into 10 equal deciles and replace the values with the bin name

## ?

### 012 - L1: Reshape the series ser into a dataframe with 7 rows and 5 columns

In [75]:
ser = pd.Series(np.random.randint(1, 10, 35))

In [81]:
df = pd.DataFrame(ser.values.reshape(7,5))

In [82]:
df

Unnamed: 0,0,1,2,3,4
0,4,2,2,8,6
1,5,2,5,5,5
2,9,5,6,9,9
3,3,8,6,4,9
4,6,3,2,9,3
5,4,7,8,9,5
6,6,3,4,6,2
