#### https://www.machinelearningplus.com/python/101-pandas-exercises-python/

In [2]:
import pandas as pd
import numpy as np

### 2. How to create a series from a list, numpy array and dict?

In [2]:
import numpy as np
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

x = pd.Series(mylist)
y = pd.Series(myarr)
z = pd.Series(mydict)
x, y, z

(0     a
 1     b
 2     c
 3     e
 4     d
 5     f
 6     g
 7     h
 8     i
 9     j
 10    k
 11    l
 12    m
 13    n
 14    o
 15    p
 16    q
 17    r
 18    s
 19    t
 20    u
 21    v
 22    w
 23    x
 24    y
 25    z
 dtype: object, 0      0
 1      1
 2      2
 3      3
 4      4
 5      5
 6      6
 7      7
 8      8
 9      9
 10    10
 11    11
 12    12
 13    13
 14    14
 15    15
 16    16
 17    17
 18    18
 19    19
 20    20
 21    21
 22    22
 23    23
 24    24
 25    25
 dtype: int32, a     0
 b     1
 c     2
 e     3
 d     4
 f     5
 g     6
 h     7
 i     8
 j     9
 k    10
 l    11
 m    12
 n    13
 o    14
 p    15
 q    16
 r    17
 s    18
 t    19
 u    20
 v    21
 w    22
 x    23
 y    24
 z    25
 dtype: int64)

### 3. How to convert the index of a series into a column of a dataframe?

In [3]:
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)

# pd.DataFrame({'index':ser.index, '0':ser.values})
ser.to_frame().reset_index()

Unnamed: 0,index,0
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


### How to combine many series to form a dataframe?### 

In [4]:
import numpy as np
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

pd.concat([ser1, ser2], axis=1)

Unnamed: 0,0,1
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


### 5. How to assign name to the series’ index?

In [5]:
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

ser.name = "test"

ser

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
Name: test, dtype: object

### 6. How to get the items of series A not present in series B?

In [6]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

set(ser1)-set(ser2) #python solution
ser1[~ser1.isin(ser2)] #pandas solution

0    1
1    2
2    3
dtype: int64

### 7. How to get the items not common to both series A and series B?



In [7]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# a = ser1[~ser1.isin(ser2)]
a = set(ser1)-set(ser2)
b = set(ser2)-set(ser1)
pd.Series(list(a)+list(b)).sort_values()


0    1
1    2
2    3
4    6
5    7
3    8
dtype: int64

### 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?

In [8]:
ser = pd.Series(np.random.normal(10, 5, 25))

ser.describe()

count    25.000000
mean     11.656398
std       4.380111
min       2.422999
25%       8.810720
50%      11.601807
75%      14.979812
max      18.558429
dtype: float64

### 9. How to get frequency counts of unique items of a series?

In [9]:
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))
ser.value_counts()

g    6
d    5
b    5
e    4
a    3
c    3
f    2
h    2
dtype: int64

### 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?

In [10]:
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))
top2 = ser.value_counts().head(2).index.to_list()
ser[~ser.isin(top2)] = 'Other'
ser


0         4
1         4
2     Other
3     Other
4     Other
5         2
6         4
7         2
8         4
9     Other
10        2
11    Other
dtype: object

### 11. How to bin a numeric series to 10 groups of equal size?

In [86]:
ser = pd.Series(np.random.random(300)).sort_values()

pd.qcut(ser, q=[0, .10, .20, .3, .4, .5, .6, .7, .8, .9, 1], 
        labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']).head()


167    1st
211    1st
208    1st
233    1st
182    1st
dtype: category
Categories (10, object): [1st < 2nd < 3rd < 4th ... 7th < 8th < 9th < 10th]

### 12. How to convert a numpy array to a dataframe of given shape?

In [100]:
ser = pd.Series(np.random.randint(1, 10, 35))
#Reshape the series ser into a dataframe with 7 rows and 5 columns
pd.DataFrame(ser.values.reshape(7, 5))

Unnamed: 0,0,1,2,3,4
0,5,7,4,9,4
1,3,4,6,5,8
2,8,2,8,6,2
3,4,4,6,2,7
4,5,4,9,3,5
5,2,9,9,6,6
6,6,6,4,2,1


### 13. How to find the positions of numbers that are multiples of 3 from a series?

In [137]:
#Find the positions of numbers that are multiples of 3 from ser.
ser = pd.Series(np.random.randint(1, 10, 7))

ser[ser%3 ==0].index.values

array([2, 5], dtype=int64)

### 14. How to extract items at given positions from a series


In [148]:
#From ser, extract the items at positions in list pos.
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]

ser.iloc[pos]
# ser.take(pos)

0     a
4     e
8     i
14    o
20    u
dtype: object

### 15. How to stack two series vertically and horizontally ?

In [158]:
#Stack ser1 and ser2 vertically and horizontally (to form a dataframe).
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

#vertically
ser1.append(ser2).reset_index(drop=True)
#horizontally
pd.concat([ser1, ser2], axis=1)

0    0
1    1
2    2
3    3
4    4
5    a
6    b
7    c
8    d
9    e
dtype: object

### 16. How to get the positions of items of series A in another series B?

In [172]:
#Get the positions of items of ser2 in ser1 as a list.
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

ser1[ser1.isin(ser2)].index.to_list()

[0, 4, 5, 8]

### 17. How to compute the mean squared error on a truth and predicted series?

In [177]:
#Compute the mean squared error of truth and pred series.
truth = pd.Series(range(10))
pred = pd.Series(range(10)) + np.random.random(10)

np.square(np.subtract(truth, pred)).mean()

0.2923695528905816

### 18. How to convert the first character of each element in a series to uppercase?

In [185]:
#Change the first character of each word to upper case in each word of ser.
ser = pd.Series(['how', 'to', 'kick', 'ass?'])
ser.apply(lambda  x: x[0].upper() + x[1:])

0     How
1      To
2    Kick
3    Ass?
dtype: object

### 19. How to calculate the number of characters in each word in a series?

In [4]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])
ser.apply(lambda x: len(x))

0    3
1    2
2    4
3    4
dtype: int64

### 20. How to compute difference of differences between consequtive numbers of a series?

In [13]:
#Difference of differences between the consequtive numbers of ser.
ser = pd.Series([1, 3, 6, 10, 15, 21, 27, 35])
ser.diff().tolist()
ser.diff().diff().tolist()

[nan, nan, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0]

### 21. How to convert a series of date-strings to a timeseries?

In [17]:
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])
pd.to_datetime(ser)


0   2010-01-01 00:00:00
1   2011-02-02 00:00:00
2   2012-03-03 00:00:00
3   2013-04-04 00:00:00
4   2014-05-05 00:00:00
5   2015-06-06 12:20:00
dtype: datetime64[ns]

### 22. How to get the day of month, week number, day of year and day of week from a series of date strings?

In [39]:
#Get the day of month, week number, day of year and day of week from ser.
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])
ser_dt = pd.to_datetime(ser)
print('Date: ' + str(ser_dt.dt.day.tolist()))
print('Week number: ' + str(ser_dt.dt.week.tolist()))
print('Day num of year: ' + str(ser_dt.dt.dayofyear.tolist()))
print('Day of week: ' + str(ser_dt.dt.day_name().tolist()))


Date: [1, 2, 3, 4, 5, 6]
Week number: [53, 5, 9, 14, 19, 23]
Day num of year: [1, 33, 63, 94, 125, 157]
Day of week: ['Friday', 'Wednesday', 'Saturday', 'Thursday', 'Monday', 'Saturday']


### 23. How to convert year-month string to dates corresponding to the 4th day of the month?

In [57]:
#Change ser to dates that start with 4th of the respective months.
ser = pd.Series(['Jan 2010', 'Feb 2011', 'Mar 2012'])
ser_dt = pd.to_datetime(ser)
pd.to_datetime(ser_dt.dt.year.astype(str)+ser_dt.dt.month.astype(str)+'04', format='%Y%m%d')

0   2010-10-04
1   2011-02-04
2   2012-03-04
dtype: datetime64[ns]

### 24. How to filter words that contain atleast 2 vowels from a series?

In [66]:
#From ser, extract words that contain atleast 2 vowels.
ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])

def check_2vowels(x):
    count=0
    for char in x:
        if char.lower() in ['a','e','i','o','u']:
            count+=1

    if count >=2:
        return True
    else:
        return False
                 
ser[ser.apply(lambda x: check_2vowels(x))]

0     Apple
1    Orange
4     Money
dtype: object