#### 21. How to convert a series of date-strings to a timeseries?

In [None]:
import pandas as pd

In [None]:
# Six date strings, each written in a different notation; they are kept as
# plain text here and converted to real timestamps in the solution cell.
date_strings = [
    '01 Jan 2010',
    '02-02-2011',
    '20120303',
    '2013/04/04',
    '2014-05-05',
    '2015-06-06T12:20',
]
ser = pd.Series(date_strings)

In [None]:
# Input: six date strings, each written in a different notation.
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04',
                 '2014-05-05', '2015-06-06T12:20'])

# Solution 1: let dateutil parse every string on its own.
from dateutil.parser import parse
ser_dateutil = ser.map(parse)

# Solution 2: use the pandas parser, one element at a time.
# A plain pd.to_datetime(ser) infers a single format from the first element
# and, on pandas >= 2.0, raises ValueError as soon as a later string does not
# match it. Parsing element by element works on old and new pandas alike; on
# pandas >= 2.0 the vectorized spelling is pd.to_datetime(ser, format='mixed').
ser_pandas = ser.map(pd.to_datetime)

# Both solutions yield the same timestamps; show the pandas one.
ser_pandas

#### 22. How to get the day of month, week number, day of year and day of week from a series of date strings?

In [None]:
# Input: six date strings, each written in a different notation.
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04',
                 '2014-05-05', '2015-06-06T12:20'])

# Solution
from dateutil.parser import parse

# Parse every string on its own so the mixed notations are all understood;
# the resulting datetime Series exposes the `.dt` accessor used below.
ser_ts = ser.map(parse)

# day of month
day_of_month = ser_ts.dt.day.tolist()
print("Date: ", day_of_month)

# week number (ISO 8601) -- `.dt.weekofyear` no longer exists in current
# pandas; `.dt.isocalendar().week` is its replacement.
week_number = ser_ts.dt.isocalendar().week.tolist()
print("Week number: ", week_number)

# day of year
day_of_year = ser_ts.dt.dayofyear.tolist()
print("Day number of year: ", day_of_year)

# day of week -- `.dt.weekday_name` no longer exists in current pandas;
# `.dt.day_name()` returns the same English day names.
day_of_week = ser_ts.dt.day_name().tolist()
print("Day of week: ", day_of_week)

#### 23. How to convert year-month string to dates corresponding to the 4th day of the month?

In [None]:
# Input: month-year strings without a day component.
ser = pd.Series(['Jan 2010', 'Feb 2011', 'Mar 2012'])

# Solution 1: parse, rebuild a 'year-month-04' string, then format it.
from dateutil.parser import parse

# Parse the date (only the year and month of the result are used below).
ser_ts = ser.map(parse)

# Construct date string with the day fixed to 4. The expression is wrapped in
# parentheses so it can span lines; a bare trailing `+` is a syntax error.
ser_datestr = (
    ser_ts.dt.year.astype('str')
    + '-'
    + ser_ts.dt.month.astype('str')
    + '-'
    + '04'
)

# Format it as zero-padded ISO dates (list of strings).
dates_as_strings = [parse(i).strftime('%Y-%m-%d') for i in ser_datestr]

# Solution 2: prepend the day before parsing (Series of timestamps).
dates_as_timestamps = ser.map(lambda x: parse('04 ' + x))
dates_as_timestamps

#### 24. How to filter words that contain at least 2 vowels from a series?

In [None]:
# Input
ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])

# Solution
from collections import Counter


def count_vowels(word):
    """Return how many letters of `word` are vowels (a, e, i, o, u), ignoring case."""
    letter_counts = Counter(word.lower())
    # A Counter reports 0 for letters that never occurred.
    return sum(letter_counts[vowel] for vowel in 'aeiou')


# True for every word that has two or more vowels.
mask = ser.map(count_vowels) >= 2
ser[mask]

#### 25. How to filter valid emails from a series?

In [None]:
# Input
emails = pd.Series([
    'buying books at amazom.com',
    'rameses@egypt.com',
    'matt@t.co',
    'narendra@modi.com',
])

import re

# local-part @ domain . 2-4 letter suffix
pattern ='[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}'

# Solution 1 (as series of strings): keep the entries that begin with an
# e-mail address (re.match anchors only at the start of the string).
mask = emails.map(lambda address: re.match(pattern, address) is not None)
emails[mask]

# Solution 2 (as series of list): every address found inside each entry.
emails.str.findall(pattern, flags=re.IGNORECASE)

# Solution 3 (as list): the first address of every entry that contains one.
[hits[0] for hits in (re.findall(pattern, entry) for entry in emails) if hits]

#### 26. How to get the mean of a series grouped by another series?

In [None]:
# Input
# The fruit labels are drawn from a locally seeded generator so that the cell
# prints the same result on every run (Restart & Run All stays reproducible).
rng = np.random.default_rng(42)
fruit = pd.Series(rng.choice(['apple', 'banana', 'carrot'], 10))
weights = pd.Series(np.linspace(1, 10, 10))

# Solution: group the weights by the label at the same position in `fruit`
# and average each group.
mean_weight_by_fruit = weights.groupby(fruit).mean()
mean_weight_by_fruit