# Pandas Part-3

## 11. How to bin a numeric series to 10 groups of equal size?

In [3]:
# Input
ser = pd.Series(np.random.random(20))
print(ser.head())

# Solution
pd.qcut(ser, q=[0, .10, .20, .3, .4, .5, .6, .7, .8, .9, 1], 
        labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']).head()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

0    0.895534
1    0.218012
2    0.912997
3    0.799037
4    0.173699
dtype: float64


<IPython.core.display.Javascript object>

0    10th
1     3rd
2    10th
3     8th
4     2nd
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']

## 12. How to convert a numpy array to a dataframe of given shape? (L1)

In [5]:
# Input
ser = pd.Series(np.random.randint(1, 10, 35))

# Solution
df = pd.DataFrame(ser.values.reshape(7,5))
print(df)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   0  1  2  3  4
0  8  1  1  3  4
1  2  2  6  8  4
2  1  8  9  2  6
3  8  1  2  4  3
4  6  3  3  6  9
5  3  8  3  4  6
6  3  1  3  4  6


## 13. How to extract items at given positions from a series

In [10]:
# Input
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]

# Solution
ser.take(pos)

<IPython.core.display.Javascript object>

0     a
4     e
8     i
14    o
20    u
dtype: object

## 14. How to stack two series vertically and horizontally ?

In [13]:
from warnings import filterwarnings
filterwarnings(action='ignore')

In [14]:
# Input
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

# Output
# Vertical
ser1.append(ser2)

# Horizontal
df = pd.concat([ser1, ser2], axis=1)
print(df)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

   0  1
0  0  a
1  1  b
2  2  c
3  3  d
4  4  e


## 15. How to get the positions of items of series A in another series B?

In [15]:
# Input
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

# Solution 1
[np.where(i == ser1)[0].tolist()[0] for i in ser2]

# Solution 2
[pd.Index(ser1).get_loc(i) for i in ser2]

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[5, 4, 0, 8]

## 16. How to convert the first character of each element in a series to uppercase?

In [16]:
# Input
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

# Solution 1
ser.map(lambda x: x.title())

# Solution 2
ser.map(lambda x: x[0].upper() + x[1:])

# Solution 3
pd.Series([i.title() for i in ser])

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

0     How
1      To
2    Kick
3    Ass?
dtype: object

## 17. How to calculate the number of characters in each word in a series?

In [17]:
# Input
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

# Solution
ser.map(lambda x: len(x))

<IPython.core.display.Javascript object>

0    3
1    2
2    4
3    4
dtype: int64

## 18. How to convert a series of date-strings to a timeseries?

In [18]:
# Input
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])

# Solution 1
from dateutil.parser import parse
ser.map(lambda x: parse(x))

# Solution 2
pd.to_datetime(ser)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

0   2010-01-01 00:00:00
1   2011-02-02 00:00:00
2   2012-03-03 00:00:00
3   2013-04-04 00:00:00
4   2014-05-05 00:00:00
5   2015-06-06 12:20:00
dtype: datetime64[ns]

## 19. How to get the day of month, week number, day of year and day of week from a series of date strings?

In [21]:
# Input
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])

# Solution
from dateutil.parser import parse
ser_ts = ser.map(lambda x: parse(x))

# day of month
print("Date: ", ser_ts.dt.day.tolist())

# week number
print("Week number: ", ser_ts.dt.weekofyear.tolist())

# day of year
print("Day number of year: ", ser_ts.dt.dayofyear.tolist())


<IPython.core.display.Javascript object>

Date:  [1, 2, 3, 4, 5, 6]
Week number:  [53, 5, 9, 14, 19, 23]
Day number of year:  [1, 33, 63, 94, 125, 157]



## 20. How to filter words that contain atleast 2 vowels from a series?

In [23]:
# Input
ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])

# Solution
from collections import Counter
mask = ser.map(lambda x: sum([Counter(x.lower()).get(i, 0) for i in list('aeiou')]) >= 2)
ser[mask]

<IPython.core.display.Javascript object>

0     Apple
1    Orange
4     Money
dtype: object

In [None]:
for any query contact @ https://www.linkedin.com/in/vigneshwar2103/