How to import pandas and check the version?

How to create a series from a list, numpy array and dict?


How to convert the index of a series into a column of a dataframe?


How to combine many series to form a dataframe?


How to assign name to the series’ index?

How to get the items of series A not present in series B?

How to get the items not common to both series A and series B?


How to get the minimum, 25th percentile, median, 75th percentile, and maximum of a numeric series?

How to get frequency counts of unique items of a series?

How to keep only the top 2 most frequent values as they are and replace everything else with ‘Other’?

How to bin a numeric series into 10 groups of equal size?


How to convert a numpy array to a dataframe of given shape? 

In [1]:
import numpy as np  # optional
import pandas as pd
# Report the installed pandas version.
print(pd.__version__)
# show_versions() writes its report to stdout itself and returns None, so
# wrapping it in print() only appends a stray "None" line to the output.
pd.show_versions(as_json=True)

0.21.0
{'system': {'commit': None}, 'dependencies': {'tables': '3.4.2', 'sqlalchemy': '1.1.13', 'patsy': '0.4.1', 'pyarrow': None, 'scipy': '1.0.0', 'pandas': '0.21.0', 'xlrd': '1.1.0', 'feather': None, 'pip': '9.0.1', 'jinja2': '2.10', 'Cython': '0.26', 'setuptools': '36.5.0', 's3fs': None, 'dateutil': '2.6.1', 'bs4': '4.6.0', 'lxml': '3.8.0', 'psycopg2': '2.7.5 (dt dec pq3 ext lo64)', 'html5lib': '0.9999999', 'pymysql': None, 'pandas_datareader': '0.5.0', 'openpyxl': '2.4.8', 'matplotlib': '2.0.2', 'IPython': '6.1.0', 'bottleneck': '1.2.1', 'pytest': '3.2.1', 'pandas_gbq': None, 'numpy': '1.13.3', 'xlsxwriter': '0.9.8', 'pytz': '2017.3', 'xlwt': '1.3.0', 'xarray': None, 'blosc': None, 'sphinx': '1.6.3', 'numexpr': '2.6.2', 'fastparquet': None}}
None


  """)


In [2]:
import numpy as np
# Sample inputs: a list of letters, a numpy array of integers, and a dict
# pairing each letter with the integer at the same position.
# Note that 'e' comes before 'd' in the letter string, so 'e' -> 3 and 'd' -> 4.
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(len(mylist))
mydict = {letter: number for letter, number in zip(mylist, myarr)}

In [4]:
# Bare expression: it is not the last statement of this cell, so nothing is echoed for it.
mydict

# Build one series per container type, in order: list, numpy array, dict.
ser1, ser2, ser3 = map(pd.Series, (mylist, myarr, mydict))
# For the dict-based series the dict keys become the index labels.
print(ser3.head())

a    0
b    1
c    2
d    4
e    3
dtype: int64


In [5]:
# Turn the index of a series into an ordinary column of a dataframe.
# Input: a series whose index holds the letters and whose values are integers
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(len(mylist))
mydict = {letter: number for letter, number in zip(mylist, myarr)}
ser = pd.Series(mydict)

# Solution: resetting the index of a series yields a dataframe in which the old
# labels sit in a column named 'index' and the (unnamed) values in column 0.
df = ser.reset_index()
print(df.head())

  index  0
0     a  0
1     b  1
2     c  2
3     d  4
4     e  3


In [6]:
# Combine several series into a single dataframe.

# Input: a series of letters and a series of integers of the same length
import numpy as np
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(len(ser1)))

# Solution 1: lay the series side by side as columns
df = pd.concat([ser1, ser2], axis='columns')

# Solution 2: build the frame from a mapping so every column gets an explicit name.
# This rebinds `df`, so the frame printed below is the one with named columns.
df = pd.DataFrame(dict(col1=ser1, col2=ser2))
print(df.head())


  col1  col2
0    a     0
1    b     1
2    c     2
3    e     3
4    d     4


In [7]:
# Give a series a name.
# Note: this sets the name of the series itself (the "Name:" line in its repr);
# the label of the index would be set through `ser.index.name` instead.
# Input
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

# Solution: rename() with a scalar returns the same data carrying the new name
ser = ser.rename('alphabets')
ser.head()

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object

In [8]:
# Items of series A that do not occur in series B.
# Input
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Solution: flag the elements of ser1 that also occur in ser2, then keep the rest
found_in_ser2 = ser1.isin(ser2)
ser1.loc[~found_in_ser2]

0    1
1    2
2    3
dtype: int64

In [10]:
# Items that occur in exactly one of series A and series B.

# Input
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Solution: take everything in the union that is not part of the intersection
ser_u = pd.Series(np.union1d(ser1, ser2))      # every distinct value, sorted
ser_i = pd.Series(np.intersect1d(ser1, ser2))  # values present in both
shared = ser_u.isin(ser_i)
ser_u.loc[~shared]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

In [12]:
# Minimum, 25th percentile, median, 75th percentile and maximum of a numeric series.

# Input: 25 draws from a normal distribution with mean 10 and standard deviation 5.
# The draws come from a seeded generator object so that the series is the same
# on every run. Constructing RandomState(100) and discarding the result does not
# seed the module-level np.random functions.
rng = np.random.RandomState(100)
ser = pd.Series(rng.normal(10, 5, 25))

# Solution: the 0th and 100th percentiles are the minimum and maximum
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([  0.92992352,   6.37072763,   9.91011028,  14.08254961,  20.86100858])

In [13]:
# Count how often each distinct item occurs in a series.

# Input: 30 letters picked at random (with repetition) from 'a' to 'h'
letters = np.array(list('abcdefgh'))
picks = np.random.randint(8, size=30)
ser = pd.Series(letters[picks])

# Solution: value_counts() lists each distinct value with its count, most frequent first
ser.value_counts()

b    7
e    6
g    4
d    4
a    3
f    2
h    2
c    2
dtype: int64

In [14]:
# Keep the 2 most frequent values as they are and replace everything else with 'Other'.

# Input: 12 random integers from 1 to 4.
# The draws come from a seeded generator object so that the series is the same
# on every run. Constructing RandomState(100) and discarding the result does not
# seed the module-level np.random functions.
rng = np.random.RandomState(100)
ser = pd.Series(rng.randint(1, 5, [12]))

# Solution
counts = ser.value_counts()  # full frequency table, most frequent value first
print("Top 2 Freq:", counts)
top2 = counts.index[:2]
# where() keeps the entries that satisfy the condition and substitutes 'Other'
# for the rest, returning a new object-dtype series. This avoids assigning a
# string into the int64 series in place, which newer pandas versions deprecate.
ser = ser.where(ser.isin(top2), 'Other')
ser

Top 2 Freq: 4    5
2    3
3    2
1    2
dtype: int64


0     Other
1         2
2     Other
3         2
4     Other
5         4
6         2
7     Other
8         4
9         4
10        4
11        4
dtype: object

In [15]:
# Bin a numeric series into 10 groups of equal size.
# Input: 20 random numbers from the half-open interval [0, 1)
ser = pd.Series(np.random.random(20))
print(ser.head())

# Solution: cut at every tenth quantile and give each bin an ordinal label
decile_edges = [step / 10 for step in range(11)]
decile_labels = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
pd.qcut(ser, q=decile_edges, labels=decile_labels).head()

0    0.510773
1    0.320598
2    0.394476
3    0.729074
4    0.376425
dtype: float64


0    6th
1    3rd
2    4th
3    7th
4    4th
dtype: category
Categories (10, object): [1st < 2nd < 3rd < 4th ... 7th < 8th < 9th < 10th]

In [16]:
# Reshape the values of a series into a dataframe of a given shape.

# Input: 35 random integers from 1 to 9
ser = pd.Series(np.random.randint(1, 10, size=35))

# Solution: view the 35 underlying values as 7 rows of 5 and wrap them in a frame
grid = np.reshape(ser.values, (7, 5))
df = pd.DataFrame(grid)
print(df)

   0  1  2  3  4
0  5  1  2  4  5
1  9  3  7  1  7
2  8  2  8  5  9
3  3  2  4  1  2
4  4  3  2  8  8
5  5  1  8  9  6
6  6  2  6  1  4
