In [1]:
import pandas as pd
import numpy as np

**How to check the pandas version**

In [2]:
# Version string of the pandas package imported above as `pd`.
pd.__version__

'0.25.1'

In [5]:
# Print the pandas version together with the versions of its dependencies
# and details of the host environment (Python build, OS, locale).
pd.show_versions()


INSTALLED VERSIONS
------------------
commit           : None
python           : 3.7.4.final.0
python-bits      : 64
OS               : Windows
OS-release       : 10
machine          : AMD64
processor        : Intel64 Family 6 Model 142 Stepping 9, GenuineIntel
byteorder        : little
LC_ALL           : None
LANG             : None
LOCALE           : None.None

pandas           : 0.25.1
numpy            : 1.16.5
pytz             : 2019.3
dateutil         : 2.8.0
pip              : 19.2.3
setuptools       : 41.4.0
Cython           : 0.29.13
pytest           : 5.2.1
hypothesis       : None
sphinx           : 2.2.0
blosc            : None
feather          : None
xlsxwriter       : 1.2.1
lxml.etree       : 4.4.1
html5lib         : 1.0.1
pymysql          : None
psycopg2         : None
jinja2           : 2.10.3
IPython          : 7.8.0
pandas_datareader: None
bs4              : 4.8.0
bottleneck       : 1.2.1
fastparquet      : None
gcsfs            : None
lxml.etree       : 4.4.1
matplotl

**How to create a Series from a list, a NumPy array, and a dict**

In [14]:
# Three kinds of input that pd.Series accepts.
a_list = list('abcdef')                             # list of single characters
n_array = np.random.randint(1, 10, 10)              # 10 random integers, each in 1..9
a_dict = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}   # dict keys become the index

# Build one Series per container and print them in the order defined above.
for source in (a_list, n_array, a_dict):
    print(pd.Series(source))

0    a
1    b
2    c
3    d
4    e
5    f
dtype: object
0    2
1    8
2    9
3    7
4    9
5    8
6    9
7    2
8    9
9    3
dtype: int32
A    0
B    1
C    2
D    3
E    4
dtype: int64


**How to convert the index of a series into a column of a dataframe**

In [22]:
# Letter labels paired with the integers 0..25. The literal is reproduced
# exactly as written, so 'e' comes before 'd' in it.
a_list = list('abcedfghijklmnopqrstuvwxyz')
n_array = np.arange(26)
my_dict = {letter: number for letter, number in zip(a_list, n_array)}

# A Series built from a dict takes the dict keys as its index.
serie = pd.Series(data=my_dict)
serie.head(5)

a    0
b    1
c    2
e    3
d    4
dtype: int64

In [25]:
# Promote the Series to a one-column DataFrame (the unnamed column is labelled 0).
df_serie = serie.to_frame()

# reset_index() moves the letter labels out of the index into a regular
# column named "index" and installs a fresh 0..n-1 integer index.
df_serie.reset_index().head(5)

Unnamed: 0,index,0
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4


**How to combine many series to form a dataframe**

In [46]:
# Three inputs: two Series on the default 0..5 integer index, and one Series
# built from a dict whose index is the string labels 'A'..'F'.
ser_1 = pd.Series(list('abcdef'))
ser_2 = pd.Series(np.arange(6))
ser_3 = pd.Series({'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5})

# Approach 1: use ser_2 as the row index of a frame built from ser_1, then
# turn that index back into an ordinary column with reset_index().
# The second positional parameter of pd.DataFrame is `index`, so it is spelled
# out as a keyword here. ser_1 is re-aligned to those labels; every value
# survives only because ser_2's values (0..5) coincide with ser_1's own
# index labels.
a_df = pd.DataFrame(ser_1, index=ser_2)
# Previously printed `df`, a name that is not defined in any visible cell and
# therefore raises NameError on a fresh kernel.
print(a_df.reset_index().head())

# Approach 2: a dict of Series gives one named column per Series. Rows are
# aligned on index labels, so ser_3 (labels 'A'..'F') shares no rows with
# ser_1/ser_2 (labels 0..5) and the gaps are filled with NaN.
b_df = pd.DataFrame({'col1': ser_1, 'col2': ser_2, 'col3': ser_3})
print(b_df)

# Approach 3: concatenate side by side. axis=1 also aligns on index labels,
# and the unnamed Series get the column labels 0, 1, 2.
c_df = pd.concat([ser_1, ser_2, ser_3], axis=1)
print(c_df)

   index  0
0      0  a
1      1  b
2      2  c
3      3  d
4      4  e
  col1  col2  col3
0    a   0.0   NaN
1    b   1.0   NaN
2    c   2.0   NaN
3    d   3.0   NaN
4    e   4.0   NaN
5    f   5.0   NaN
A  NaN   NaN   0.0
B  NaN   NaN   1.0
C  NaN   NaN   2.0
D  NaN   NaN   3.0
E  NaN   NaN   4.0
F  NaN   NaN   5.0
     0    1    2
0    a  0.0  NaN
1    b  1.0  NaN
2    c  2.0  NaN
3    d  3.0  NaN
4    e  4.0  NaN
5    f  5.0  NaN
A  NaN  NaN  0.0
B  NaN  NaN  1.0
C  NaN  NaN  2.0
D  NaN  NaN  3.0
E  NaN  NaN  4.0
F  NaN  NaN  5.0


**How to get the items of series A not present in series B**

In [48]:
# Two integer Series that overlap on the values 4 and 5.
ser1 = pd.Series(list(range(1, 6)))   # 1, 2, 3, 4, 5
ser2 = pd.Series(list(range(4, 9)))   # 4, 5, 6, 7, 8

In [49]:
# Keep only the entries of ser1 whose value does not occur in ser2:
# isin() marks the shared values, ~ inverts that boolean mask.
ser1.loc[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

**How to get the items not common to both series A and series B**

In [53]:
# Symmetric difference: the values found in exactly one of the two Series.
a_not_b = ser1[~ser1.isin(ser2)]   # in ser1 but not in ser2
b_not_a = ser2[~ser2.isin(ser1)]   # in ser2 but not in ser1

# Series.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is
# its replacement and is also available in older releases.
# ignore_index=True renumbers the combined result 0..n-1.
pd.concat([a_not_b, b_not_a], ignore_index=True)

0    1
1    2
2    3
3    6
4    7
5    8
dtype: int64

**How to get the minimum, 25th percentile, median, 75th percentile, and maximum of a numeric series**

In [75]:
# 1000 draws from a normal distribution with mean 20 and standard deviation 6.
normal = pd.Series(np.random.normal(loc=20, scale=6, size=1000))

# describe() reports min, 25%, 50% (the median), 75% and max, alongside
# count, mean and std.
normal.describe()

count    1000.000000
mean       20.131768
std         6.076332
min         3.290676
25%        16.030878
50%        20.027438
75%        24.034145
max        41.052099
dtype: float64

In [88]:
# The two most frequent values among 1000 random integers drawn from 1..11.
# value_counts() sorts by frequency, highest first, so the first two rows
# are the two most common values.
pd.Series(np.random.randint(1, 12, 1000)).value_counts().head(2)

7    110
5    105
dtype: int64