#### 1. How to import pandas and check the version?

In [4]:
import numpy as np # optional
import pandas as pd

print(pd.__version__)
# show_versions() writes its report straight to stdout and returns None,
# so wrapping it in print() would only append a stray "None" line.
pd.show_versions(as_json=True)

1.4.3
{
  "system": {
    "commit": "e8093ba372f9adfe79439d90fe74b0b5b6dea9d6",
    "python": "3.10.5.final.0",
    "python-bits": 64,
    "OS": "Windows",
    "OS-release": "10",
    "Version": "10.0.19044",
    "machine": "AMD64",
    "processor": "Intel64 Family 6 Model 126 Stepping 5, GenuineIntel",
    "byteorder": "little",
    "LC_ALL": null,
    "LANG": null,
    "LOCALE": {
      "language-code": "pt_BR",
      "encoding": "cp1252"
    }
  },
  "dependencies": {
    "pandas": "1.4.3",
    "numpy": "1.23.0",
    "pytz": "2022.7.1",
    "dateutil": "2.8.2",
    "setuptools": "58.1.0",
    "pip": "22.2",
    "Cython": null,
    "pytest": null,
    "hypothesis": null,
    "sphinx": null,
    "blosc": null,
    "feather": null,
    "xlsxwriter": null,
    "lxml.etree": "4.9.1",
    "html5lib": "1.1",
    "pymysql": null,
    "psycopg2": null,
    "jinja2": "3.1.2",
    "IPython": "8.4.0",
    "pandas_datareader": "0.10.0",
    "bs4": "4.11.1",
    "bottleneck": null,
    "brotli": 

#### 2. How to create a series from a list, numpy array and dict?

In [5]:
import numpy as np

mylist = list('abcedfghijklmnopqrstuvwxyz')

myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

# Solution: pd.Series accepts each of the three containers directly.
ser1 = pd.Series(mylist)   # list  -> values, default integer index
ser2 = pd.Series(myarr)    # array -> values, default integer index
ser3 = pd.Series(mydict)   # dict  -> keys become the index, values the data

#### 3. How to convert the index of a series into a column of a dataframe?

In [6]:
# Input: the integers 0..25, labelled by the letter sequence below
mylist = [*'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(len(mylist))
mydict = {letter: number for letter, number in zip(mylist, myarr)}
ser = pd.Series(mydict)

In [7]:
# Solution: reset_index() moves the index labels into an ordinary column
df = ser.reset_index()
print(df.iloc[:5])

  index  0
0     a  0
1     b  1
2     c  2
3     e  3
4     d  4


#### 4. How to combine many series to form a dataframe?

In [8]:
# Input
import numpy as np
ser1 = pd.Series([*'abcedfghijklmnopqrstuvwxyz'])
ser2 = pd.Series(np.arange(26))
# Solution 1: place the series side by side as columns
df = pd.concat((ser1, ser2), axis='columns')
# Solution 2: map explicit column names to the series; this rebinds df,
# so the frame printed below is this one
df = pd.DataFrame(dict(col1=ser1, col2=ser2))
print(df.iloc[:5])

  col1  col2
0    a     0
1    b     1
2    c     2
3    e     3
4    d     4


#### 5. How to assign a name to a series?

In [9]:
# Input
ser = pd.Series([*'abcedfghijklmnopqrstuvwxyz'])
# Solution: this labels the Series itself (shown as "Name:" in the output);
# the index's own label would be ser.index.name instead
ser = ser.rename('alphabets')
ser.iloc[:5]

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object

#### 6. How to get the items of series A not present in series B?

In [10]:
# Input
ser1, ser2 = pd.Series([1, 2, 3, 4, 5]), pd.Series([4, 5, 6, 7, 8])
# Solution: flag the ser1 values that also occur in ser2, then keep the rest
ser1.loc[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

#### 7. How to get the items not common to both series A and series B?

In [11]:
# Input
ser1, ser2 = pd.Series([1, 2, 3, 4, 5]), pd.Series([4, 5, 6, 7, 8])
# Solution: everything in the union that is missing from the intersection
ser_u = pd.Series(np.union1d(ser1, ser2))      # values found in either series
ser_i = pd.Series(np.intersect1d(ser1, ser2))  # values found in both series
ser_u.loc[~ser_u.isin(ser_i)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

#### 8. How to get the minimum, 25th percentile, median, 75th percentile, and maximum of a numeric series?

In [12]:
# Input: 25 normal draws (mean 10, standard deviation 5) from a generator seeded with 100
state = np.random.RandomState(seed=100)
ser = pd.Series(state.normal(loc=10, scale=5, size=25))
# Solution: the 0th and 100th percentiles are the minimum and maximum
np.percentile(ser, [0, 25, 50, 75, 100])

array([ 1.25117263,  7.70986507, 10.92259345, 13.36360403, 18.0949083 ])

#### 9. How to get frequency counts of unique items of a series?

In [13]:
# Input
# Draw from a seeded generator (as the percentile exercise does) so the
# frequency table is the same on every Restart & Run All.
state = np.random.RandomState(100)
ser = pd.Series(np.take(list('abcdefgh'), state.randint(8, size=30)))
# Solution: value_counts() tallies each distinct item, most frequent first
ser.value_counts()

e    7
a    5
b    4
d    4
h    4
f    2
c    2
g    2
dtype: int64

#### 10. How to keep only the top 2 most frequent values as they are and replace everything else with ‘Other’?

In [14]:
# Input
# A bare RandomState(100) call seeds nothing: it builds a generator that is
# then thrown away. Keep the generator and draw from it so the series is the
# same on every run.
state = np.random.RandomState(100)
ser = pd.Series(state.randint(1, 5, [12]))
# Solution
print("Top 2 Freq:", ser.value_counts())
top2 = ser.value_counts().index[:2]  # value_counts() orders by descending frequency
# Cast to object first so the string label can sit beside the integers without
# relying on implicit dtype upcasting during assignment.
ser = ser.astype(object).where(ser.isin(top2), 'Other')
ser

Top 2 Freq: 3    5
1    4
4    2
2    1
dtype: int64


0         3
1         3
2         3
3         1
4         1
5         3
6     Other
7         3
8     Other
9         1
10        1
11    Other
dtype: object