# 101 Pandas Exercises for Data Analysis


### 1. How to import pandas and check the version?

In [1]:
import pandas as pd

In [3]:
print(pd.__version__)  # Version string of the imported pandas package

2.3.1


In [48]:
pd.show_versions(as_json=True)  # Dump system and dependency version details as JSON

{
  "system": {
    "commit": "c888af6d0bb674932007623c0867e1fbd4bdc2c6",
    "python": "3.13.5",
    "python-bits": 64,
    "OS": "Windows",
    "OS-release": "11",
    "Version": "10.0.26100",
    "machine": "AMD64",
    "processor": "AMD64 Family 23 Model 96 Stepping 1, AuthenticAMD",
    "byteorder": "little",
    "LC_ALL": null,
    "LANG": null,
    "LOCALE": {
      "language-code": "English_United States",
      "encoding": "1252"
    }
  },
  "dependencies": {
    "pandas": "2.3.1",
    "numpy": "2.3.1",
    "pytz": "2025.2",
    "dateutil": "2.9.0.post0",
    "pip": "25.1.1",
    "Cython": null,
    "sphinx": null,
    "IPython": "9.4.0",
    "adbc-driver-postgresql": null,
    "adbc-driver-sqlite": null,
    "bs4": null,
    "blosc": null,
    "bottleneck": null,
    "dataframe-api-compat": null,
    "fastparquet": null,
    "fsspec": null,
    "html5lib": null,
    "hypothesis": null,
    "gcsfs": null,
    "jinja2": null,
    "lxml.etree": null,
    "matplotlib": "3.10.3",

### 2. How to create a series from a list, numpy array and dict?
Create a pandas series from each of the items below: a list, a numpy array, and a dictionary.

In [None]:
import numpy as np

# Inputs for the exercise: the same 26 items expressed as three containers.
mylist = [*'abcdefghijklmnopqrstuvwxyz']  # letters 'a'..'z' as a list
myarray = np.arange(len(mylist))  # integers 0..25 as a numpy array
mydict = {letter: number for letter, number in zip(mylist, myarray)}  # letter -> number

In [16]:
# Solution: pd.Series accepts each container directly.
ser1 = pd.Series(data=mylist)   # from a list
ser2 = pd.Series(data=myarray)  # from a numpy array
ser3 = pd.Series(data=mydict)   # from a dict (the keys become the index)
print(ser1)  # prints the whole series (all 26 rows), not just a preview

0     a
1     b
2     c
3     d
4     e
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object


### 3. How to convert the index of a series into a column of a dataframe?

Difficulty Level: L1

Convert the series ser into a dataframe with its index as another column on the dataframe.

In [20]:
# Input: a Series holding 0..25, labelled by the letters in `mylist`.
# Note that 'e' comes before 'd' in the literal, so the index follows that order.
mylist = [*'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(26)
mydict = {letter: number for letter, number in zip(mylist, myarr)}
ser = pd.Series(data=mydict)  # dict keys become the index

In [21]:
# Solution
# reset_index() returns a DataFrame: the old index moves into a column named
# 'index' and the series values land in a column named 0.
df = ser.reset_index()  # Convert the index of the series into a column of a dataframe
print(df.head())  # Display the first five rows of the dataframe

  index  0
0     a  0
1     b  1
2     c  2
3     e  3
4     d  4


### 4. How to combine many series to form a dataframe?

Difficulty Level: L1

Combine ser1 and ser2 to form a dataframe.

In [22]:
import numpy as np

# Input: two series of equal length (26 letters and the integers 0..25).
ser1 = pd.Series([*'abcedfghijklmnopqrstuvwxyz'])
ser2 = pd.Series(np.arange(0, 26))

In [None]:
# Solution 1: put the series side by side; the columns are labelled 0 and 1.
df = pd.concat((ser1, ser2), axis='columns')

# Solution 2: build the frame from a mapping so each column gets a name.
# This rebinds df, so the preview printed below comes from this solution.
df = pd.DataFrame(dict(A=ser1, B=ser2))
print(df.head())

   A  B
0  a  0
1  b  1
2  c  2
3  e  3
4  d  4


### 5. How to assign a name to a series?

Difficulty Level: L1

Give a name to the series ser calling it ‘alphabets’.

In [26]:
# Input: a series of single letters with the default integer index
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

In [27]:
# Solution
# Setting .name labels the series itself (it appears as "Name: alphabets" in
# the repr); the assignment modifies `ser` in place.
ser.name = 'alphabets'
ser.head()

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object

### 6. How to get the items of series A not present in series B?

Difficulty Level: L2

From ser1 remove items present in ser2.

In [28]:
# Input: the two series overlap on the values 4 and 5
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [30]:
# Solution
# isin() flags the ser1 values that also occur in ser2; ~ inverts that mask,
# so only the values unique to ser1 remain (with their original index labels).
ser1[~ser1.isin(ser2)] # Get items in ser1 not present in ser2

0    1
1    2
2    3
dtype: int64

### 7. How to get the items not common to both series A and series B?

Difficulty Level: L2

Get all items of ser1 and ser2 not common to both.

In [31]:
# Input: the two series overlap on the values 4 and 5
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [36]:
# Solution
# Symmetric difference = (union of both series) minus (their intersection).
ser_u = pd.Series(np.union1d(ser1, ser2))      # every distinct value from either series
ser_i = pd.Series(np.intersect1d(ser1, ser2))  # values present in both series
# Compute the result once and keep it under a name; a bare expression in the
# middle of a cell is evaluated and then thrown away without being displayed.
ser_not_common = ser_u[~ser_u.isin(ser_i)]
print(ser_u)
print(ser_i)
print(ser_not_common)  # Items of ser1 or ser2 that are not present in both

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
dtype: int64
0    4
1    5
dtype: int64
0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64


### 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?

Difficulty Level: L2

Compute the minimum, 25th percentile, median, 75th, and maximum of ser.

In [43]:
# Input: 25 draws from a normal distribution with mean 10 and standard deviation 5.
# No seed is set in this cell, so the values differ from run to run.
ser = pd.Series(np.random.normal(10, 5, 25))

In [44]:
# Solution
# q is expressed in percent: 0 -> minimum, 25/50/75 -> quartiles, 100 -> maximum.
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([-5.09307076,  5.39168904,  9.32278659, 13.15004192, 16.78376458])

### 9. How to get frequency counts of unique items of a series?

Difficulty Level: L1

Calculate the frequency counts of each unique value in ser.

In [46]:
# Input: 30 letters picked at random (repeats allowed) from 'a'..'h'.
# No seed is set in this cell, so the sample differs from run to run.
letters = np.array(list('abcdefgh'))
ser = pd.Series(letters[np.random.randint(8, size=30)])  # random positions 0..7 select the letters

In [47]:
# Solution
# value_counts() tallies each distinct value, listing the most frequent first.
ser.value_counts()

d    6
e    5
c    5
b    4
g    3
f    3
h    3
a    1
Name: count, dtype: int64

### 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?
Difficulty Level: L2

From ser, keep the top 2 most frequent items as they are and replace everything else with ‘Other’.

In [50]:
# Input: 12 random integers from 1 to 4 (randint's upper bound 5 is exclusive).
# Constructing a RandomState does not seed numpy's global generator, so the
# seeded generator must be kept and used for the draw; otherwise the seed has
# no effect and the data changes on every run.
rng = np.random.RandomState(100)
ser = pd.Series(rng.randint(1, 5, [12]))

In [52]:
# Solution
# value_counts() sorts by descending frequency, so the first two index labels
# are the two most frequent values.
top2 = ser.value_counts().index[:2]
# where() keeps entries whose mask is True and substitutes 'Other' elsewhere.
# NOTE: this rebinds `ser`, so re-running this cell without re-running the
# input cell operates on the already-replaced data.
ser = ser.where(ser.isin(top2), other='Other')
ser

0         2
1         3
2     Other
3     Other
4     Other
5         3
6         3
7         2
8         2
9         2
10    Other
11        3
dtype: object

### 11. How to bin a numeric series to 10 groups of equal size?
Difficulty Level: L2

Bin the series ser into 10 equal deciles and replace the values with the bin name.

Input

In [65]:
# 20 uniform random floats in [0, 1); no seed is set, so values vary between runs
ser = pd.Series(np.random.random(20))
print(ser)

0     0.444622
1     0.219547
2     0.645647
3     0.669769
4     0.095872
5     0.807244
6     0.883540
7     0.899696
8     0.525016
9     0.564253
10    0.761371
11    0.770052
12    0.271714
13    0.371752
14    0.711706
15    0.968769
16    0.739110
17    0.551263
18    0.527202
19    0.383447
dtype: float64


In [70]:
# Solution
# Eleven quantile edges delimit ten bins; each bin gets an ordinal label.
decile_edges = [0, .10, .20, .3, .4, .5, .6, .7, .8, .9, 1]
decile_labels = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
pd.qcut(ser, q=decile_edges, labels=decile_labels).head()

0    3rd
1    1st
2    6th
3    6th
4    1st
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']

### 12. How to convert a numpy array to a dataframe of given shape? (L1)

Difficulty Level: L1

Reshape the series ser into a dataframe with 7 rows and 5 columns

In [71]:
# Input: 35 random integers from 1 to 9 (randint's upper bound 10 is exclusive)
ser = pd.Series(np.random.randint(1, 10, 35))

In [73]:
# Solution
# Pull out the underlying array, fold its 35 values into a 7x5 grid,
# then wrap the grid in a DataFrame.
grid = ser.to_numpy().reshape(7, 5)
df = pd.DataFrame(grid)
print(df)

   0  1  2  3  4
0  5  6  8  4  2
1  6  1  3  7  1
2  4  4  2  6  1
3  8  9  6  9  4
4  1  3  6  4  4
5  5  8  9  8  2
6  1  3  4  8  6
