In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import timeit

def test_solutions(*solutions, number):
    for idx, solution in enumerate(solutions):
        def timed_solution():
            solution()
            
        time = timeit.timeit(timed_solution, number=number)
        print(f"Solution {idx + 1} Time: {time:.6f} seconds")

def test_solutions_1(*solutions, df, number):
    for idx, solution in enumerate(solutions):
        def timed_solution():
            solution(df.copy())
            
        time = timeit.timeit(timed_solution, number=number)
        print(f"Solution {idx + 1} Time: {time:.6f} seconds")

def test_solutions_3(*solutions, df1, df2, number):
    for idx, solution in enumerate(solutions):
        def timed_solution():
            solution(df1.copy(), df2.copy())
            
        time = timeit.timeit(timed_solution, number=number)
        print(f"Solution {idx + 1} Time: {time:.6f} seconds")

##### 1. Write a Pandas program to select rows where the value in the 'A' column is greater than 4.

In [5]:
# Ten random rows: 'A' holds integers in [0, 10), 'B' holds multiples of 10
# in [0, 100), and 'C' holds standard-normal floats.
df = pd.DataFrame(dict(
    A=np.random.randint(low=0, high=10, size=10),
    B=10 * np.random.randint(low=0, high=10, size=10),
    C=np.random.randn(10),
))

# Boolean mask: keep only the rows whose 'A' value is greater than 4.
df.loc[df['A'] > 4]

Unnamed: 0,A,B,C
0,9,40,0.684853
1,9,0,-0.255233
2,7,20,1.018076
3,7,70,1.856878
5,7,20,0.147511
7,6,90,0.067386
8,9,20,-0.244814
9,7,20,1.64497


##### 2. Write a Pandas program to select only the 'X' and 'Y' columns from the DataFrame.

In [6]:
# Ten random rows with an extra column 'A' that the selection leaves out.
df = pd.DataFrame(dict(
    A=np.random.randint(low=0, high=10, size=10),
    X=10 * np.random.randint(low=0, high=10, size=10),
    Y=np.random.randn(10),
))

# All rows, restricted to the two requested columns (in this order).
df.loc[:, ['X', 'Y']]

Unnamed: 0,X,Y
0,40,0.32064
1,70,1.160488
2,50,-1.141653
3,10,0.286486
4,20,-0.597266
5,60,0.791328
6,60,-0.071735
7,20,0.202255
8,0,0.873579
9,0,-2.164096


##### 3. Write a Pandas program to set a MultiIndex and access specific data using it.

In [38]:
# Two index levels: programming language (outer) and grade (inner).
index = [
    ['python', 'scala', 'R', 'Java'],
    ['Junior', 'Middle', 'Senior']
]

# Cartesian product of the levels: one row per (language, grade) pair.
multi_index = pd.MultiIndex.from_product(index)

df = pd.DataFrame({
    'Number employees': np.random.randint(0,6, multi_index.size)
}, index=multi_index)

print("original DataFrame:")
display(df)

# Outer-level label only: every grade of 'python'; the result is indexed by
# grade alone.
display(df.loc['python'])
# One inner-level label across all languages; the result is indexed by
# language alone.
display(df.loc[:, 'Senior', :])
# A list of inner-level labels keeps both index levels in the result.
display(df.loc[:, ['Middle', 'Senior'], :])
# A list on the outer level combined with a single inner label.
display(df.loc[['python', 'scala'], 'Junior', :])
# A full (language, grade) tuple yields a Series for that single row;
# to_frame().T turns it back into a one-row frame for display.
display(df.loc[('scala', 'Senior')].to_frame().T)
# Cross-section on the inner level (level=1); the result is indexed by language.
display(df.xs('Middle', level=1))

original DataFrame:


Unnamed: 0,Unnamed: 1,Number employees
python,Junior,4
python,Middle,4
python,Senior,0
scala,Junior,5
scala,Middle,2
scala,Senior,3
R,Junior,0
R,Middle,0
R,Senior,4
Java,Junior,3


Unnamed: 0,Number employees
Junior,4
Middle,4
Senior,0


Unnamed: 0,Number employees
python,0
scala,3
R,4
Java,3


Unnamed: 0,Unnamed: 1,Number employees
python,Middle,4
scala,Middle,2
R,Middle,0
Java,Middle,0
python,Senior,0
scala,Senior,3
R,Senior,4
Java,Senior,3


Unnamed: 0,Unnamed: 1,Number employees
python,Junior,4
scala,Junior,5


Unnamed: 0,Unnamed: 1,Number employees
scala,Senior,3


Unnamed: 0,Number employees
python,4
scala,2
R,0
Java,0


##### 4. Write a Pandas program to slice DataFrame based on MultiIndex levels.

In [57]:
# Outer level: 0, 10, ..., 90; inner level: 0..4 (50 combinations in total).
index = [10 * np.arange(10), np.arange(5)]
multi_index = pd.MultiIndex.from_product(index)

df = pd.DataFrame({'value': np.random.randn(len(multi_index))}, index=multi_index)

# pd.IndexSlice[...] builds exactly the same slice/tuple keys as writing the
# slices directly inside .loc[...].

# Label slice on the outer level only.
display(df.loc[pd.IndexSlice[0:50]])
# Outer labels 0 through 50 (inclusive), inner label 1.
display(df.loc[pd.IndexSlice[0:50, 1, :]])
# Outer labels 0 through 50, inner labels 2 through 3.
display(df.loc[pd.IndexSlice[0:50, 2:3, :]])

Unnamed: 0,Unnamed: 1,value
0,0,-1.360661
0,1,1.212898
0,2,-0.610985
0,3,1.547452
0,4,1.350648
10,0,0.042684
10,1,0.02286
10,2,-0.572221
10,3,-0.001511
10,4,2.975542


Unnamed: 0,Unnamed: 1,value
0,1,1.212898
10,1,0.02286
20,1,1.451941
30,1,-1.457188
40,1,0.307282
50,1,0.583531


Unnamed: 0,Unnamed: 1,value
0,1,1.212898
0,2,-0.610985
0,3,1.547452
10,1,0.02286
10,2,-0.572221
10,3,-0.001511
20,1,1.451941
20,2,0.79231
20,3,1.322291
30,1,-1.457188


##### 5. Write a Pandas program to swap the levels of a MultiIndex DataFrame.

In [71]:
# Outer level: 0, 10, ..., 90; inner level: 0..4.
index = [10 * np.arange(10), np.arange(5)]
multi_index = pd.MultiIndex.from_product(index)

df = pd.DataFrame({'value': np.random.randn(len(multi_index))}, index=multi_index)

print('Original DataFrame:')
# Work on the rows whose outer label is 10 or 20; none of the calls below
# modifies this selection in place.
subset = df.loc[pd.IndexSlice[10:20, :, :]]
display(subset)

# Three spellings of the same swap, each sorted afterwards.
default_swap = subset.swaplevel()
display(default_swap.sort_index())

explicit_swap = subset.swaplevel(0, 1)
display(explicit_swap.sort_index())

reversed_args_swap = subset.swaplevel(1, 0)
display(reversed_args_swap.sort_index())

# Manual route: move both levels into columns named 0 and 1, then rebuild the
# index in the opposite order.
as_columns = subset.reset_index(names=[0, 1])
display(as_columns.set_index([1, 0]).sort_index())

Original DataFrame:


Unnamed: 0,Unnamed: 1,value
10,0,-1.062598
10,1,-0.585863
10,2,-0.076948
10,3,-1.827181
10,4,0.013815
20,0,-1.156049
20,1,-0.699482
20,2,-0.205718
20,3,1.394287
20,4,1.087222


Unnamed: 0,Unnamed: 1,value
0,10,-1.062598
0,20,-1.156049
1,10,-0.585863
1,20,-0.699482
2,10,-0.076948
2,20,-0.205718
3,10,-1.827181
3,20,1.394287
4,10,0.013815
4,20,1.087222


Unnamed: 0,Unnamed: 1,value
0,10,-1.062598
0,20,-1.156049
1,10,-0.585863
1,20,-0.699482
2,10,-0.076948
2,20,-0.205718
3,10,-1.827181
3,20,1.394287
4,10,0.013815
4,20,1.087222


Unnamed: 0,Unnamed: 1,value
0,10,-1.062598
0,20,-1.156049
1,10,-0.585863
1,20,-0.699482
2,10,-0.076948
2,20,-0.205718
3,10,-1.827181
3,20,1.394287
4,10,0.013815
4,20,1.087222


Unnamed: 0_level_0,Unnamed: 1_level_0,value
1,0,Unnamed: 2_level_1
0,10,-1.062598
0,20,-1.156049
1,10,-0.585863
1,20,-0.699482
2,10,-0.076948
2,20,-0.205718
3,10,-1.827181
3,20,1.394287
4,10,0.013815
4,20,1.087222


##### 6. Write a Pandas program to reset the index of a MultiIndex DataFrame.

In [76]:
# Outer level: 10, 20, ..., 100; inner level: 1..5.
index = [10 * np.arange(1, 11), np.arange(1, 6)]
multi_index = pd.MultiIndex.from_product(index)

df = pd.DataFrame({'value': np.random.randn(len(multi_index))}, index=multi_index)

# Move both unnamed index levels into ordinary columns, naming them on the way.
level_names = ['floor', 'cabinet']
df.reset_index(names=level_names)

Unnamed: 0,floor,cabinet,value
0,10,1,-0.343223
1,10,2,-0.845076
2,10,3,-0.95555
3,10,4,0.856274
4,10,5,-0.704746
5,20,1,0.95358
6,20,2,0.20894
7,20,3,-0.113731
8,20,4,0.765627
9,20,5,-0.88836


##### 7. Write a Pandas program that uses .loc for indexing.

In [85]:
# Single-level integer index 1..50, so the .loc keys below are plain labels
# (not positions).
df = pd.DataFrame(
    {'value': np.random.randn(50)},
    index=np.arange(1, 51)
)

# Scalar label -> the matching row, returned as a Series.
display(df.loc[10])
# Label slice -> a DataFrame; with .loc both endpoints are included.
display(df.loc[10:20])

value    0.998263
Name: 10, dtype: float64

Unnamed: 0,value
10,0.998263
11,-1.835567
12,1.530657
13,-1.152718
14,-0.224427
15,-0.495569
16,0.536519
17,0.151592
18,0.970659
19,0.667054


##### 8. Write a Pandas program to use Boolean indexing to select rows where column 'X' > 6.

In [89]:
# Single column 'X' holding the integers 1..10.
df = pd.DataFrame(np.arange(1, 11), columns=['X'])

# Boolean Series aligned with df: True where 'X' is greater than 6.
above_six = df['X'] > 6
display(df[above_six])

Unnamed: 0,X
6,7
7,8
8,9
9,10


##### 9. Write a Pandas program to select the first three rows using iloc.

In [90]:
# Single column 'X' holding the integers 1..10.
df = pd.DataFrame(np.arange(1, 11), columns=['X'])

# Positional slice: rows at positions 0, 1 and 2 (the end position is excluded).
display(df.iloc[0:3])

Unnamed: 0,X
0,1
1,2
2,3


##### 10. Write a Pandas program to use .loc to select rows based on a condition.

In [91]:
# Single column 'X' holding the integers 1..10.
df = pd.DataFrame(np.arange(1, 11), columns=['X'])

# Condition: 'X' is even. .loc accepts the boolean Series as a row selector.
is_even = (df['X'] % 2) == 0
display(df.loc[is_even])

Unnamed: 0,X
1,2
3,4
5,6
7,8
9,10


##### 11. Write a Pandas program that uses .loc to set values in the DataFrame.

In [93]:
# Single column 'X' holding the integers 1..10.
df = pd.DataFrame(
    {'X': np.arange(1, 11)}
)

# Build both masks from the original values before assigning anything, so the
# second assignment does not depend on what the first one wrote.
is_even = df['X'] % 2 == 0
is_odd = df['X'] % 2 == 1

# Name the target column explicitly: .loc[mask] = value would overwrite every
# column of the selected rows.
df.loc[is_even, 'X'] = 0
df.loc[is_odd, 'X'] = 1
display(df)

Unnamed: 0,X
0,1
1,0
2,1
3,0
4,1
5,0
6,1
7,0
8,1
9,0


##### 12. Write a Pandas program that uses .loc to slice DataFrame based on row and column labels.

In [98]:
# 10 x 5 standard-normal values; rows labelled 10, 20, ..., 100 and columns
# labelled 'A'..'E'.
df = pd.DataFrame(
    np.random.randn(10, 5),
    index=np.arange(10, 101, 10),
    columns=list('ABCDE'),
)
display(df)

# Single row label and single column label -> one scalar value.
display(df.loc[20, 'A'])

# Label slices include both endpoints on each axis: rows 30 through 70,
# columns 'B' through 'D'.
display(df.loc[30:70, 'B':'D'])

Unnamed: 0,A,B,C,D,E
10,1.597094,2.429674,0.2883,0.262291,1.025075
20,0.110093,-0.659426,0.758351,0.225638,-0.958455
30,-0.252941,-1.497609,0.525288,1.697346,0.049968
40,-0.306859,0.432706,-0.176411,-0.416319,0.133188
50,-0.804817,1.624705,1.340183,-0.841162,0.058175
60,-0.569594,-0.635086,1.935816,-0.026635,1.79561
70,-0.584609,-0.207922,-1.622112,-0.506256,-0.537322
80,-1.152982,-0.924074,0.206028,-0.530905,0.327956
90,0.216805,-2.033049,-0.348556,-0.062678,-1.454284
100,0.637924,1.254576,-1.806771,1.208824,0.156183


np.float64(0.11009259758651056)

Unnamed: 0,B,C,D
30,-1.497609,0.525288,1.697346
40,0.432706,-0.176411,-0.416319
50,1.624705,1.340183,-0.841162
60,-0.635086,1.935816,-0.026635
70,-0.207922,-1.622112,-0.506256


##### 13. Write a Pandas program to select rows where column 'X' > 5 and column 'Y' < 5.

In [105]:
# 10 x 5 random integers in [1, 10); rows labelled 10, 20, ..., 100.
df = pd.DataFrame(
    np.random.randint(1, 10, size=(10, 5)),
    index=np.arange(10, 101, 10),
    columns=list('ABCXY'),
)
display(df)

# Both conditions must hold for a row to be kept.
x_above_five = df['X'] > 5
y_below_five = df['Y'] < 5
display(df[x_above_five & y_below_five])

Unnamed: 0,A,B,C,X,Y
10,1,8,1,7,9
20,9,2,7,3,4
30,8,5,5,8,8
40,5,6,9,9,9
50,9,8,2,7,5
60,6,2,1,4,4
70,7,3,5,5,1
80,8,8,2,6,4
90,2,5,9,5,7
100,1,7,5,7,2


Unnamed: 0,A,B,C,X,Y
80,8,8,2,6,4
100,1,7,5,7,2


##### 14. Write a Pandas program that uses .loc to slice a MultiIndex DataFrame.

In [112]:
# 25 random integers in [1, 100) indexed by (10..50 step 10) x (1..5).
# No column name is given, so the single column is labelled 0.
df = pd.DataFrame(
    np.random.randint(1, 100, size=25),
    index=pd.MultiIndex.from_product([np.arange(10, 51, 10), np.arange(1, 6)]),
)

# pd.IndexSlice[...] builds exactly the same tuple of slices as writing the
# slices directly inside .loc[...].

# Outer labels 20 through 30 and inner labels 2 through 3.
display(df.loc[pd.IndexSlice[20:30, 2:3, :]])
# Outer labels 20 through 30, every inner label.
display(df.loc[pd.IndexSlice[20:30, :, :]])
# Every outer label, inner labels 2 through 3.
display(df.loc[pd.IndexSlice[:, 2:3, :]])

Unnamed: 0,Unnamed: 1,0
20,2,2
20,3,52
30,2,51
30,3,13


Unnamed: 0,Unnamed: 1,0
20,1,24
20,2,2
20,3,52
20,4,35
20,5,6
30,1,19
30,2,51
30,3,13
30,4,44
30,5,61


Unnamed: 0,Unnamed: 1,0
10,2,2
10,3,87
20,2,2
20,3,52
30,2,51
30,3,13
40,2,83
40,3,74
50,2,39
50,3,63


##### 15. Write a Pandas program that uses MultiIndex to select data based on conditions.

In [118]:
# 25 random integers in [1, 100) indexed by (10..50 step 10) x (1..5).
# No column name is given, so the single column is labelled 0.
df = pd.DataFrame(
    np.random.randint(1, 100, size=25),
    index=pd.MultiIndex.from_product([np.arange(10, 51, 10), np.arange(1, 6)]),
)

# pd.IndexSlice[20, :] is the tuple (20, slice(None)): all rows whose outer
# label is 20. The displayed result is indexed by the inner level only.
display(df.loc[pd.IndexSlice[20, :]])

Unnamed: 0,0
1,42
2,67
3,33
4,20
5,5
