Practice Questions for NumPy

In [3]:
import numpy as np
# 1️⃣ Build two 2x2 arrays A and B, then join them row-wise and column-wise
A = np.array([[1, 2],
              [3, 4]])
B = np.array([[5, 6],
              [7, 8]])

# Joining along axis 0 places B's rows beneath A's rows (vertical stack)
v_stack = np.concatenate((A, B), axis=0)
# Joining along axis 1 places B's columns to the right of A's columns (horizontal stack)
h_stack = np.concatenate((A, B), axis=1)

print("Vertical Stack:\n", v_stack)
print("Horizontal Stack:\n", h_stack)

Vertical Stack:
 [[1 2]
 [3 4]
 [5 6]
 [7 8]]
Horizontal Stack:
 [[1 2 5 6]
 [3 4 7 8]]


In [4]:
# 2️⃣ Values that appear in both A and B
# intersect1d compares the flattened inputs and returns the sorted unique
# values they share, so the arrays are flattened explicitly here.
common = np.intersect1d(A.ravel(), B.ravel())
print("Common elements:", common)

Common elements: []


In [5]:
# 3️⃣ Keep only the values of A that lie in the closed range [5, 10]
A = np.array([2, 5, 6, 9, 12])
# Boolean mask: True where the element is at least 5 and at most 10
in_range = np.logical_and(A >= 5, A <= 10)
result = A[in_range]
print("Elements between 5 and 10:", result)

Elements between 5 and 10: [5 6 9]


In [6]:
# 4️⃣ Keep the rows of iris_2d whose petal length (3rd column) exceeds 1.5
#     while the sepal length (1st column) is below 5.0
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
# Only the first four columns are read, parsed as floats
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])

sepal_length = iris_2d[:, 0]
petal_length = iris_2d[:, 2]
# Row mask combining both conditions element-wise
keep_rows = np.logical_and(petal_length > 1.5, sepal_length < 5.0)
filtered = iris_2d[keep_rows]
print("Filtered iris_2d rows:\n", filtered)

Filtered iris_2d rows:
 [[4.8 3.4 1.6 0.2]
 [4.8 3.4 1.9 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [4.9 2.4 3.3 1. ]
 [4.9 2.5 4.5 1.7]]


Practice Questions for Pandas

In [9]:
import pandas as pd
# 1️⃣ Take every 20th row (starting at row 0) of the 'Manufacturer', 'Model', and 'Type' columns
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

selected_columns = ['Manufacturer', 'Model', 'Type']
# Narrow to the wanted columns first, then step through the rows 20 at a time
filtered = df[selected_columns].iloc[::20]
print("Every 20th row with selected columns:\n", filtered)

Every 20th row with selected columns:
    Manufacturer    Model     Type
0         Acura  Integra    Small
20     Chrysler  LeBaron  Compact
40        Honda  Prelude   Sporty
60      Mercury   Cougar  Midsize
80       Subaru   Loyale    Small


In [11]:
# 2️⃣ Replace missing values in 'Min.Price' and 'Max.Price' columns with their respective mean
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

df['Min.Price'].fillna(df['Min.Price'].mean(), inplace=True)
df['Max.Price'].fillna(df['Max.Price'].mean(), inplace=True)

print("\nColumns after filling missing values with mean:\n", df[['Min.Price', 'Max.Price']].head())


Columns after filling missing values with mean:
    Min.Price  Max.Price
0  12.900000  18.800000
1  29.200000  38.700000
2  25.900000  32.300000
3  17.118605  44.600000
4  17.118605  21.459091


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Min.Price'].fillna(df['Min.Price'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Max.Price'].fillna(df['Max.Price'].mean(), inplace=True)


In [12]:
# 3️⃣ Get the rows of a dataframe where the row sum is greater than 100
# A seeded generator makes the random frame (and therefore the filtered rows)
# identical on every Restart & Run All.
SEED = 42
ROW_SUM_THRESHOLD = 100
rng = np.random.default_rng(SEED)

# 15 rows x 4 columns of integers drawn uniformly from [10, 40)
df_random = pd.DataFrame(rng.integers(10, 40, size=(15, 4)), columns=list('ABCD'))

# sum(axis=1) adds across the columns, giving one total per row
filtered_rows = df_random[df_random.sum(axis=1) > ROW_SUM_THRESHOLD]
print("\nRows with sum greater than 100:\n", filtered_rows)


Rows with sum greater than 100:
      A   B   C   D
2   13  35  25  36
3   32  21  30  34
5   18  24  23  37
7   35  14  36  22
9   26  33  22  28
11  23  17  29  33
12  28  20  32  21
13  24  36  34  13
