In [2]:
import numpy as np
import pandas as pd

### NumPy Basics

In [3]:
# 1. Build a one-dimensional array holding the integers 0 through 9.

q1 = np.arange(0, 10)  # the stop value (10) is exclusive

print(q1)

[0 1 2 3 4 5 6 7 8 9]


In [4]:
# 2. Build a 3x3 boolean array in which every entry is True.

q2 = np.full((3, 3), True)  # dtype is inferred as bool from the fill value
print(q2)

[[ True  True  True]
 [ True  True  True]
 [ True  True  True]]


In [5]:
# 3. From an input array, keep (and print) only the odd values.

c = np.arange(10)
q3 = c[np.mod(c, 2) == 1]  # boolean mask: remainder 1 after dividing by 2
print(q3)

[1 3 5 7 9]


In [6]:
# 4. Turn a 1D array into a 2D array that has exactly 2 rows.

e = np.arange(10)
q4 = np.reshape(e, (2, -1))  # -1 lets NumPy work out the column count
print(q4)

[[0 1 2 3 4]
 [5 6 7 8 9]]


In [7]:
# 5. Stack two arrays on top of each other (row-wise).

a = np.reshape(np.arange(10), (2, -1))
b = np.full((2, 5), 1)
q5 = np.concatenate((a, b), axis=0)  # axis 0 appends b's rows below a's rows
print(q5)

[[0 1 2 3 4]
 [5 6 7 8 9]
 [1 1 1 1 1]
 [1 1 1 1 1]]


In [8]:
# 6. Generate a patterned sequence from a base array without typing it out.
# target: [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]

j = np.array([1, 2, 3])
# First half repeats each element three times, second half repeats the
# whole array three times; the two halves are then joined end to end.
q6 = np.concatenate((j.repeat(3), np.tile(j, 3)))
print(q6)

[1 1 1 2 2 2 3 3 3 1 2 3 1 2 3 1 2 3]


In [9]:
# 7. Find the values that appear in both a and b.

a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
# Keep the elements of a that also occur in b, then collapse duplicates;
# np.unique also returns the survivors in sorted order.
q7 = np.unique(a[np.isin(a, b)])
print(q7)

[2 4]


In [10]:
# 8. Drop from a every value that also occurs in b.

a = np.array([1,2,3,4,5])
b = np.array([5,6,7,8,9])
# Mask out the elements of a found in b; np.unique then yields the
# remaining values sorted and without duplicates.
q8 = np.unique(a[~np.isin(a, b)])
print(q8)

[1 2 3 4]


In [11]:
# 9. Pick the items of the array that lie strictly between 5 and 10
#    (both bounds are excluded, so 5 and 10 themselves are not selected).

a = np.array([2, 6, 1, 9, 10, 3, 27])
q9 = a[np.logical_and(a > 5, a < 10)]
print(q9)

[6 9]


In [12]:
# 10. Convert the function maxx that works on two scalars, to work on two arrays. (~1 line)

def maxx(x, y):
    """Return the larger of two scalars; when they compare equal, x is returned."""
    return x if x >= y else y
    
maxx(1, 5)  # scalar call; the result is neither stored nor printed

a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])
# np.vectorize wraps the scalar function so it is applied pair by pair
# across the two arrays.
pair_max = np.vectorize(maxx)
q10 = pair_max(a, b)
print(q10)

[6 7 9 8 9 7 5]


In [13]:
# 11. Swap the first two columns of a 3x3 array (columns 1 and 2 when
#     counting from one, i.e. indices 0 and 1).

q11 = np.arange(9).reshape((3, 3))
# The right-hand side uses list indexing, which produces a copy, so the
# in-place write to the first two columns cannot overwrite its own source.
q11[:, :2] = q11[:, [1, 0]]
print(q11)

[[1 0 2]
 [4 3 5]
 [7 6 8]]


In [14]:
# 12. Swap the first two rows of a 3x3 array (rows 1 and 2 when counting
#     from one, i.e. indices 0 and 1).

q12 = np.arange(9).reshape((3, 3))
# List indexing on the right yields a copy of the rows in swapped order,
# which is then written back over the first two rows.
q12[:2] = q12[[1, 0]]
print(q12)

[[3 4 5]
 [0 1 2]
 [6 7 8]]


In [15]:
# 13. Reverse the order of the rows of the 2D array arr.

arr = np.arange(9).reshape((3, 3))
q13 = np.flipud(arr)  # flips along axis 0: last row first
print(q13)

[[6 7 8]
 [3 4 5]
 [0 1 2]]


In [16]:
# 14. Reverse the order of the columns of the 2D array arr.

arr = np.arange(9).reshape((3, 3))
q14 = np.fliplr(arr)  # flips along axis 1: last column first
print(q14)

[[2 1 0]
 [5 4 3]
 [8 7 6]]


In [17]:
# 15. Limit how many items are shown when the NumPy array a is printed:
#     arrays with more than 6 elements are summarized with an ellipsis.
#
# np.printoptions is a context manager, so the threshold only applies inside
# the `with` block. np.set_printoptions would change the setting for the
# whole kernel session and silently alter how every later cell prints arrays.

a = np.arange(15)
with np.printoptions(threshold=6):
    # repr() must be evaluated inside the block; a bare trailing `a` would be
    # rendered after the context manager has already restored the defaults.
    print(repr(a))

array([0, 1, 2, 3, 4, 5, 6])

### Pandas Basics

In [18]:
# 1. Load the CSV file 'data.csv' into a DataFrame called q1. The file's
#    first column is used as the row index, so no extra index is generated.

q1 = pd.read_csv(filepath_or_buffer='data.csv', index_col=0)

q1

Unnamed: 0,name,age,number_of_pets
0,Alice,23.0,2
1,Bob,19.0,0
2,Charlie,51.0,1
3,Dan,47.0,5
4,Edith,64.0,1
5,Frank,68.0,2
6,George,,3
7,Hannah,33.0,0
8,Ingrid,41.0,2
9,Jerry,38.0,1


In [19]:
# 2. Take the first 3 rows of the DataFrame q1.

q2 = q1.iloc[:3]  # positional slice: rows at positions 0, 1 and 2

q2

Unnamed: 0,name,age,number_of_pets
0,Alice,23.0,2
1,Bob,19.0,0
2,Charlie,51.0,1


In [20]:
# 3. Keep only the 'name' and 'age' columns of the DataFrame q1.

q3 = q1.loc[:, ['name', 'age']]  # all rows, two columns selected by label

q3

Unnamed: 0,name,age
0,Alice,23.0
1,Bob,19.0
2,Charlie,51.0
3,Dan,47.0
4,Edith,64.0
5,Frank,68.0
6,George,
7,Hannah,33.0
8,Ingrid,41.0
9,Jerry,38.0


In [21]:
# 4. Pick the rows labelled 3, 4 and 8 together with the columns
#    'name', 'age' and 'number_of_pets'.

q4 = q1.loc[
    [3, 4, 8],                          # row labels (index values, not positions)
    ['name', 'age', 'number_of_pets'],  # column labels
]

q4

Unnamed: 0,name,age,number_of_pets
3,Dan,47.0,5
4,Edith,64.0,1
8,Ingrid,41.0,2


In [22]:
# 5. Keep only the rows whose number of pets is strictly greater than 3.

q5 = q1.loc[q1['number_of_pets'].gt(3)]

q5

Unnamed: 0,name,age,number_of_pets
3,Dan,47.0,5


In [23]:
# 6. Keep only the rows whose age is missing (NaN).

q6 = q1.loc[q1['age'].isna()]

q6

Unnamed: 0,name,age,number_of_pets
6,George,,3


In [24]:
# 7. Keep the rows whose student name is neither Dan nor Alice.

# isin() flags the rows to exclude; ~ inverts the mask to keep the rest.
q7 = q1.loc[~q1['name'].isin(['Dan', 'Alice'])]

q7

Unnamed: 0,name,age,number_of_pets
1,Bob,19.0,0
2,Charlie,51.0,1
4,Edith,64.0,1
5,Frank,68.0,2
6,George,,3
7,Hannah,33.0,0
8,Ingrid,41.0,2
9,Jerry,38.0,1


In [25]:
# 8. Keep the rows where the age lies between 30 and 50, both ends included.

q8 = q1[q1['age'].between(30, 50)]  # between() is inclusive on both sides by default

q8

Unnamed: 0,name,age,number_of_pets
3,Dan,47.0,5
7,Hannah,33.0,0
8,Ingrid,41.0,2
9,Jerry,38.0,1


In [26]:
# 9. Add up the ages of all students.

q9 = q1['age'].sum(skipna=True)  # missing ages are left out of the total (the default)

q9

384.0

In [27]:
# 10. Compute the average age across all students.

q10 = q1.loc[:, 'age'].mean()  # NaN ages are ignored by default

q10

42.666666666666664

In [28]:
# 11. Count the number of students (one student per row of q1).
#
# len() counts every row. Series.count() only counts non-missing entries, so
# counting the 'name' column would under-report the total whenever a
# student's name is NaN.

q11 = len(q1)

q11

10

In [29]:
# 12. Sort the DataFrame by 'age' in descending order, breaking ties by
#     'name' in ascending order.

q12 = q1.sort_values(
    ['age', 'name'],
    ascending=[False, True],  # one flag per sort key, in the same order
)

q12

Unnamed: 0,name,age,number_of_pets
5,Frank,68.0,2
4,Edith,64.0,1
2,Charlie,51.0,1
3,Dan,47.0,5
8,Ingrid,41.0,2
9,Jerry,38.0,1
7,Hannah,33.0,0
0,Alice,23.0,2
1,Bob,19.0,0
6,George,,3


In [30]:
# 13. Add a new column 'course' whose value is 'big data' in every row.

q13 = q1.copy()             # work on a copy so q1 itself is left unchanged
q13['course'] = 'big data'  # a scalar is broadcast to all rows

q13

Unnamed: 0,name,age,number_of_pets,course
0,Alice,23.0,2,big data
1,Bob,19.0,0,big data
2,Charlie,51.0,1,big data
3,Dan,47.0,5,big data
4,Edith,64.0,1,big data
5,Frank,68.0,2,big data
6,George,,3,big data
7,Hannah,33.0,0,big data
8,Ingrid,41.0,2,big data
9,Jerry,38.0,1,big data


In [31]:
# 14. Remove the 'number_of_pets' column from the DataFrame.

q14 = q1.drop('number_of_pets', axis='columns')  # returns a new frame; q1 is untouched

q14

Unnamed: 0,name,age
0,Alice,23.0
1,Bob,19.0
2,Charlie,51.0
3,Dan,47.0
4,Edith,64.0
5,Frank,68.0
6,George,
7,Hannah,33.0
8,Ingrid,41.0
9,Jerry,38.0


In [32]:
# 15. Delete the row(s) where the age is missing.
#
# subset=['age'] restricts the NaN check to the 'age' column. A bare
# dropna() would also discard rows that are missing a value in any other
# column (e.g. 'name' or 'number_of_pets'), which is not what is asked.

q15 = q1.dropna(subset=['age'])

q15

Unnamed: 0,name,age,number_of_pets
0,Alice,23.0,2
1,Bob,19.0,0
2,Charlie,51.0,1
3,Dan,47.0,5
4,Edith,64.0,1
5,Frank,68.0,2
7,Hannah,33.0,0
8,Ingrid,41.0,2
9,Jerry,38.0,1
