In [1]:
import numpy as np
from functools import reduce

# Numpy
The NumPy library contains multidimensional array and matrix data structures (you’ll find more information about this in later sections). It provides ndarray, a homogeneous n-dimensional array object, with methods to efficiently operate on it. NumPy can be used to perform a wide variety of mathematical operations on arrays. It adds powerful data structures to Python that guarantee efficient calculations with arrays and matrices and it supplies an enormous library of high-level mathematical functions that operate on these arrays and matrices. 

Why use NumPy?

NumPy arrays are faster and more compact than Python lists. An array consumes less memory and is convenient to use. NumPy uses much less memory to store data and it provides a mechanism of specifying the data types. This allows the code to be optimized even further.

(https://numpy.org/doc/stable/user/absolute_beginners.html)

# Basic Usage

## Defining arrays and basic operations
Create a 1-dimensional NumPy array with elements [1, 2, 3, 4, 5].

In [22]:
np.array([1,2,3,4,5])

array([1, 2, 3, 4, 5])

 Create a 2-dimensional NumPy array with shape (3, 3) containing all zeros.

In [23]:
z=np.ones((3,3))
print(z)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


Add 5 to every element of the array created

In [24]:
ar=z+5
ar

array([[6., 6., 6.],
       [6., 6., 6.],
       [6., 6., 6.]])

In [4]:
ar=np.array(list(range(0,9))).reshape(3,3)
ar

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [8]:
ar_list=[1,2,3,4,5]
[x+5 for x in ar_list]

[6, 7, 8, 9, 10]

Extract the second and third elements from the array.

In [11]:
# indexing
#ar[-1,-1]
#ar[0][2]
print(ar_list[1], ar_list[2])

2 3


Extract the last two rows of the array created in Exercise 1b.

In [19]:
ar=np.array(list(range(0,9))).reshape(3,3)

ar[1:,]

array([[3, 4, 5],
       [6, 7, 8]])

Calculate the sum of all elements in the array created 

In [20]:
np.sum(ar)

36

In [21]:
ar.shape

(3, 3)

Writing code to sum a 2d array

In [32]:
summ=0
for row in range(0, ar.shape[0]):
    for col in range(0, ar.shape[0]):
        summ += ar[row][col]
summ

36

Using numpy functions to sum arary

In [36]:
np.sum(ar)

36

In [37]:
summ=0
count=0
for row in range(0, ar.shape[0]):
    for col in range(0, ar.shape[0]):
        summ += ar[row][col]
        count+=1
summ
summ/count

4.0

In [38]:
np.mean(ar)

4.0

## Reshape
Reshape the array created in Exercise 1a to have shape (9, 1).

In [41]:
ar=np.array(
[0,1,2,3,4,5,6,7,8]
)
print(ar.shape)
ar

(9,)


array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [61]:
# arr_2d=ar.reshape(3,3)
arr_2d=ar.reshape(-1,3)
#arr_2d = arr_2d.reshape(1,9)
arr_2d

# print(1*3*3*1)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [62]:
ar

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [63]:
ar.reshape(1,9).reshape(3,3)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [65]:
ar= list(range(1,10))
ar=np.array(ar).reshape(3,3)
ar

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

Transpose the array created in Exercise 1b.

In [66]:
ar=ar.reshape(-1,3)

In [60]:
ar=np.array([1,2,3,4,5,6]).reshape(2,3)
ar

array([[1, 2, 3],
       [4, 5, 6]])

In [61]:
np.transpose(ar)

array([[1, 4],
       [2, 5],
       [3, 6]])

In [3]:
arr1 = np.array([[['1', '2', '3'], ['4', '5', '6'], ['7', '8', '9']],
                 [['10', '11', '12'], ['13', '14', '15'], ['16', '17', '18']],
                 [['19', '20', '21'], ['22', '23', '24'], ['25', '26', '27']]])
arr1

array([[['1', '2', '3'],
        ['4', '5', '6'],
        ['7', '8', '9']],

       [['10', '11', '12'],
        ['13', '14', '15'],
        ['16', '17', '18']],

       [['19', '20', '21'],
        ['22', '23', '24'],
        ['25', '26', '27']]], dtype='<U2')

In [11]:
arr1.shape

(3, 3, 3)

In [6]:
arr1[1:3,1:3]

array([[['13', '14', '15'],
        ['16', '17', '18']],

       [['22', '23', '24'],
        ['25', '26', '27']]], dtype='<U2')

In [14]:
#Shape: 5*3*2

arr2 = np.array([[['#', '#', '#', '#', '#'],
  ['0', '0', '0', '0', '0'],
  ['#', '#', '#', '#', '#']],
 [['#', '#', '#', '#', '#'],
  ['#', '#', '#', '#', '#'],
  ['#', '#', '#', '#', '#']]])
arr2

array([[['#', '#', '#', '#', '#'],
        ['0', '0', '0', '0', '0'],
        ['#', '#', '#', '#', '#']],

       [['#', '#', '#', '#', '#'],
        ['#', '#', '#', '#', '#'],
        ['#', '#', '#', '#', '#']]], dtype='<U1')

In [15]:
arr2.shape

(2, 3, 5)

In [16]:
arr2[:1,1:2]

array([[['0', '0', '0', '0', '0']]], dtype='<U1')

In [19]:
arr3 = np.array(range(0,30))
arr3 = arr3.reshape(2, 3, 5)
arr3

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]],

       [[15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]])

In [21]:
arr3 = arr3.reshape(10, 1, 3)
arr3

array([[[ 0,  1,  2]],

       [[ 3,  4,  5]],

       [[ 6,  7,  8]],

       [[ 9, 10, 11]],

       [[12, 13, 14]],

       [[15, 16, 17]],

       [[18, 19, 20]],

       [[21, 22, 23]],

       [[24, 25, 26]],

       [[27, 28, 29]]])

## Timing

In [3]:
my_list=list(range(100000))
np_arr=np.array(my_list)

In [4]:
%%timeit -n 100
[x+5 for x in my_list]

30.3 ms ± 9.86 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [5]:
%%timeit -n 100
np_arr+5

194 µs ± 42.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [6]:
%%timeit -n 100
list_sum=0
for i in my_list:
    list_sum+=i

31.5 ms ± 1.32 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
%%timeit -n 100
np.sum(np_arr)

63.8 µs ± 6.37 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Basic Usage Extended

## Example 1: Creating random arrays
a) Create a 2-dimensional NumPy array with shape (4, 5) containing random integers between 1 and 10.

b) Flatten the array created in Exercise 1a to a 1-dimensional array.

In [26]:
ar = np.random.randint(1,10, (4,5))
ar

array([[4, 9, 4, 3, 7],
       [4, 8, 9, 1, 9],
       [1, 6, 7, 7, 3],
       [8, 1, 4, 8, 9]])

In [27]:
#ar.reshape(1,20)
ar.reshape(-1) #right method

array([4, 9, 4, 3, 7, 4, 8, 9, 1, 9, 1, 6, 7, 7, 3, 8, 1, 4, 8, 9])

In [37]:
np.where(ar>5)

(array([0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], dtype=int64),
 array([1, 4, 1, 2, 4, 1, 2, 3, 0, 3, 4], dtype=int64))

## Example 2: Filtering
a) Find the indices of all elements in the array created in Exercise 1a that are greater than 5.

b) Filter the array created in Exercise 1a to only keep the elements that are divisible by 2

In [32]:
ar_g5 = ar > 5

lis=[]
for row in range(ar.shape[0]):
    for col in range(ar.shape[1]):
        if ar_g5[row][col] == True:
            print(row, col)

0 1
0 4
1 1
1 2
1 4
2 1
2 2
2 3
3 0
3 3
3 4


In [35]:
np.where(ar>5)

(array([0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], dtype=int64),
 array([1, 4, 1, 2, 4, 1, 2, 3, 0, 3, 4], dtype=int64))

In [38]:
print(ar)

[[4 9 4 3 7]
 [4 8 9 1 9]
 [1 6 7 7 3]
 [8 1 4 8 9]]


In [39]:
#ar[ar > 5]

ar[ar > 0]#.reshape(4,5)

array([4, 9, 4, 3, 7, 4, 8, 9, 1, 9, 1, 6, 7, 7, 3, 8, 1, 4, 8, 9])

In [40]:
ar[ar % 2 != 0]

array([9, 3, 7, 9, 1, 9, 1, 7, 7, 3, 1, 9])

# dims

In NumPy, the axis parameter is used to specify the axis along which an operation should be performed. It is an important concept in array operations and plays a significant role in determining the dimensions over which functions like sum, mean, maximum, and minimum are applied.

The axis parameter accepts an integer or tuple of integers that represent the axes of the array. Here are some key points to understand how axis works:

1.    1-D Arrays:
        For a 1-dimensional array, the axis parameter has no effect since there is only one dimension.

2.    Multi-Dimensional Arrays:
        NumPy arrays can have multiple dimensions. For example, a 2-dimensional array has two axes: axis=0 represents the rows, and axis=1 represents the columns.
        When performing operations along an axis, the specified axis is "collapsed" or "reduced" while applying the operation, resulting in a reduced dimensionality.
        The axis parameter can take negative values, where -1 represents the last axis, -2 represents the second last axis, and so on.

3.    Functions that Utilize the axis Parameter:
        Functions like np.sum(), np.mean(), np.max(), np.min(), etc., accept the axis parameter to specify the axis along which the operation is performed.
        When axis is not provided, the operation is performed on the entire array, resulting in a scalar value.
        When axis is specified, the operation is performed along that axis, resulting in an array with reduced dimensions.

In [9]:
arr = np.array([[1, 2, 3],
                [4, 5, 6]])

print(arr, arr.shape)

[[1 2 3]
 [4 5 6]] (2, 3)


In [4]:
# Sum along axis=0 (rows are collapsed)
row_sum = np.sum(arr, axis=0)
print(row_sum, row_sum.shape)


[5 7 9] (3,)


In [5]:
# Mean along axis=1 (columns are collapsed)
column_sum = np.sum(arr, axis=1)
print(column_sum, column_sum.shape)

[ 6 15] (2,)


In [10]:
# Maximum along columns
max_columns = np.max(arr, axis=1)
print(max_columns)
print(max_columns.shape)

[3 6]
(2,)


## Exercises 

### Ex 1
Consider the following 2-dimensional NumPy array:

a) Calculate the sum of each row of the array using the axis parameter.
    
b) Calculate the product of each column of the array using the axis parameter.

For the following arrays:
1. Array 1
```python
arr = np.array([[[1, 2, 3],
                 [4, 5, 6]],
                [[7, 8, 9],
                 [10, 11, 12]]])
```
2. Array 2
```python
ar= list(range(1,28))
ar=np.array(ar).reshape(3,3,3)
ar.shape
```


a) Calculate the sum of each 2x3 sub-array along axis=0.

b) Calculate the maximum value in each row of the sub-arrays along axis=1.

c) Calculate the mean value across all elements of the array.

In [14]:
arr1 = np.array([[1, 2],
                 [3, 4]])
arr2 = np.array([[5, 6],
                 [7, 8]])


# Broadcasting

In NumPy, broadcasting refers to the automatic alignment and operation on arrays with different shapes and sizes. It allows you to perform element-wise operations between arrays of different dimensions without explicitly writing loops. Broadcasting follows a set of rules to determine how the shapes of arrays can be matched and the operation can be applied efficiently.

The broadcasting rules in NumPy are as follows:

    Rule 1: If the arrays have different numbers of dimensions, the shape of the one with fewer dimensions is padded with ones on the left until the dimensions match.

    Rule 2: If the shapes of the arrays do not match in any dimension, the array with shape equal to 1 in that dimension is stretched or "broadcast" to match the other array's shape.

    Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised, indicating an incompatible shape.

By applying these rules, NumPy can perform element-wise operations between arrays of different shapes, making code concise and efficient.

Here are a few examples to illustrate broadcasting:

In [129]:
arr = np.array([1, 2, 3])
scalar = 2
result = arr * scalar
print(result)
# Output: [2 4 6]

[2 4 6]


In this example, the scalar value 2 is broadcasted to match the shape of the 1-D array arr, and element-wise multiplication is performed.

In [133]:
arr1 = np.array([[1, 2, 3],
                 [4, 5, 6]])
arr2 = np.array([1, 2, 3])
result = arr1 + arr2

print(arr1.shape)
print(arr2.shape)
# [[1, 2, 3]
# [1, 2, 3]], (2,3)
print(result)


(2, 3)
(3,)
[[2 4 6]
 [5 7 9]]


In this example, the 1-D array arr2 is broadcasted to match the shape of the 2-D array arr1, and element-wise addition is performed.

## Exercises 

a) Add arr1 and arr2 using broadcasting.

b) Multiply arr1 and arr2 using broadcasting.

In [142]:
arr1 = np.array([1, 2, 3]) #(1,3)
arr2 = np.array([[4], #(3,1)
                 [5],
                 [6]])



[[5 6 7]
 [6 7 8]
 [7 8 9]]


# Expand dims

NumPy's expand_dims function is used to increase the dimensions of an array by inserting a new axis at a specified position. It is particularly useful when you want to reshape or manipulate arrays to match the desired shape or broadcasting requirements. The expanded axis will have a size of 1, and it allows for easier compatibility with other arrays during mathematical operations.

The expand_dims function takes two parameters: a (the input array) and axis (the position where the new axis should be inserted). Here's the syntax:

`numpy.expand_dims(a, axis)`

The a parameter represents the input array that you want to expand, and the axis parameter specifies the position along which the new axis should be inserted. The axis value can be a positive integer or a negative integer to index from the end of the array.

Here are a few examples to illustrate the usage of expand_dims:

In [2]:
ar=np.array([1,2,3])
print(ar, ar.shape)

[1 2 3] (3,)


In [3]:
np.expand_dims(ar, )

array([[1, 2, 3]])

In [40]:
ar=np.array([[1,2,3]])
print(ar, ar.shape)

[[1 2 3]] (1, 3)


In [42]:
np.expand_dims(ar, 1)

array([[[1, 2, 3]]])

In [43]:
arr = np.array([1, 2, 3, 4, 5]) # (5)
expanded_arr = np.expand_dims(arr, axis=1)
print(expanded_arr.shape)  # Output: (5, 1)

(5, 1)


In this example, the expand_dims function is used to add a new axis to the 1-D array arr, resulting in a 2-D array with shape (5, 1).

In [46]:
arr = np.array([[1, 2], # 2,2
                [3, 4]])
expanded_arr = np.expand_dims(arr, axis=1)
print(expanded_arr.shape)  # ,2,1,2
expanded_arr[:,0,:]

(2, 1, 2)


array([[1, 2],
       [3, 4]])

## Exercises 

### Ex 1
a) Use expand_dims to convert arr into a 2-D array with shape (4, 1).

b) Use expand_dims to convert arr into a 2-D array with shape (4, 1, 1).

In [27]:
arr = np.array([1, 2, 3, 4])
arr

array([1, 2, 3, 4])

In [28]:
# a) Use expand_dims to convert arr into a 2-D array with shape (4, 1).

d2 = np.expand_dims(arr, 1)
d2.shape

(4, 1)

In [29]:
# b) Use expand_dims to convert arr into a 2-D array with shape (4, 1, 1).

d3 = np.expand_dims(arr, (1,2))
d3.shape

(4, 1, 1)

### Ex 2
a) Use expand_dims to convert arr into a 3-D array with shape (2, 3, 1).

b) Use expand_dims to convert arr into a 4-D array with shape (2, 1, 3, 1).

In [58]:
# a) Use expand_dims to convert arr into a 3-D array with shape (2, 3, 1).

ar = np.array([[2, 3, 8],
               [4, 5, 9]])

exp = np.expand_dims(ar, 2)
#print(exp)
exp.shape

(2, 3, 1)

In [63]:
# b) Use expand_dims to convert arr into a 4-D array with shape (2, 1, 3, 1).

ar1 = np.array(range(1,7)).reshape(2,3)
exp1 = np.expand_dims(ar, (1,3))
#print(exp1)
exp1.shape

(2, 1, 3, 1)

# hstack, vstack and concat

In NumPy, vstack, hstack, and concatenate are functions used for combining or concatenating arrays along different axes. They provide flexibility when it comes to joining arrays of different shapes or sizes. Here's an explanation of each function:

## vstack
np.vstack: The vstack function is used to vertically stack or concatenate arrays. It takes a sequence of arrays as input and returns a single array with an increased number of rows. The arrays must have the same number of columns (dimension along the horizontal axis). For example:

In [69]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

result = np.vstack((arr1, arr2))
print(result) #(2,3)
np.concatenate((arr1, arr2), axis=0) #(1,6)

[[1 2 3]
 [4 5 6]]


array([1, 2, 3, 4, 5, 6])

In [None]:
# Output:
# [[1 2 3]
#  [4 5 6]]

## hstack
np.hstack: The hstack function is used to horizontally stack or concatenate arrays. It takes a sequence of arrays as input and returns a single array with an increased number of columns. The arrays must have the same number of rows (dimension along the vertical axis). For example:

In [70]:

arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

result = np.hstack((arr1, arr2))
print(result)
# Output: [1 2 3 4 5 6]
np.concatenate((arr1, arr2), axis=0)

[1 2 3 4 5 6]


array([1, 2, 3, 4, 5, 6])

## concatenate
np.concatenate: The concatenate function is a more general function that can concatenate arrays along any axis. It takes a sequence of arrays as input and allows you to specify the axis along which the concatenation should occur using the axis parameter. If no axis parameter is provided, the arrays are flattened and concatenated into a 1-D array. For example:

In [73]:
arr1 = np.array([[1, 2, 3]])
arr2 = np.array([[4, 5, 6]])
np.concatenate((arr1, arr2), axis=1)

array([[1, 2, 3, 4, 5, 6]])

In [76]:
arr1 = np.array([[1, 2], #(2,2)
                 [3, 4]])
arr2 = np.array([[5, 6], # (2,2)
                 [7, 8]])

result = np.concatenate((arr1, arr2), axis=1) # (2,4)
print(result)
result.shape

# Output: # (4,2) 
# [[1 2]
#  [3 4]
#  [5 6]
#  [7 8]]

[[1 2 5 6]
 [3 4 7 8]]


(2, 4)

It's worth noting that vstack, hstack, and concatenate can handle arrays of different shapes as long as the non-concatenated axes are compatible. If the shapes of the arrays are not compatible, you may encounter a ValueError indicating an incompatible shape.

## Exercises 

In [82]:
arr1 = np.array([[1, 2, 3],
                 [4, 5, 6]]) #(2,3)
arr2 = np.array([[7, 8, 9], # (2,3)
                 [10, 11, 12]])
arr3 = np.array([[13, 14], #(2,2)
                 [15, 16]])

- Use vstack to vertically stack arr1 and arr2, and assign the result to a new variable vstack_result.

- Use hstack to horizontally stack arr1 and arr2, and assign the result to a new variable hstack_result.

- Use concatenate to concatenate arr1 and arr3 along the horizontal axis (axis=1), and assign the result to a new variable concat_result.

- Print the vstack_result, hstack_result, and concat_result to verify the results.

In [83]:
# Use vstack to vertically stack arr1 and arr2, and assign the result to a new variable vstack_result.

vstack_result = np.vstack((arr1 ,arr2)) # (4,3)

print(vstack_result)
vstack_result.shape

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


(4, 3)

In [85]:
# Use hstack to horizontally stack arr1 and arr2, and assign the result to a new variable hstack_result.

hstack_result = np.hstack((arr1 ,arr2)) # (2,6)

print(hstack_result)
hstack_result.shape

[[ 1  2  3  7  8  9]
 [ 4  5  6 10 11 12]]


(2, 6)

In [87]:
# Use concatenate to concatenate arr1 and arr3 along the horizontal axis (axis=1), and assign the result to a new variable concat_result.

concat_result = np.concatenate((arr1, arr2), axis=1)
print(concat_result)
concat_result.shape

[[ 1  2  3  7  8  9]
 [ 4  5  6 10 11 12]]


(2, 6)

In [94]:
# Print the vstack_result, hstack_result, and concat_result to verify the results.

print(f' Stack: \n {vstack_result} \n Shape: {vstack_result.shape} \n')
print(f' Stack: \n {hstack_result} \n Shape: {hstack_result.shape}\n')
print(f' Stack: \n {concat_result} \n Shape: {concat_result.shape}')

 Stack: 
 [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]] 
 Shape: (4, 3) 

 Stack: 
 [[ 1  2  3  7  8  9]
 [ 4  5  6 10 11 12]] 
 Shape: (2, 6)

 Stack: 
 [[ 1  2  3  7  8  9]
 [ 4  5  6 10 11 12]] 
 Shape: (2, 6)


In [80]:
arr1 = np.array([[[1, 2, 3],
                  [4, 5, 6]], # (2,2,3)
                 
                 [[7, 8, 9],
                  [10, 11, 12]]])

arr2 = np.array([[[13, 14, 15], # (2,2,3)
                  [16, 17, 18]],
                 
                 [[19, 20, 21],
                  [22, 23, 24]]])

print(arr1.shape)


(2, 2, 3)


In [221]:
arr1 = np.array([[[[1, 2],
                   [3, 4]],
                  
                  [[5, 6],
                   [7, 8]]],

                 [[[9, 10],
                   [11, 12]],
                  
                  [[13, 14],
                   [15, 16]]]])

arr2 = np.array([[[[17, 18],
                   [19, 20]],
                  
                  [[21, 22],
                   [23, 24]]],

                 [[[25, 26],
                   [27, 28]],
                  
                  [[29, 30],
                   [31, 32]]]])

print(arr1.shape)

(2, 2, 2, 2)


(2, 2, 2, 4)

# reordering dims

In NumPy, you can use the transpose function to reorder or swap the dimensions of an array. These functions allow you to manipulate the shape of the array according to your requirements. 

np.transpose: The transpose function is used to permute the dimensions of an array. It takes a tuple of axes as input and returns a view of the array with the axes permuted. The order of the axes in the tuple determines the order of the dimensions in the output array. For example:

In [228]:
arr = np.zeros((2,3))
print(arr.shape)
np.transpose(arr).shape

(2, 3)


(3, 2)

In [229]:
arr=np.array([[1,2,3]]) # (1,3) 
np.transpose(arr, (1,0)) # (3,1)

array([[1],
       [2],
       [3]])

In [26]:
arr = np.array([[1, 2, 3],
                [4, 5, 6]])
transposed_arr = np.transpose(arr, (1, 0))
print(transposed_arr)
# Output:
# [[1 4]
#  [2 5]
#  [3 6]]


[[1 4]
 [2 5]
 [3 6]]


## Exercises 

In [231]:
arr = np.array([[[1, 2],
                 [3, 4]], # (2, 2,2)
                [[5, 6],
                 [7, 8]]])

arr.shape


array([[[1, 2],
        [5, 6]],

       [[3, 4],
        [7, 8]]])

## Ex 1
Use transpose to reorder the dimensions of arr such that the second dimension becomes the first dimension, and the first dimension becomes the second dimension. Assign the result to a new variable reordered_arr.

In [234]:
arr = np.zeros((2,3,4,5))

arr.shape


(5, 2, 4, 3)

## Ex 2
Use transpose  to reorder the dimensions of arr such that the second dimension becomes the first dimension, and the first dimension becomes the second dimension. Assign the result to a new variable reordered_arr.

# File Reading

In [73]:
with open('test.npy', 'wb') as f:
    np.save(f, np.array([1, 2]))
    np.save(f, np.array([1, 3]))


In [75]:
with open('test.npy', 'rb') as f:
    a = np.load(f)
    b = np.load(f)
    
print(a)

[1 2]


# Tasks
Each task has a `.npy` dataset

## Task 1
Suppose you have a 2D array with different households' incomes and family sizes. Using NumPy, can you calculate the average tax paid per person for each household type.

Each dimension corresponds to:
- First dimension: Number of members in household
- Second dimension: Total tax for the househould

Do the following:
- View the number of members and total tax for the last household
- Identify the household with the most tax
- Identify the household with the least tax per person
- Calculate the total tax paid by all households

In [37]:
with open('taxable_data_2d_task1.npy', 'rb') as f:
    a = np.load(f)
print("Shape: ", a.shape)
print("Dim:   ", a.ndim)

Shape:  (10, 2)
Dim:    2


In [32]:
# a: Number of members and total tax for the last household
a[-1]

array([    2, 77254], dtype=int64)

In [44]:
# b: Identify the household with the most tax
m = np.argmax(a[:,1])
a[m]

array([    1, 98723], dtype=int64)

In [50]:
# another method
m = np.argmax(a, axis=0)[1]
a[m]

array([    1, 98723], dtype=int64)

In [36]:
# c: Identify the household with the least tax per person
l = a[:,1]/a[:,0]
z = np.argmin(l)
a[z]

array([    4, 55906], dtype=int64)

In [38]:
# d: Calculate the total tax paid by all households
tax = np.sum(a[:,1])
tax

681261

## Task 2
Imagine you have a 3D array with different scenarios for income, deductions, and exemptions for multiple individuals. Can you use NumPy to compare the total tax liability across scenarios and individuals?

Each dimension corresponds to specific aspects of the data:
- Tax Years
- Individuals
- Financial data (3 types of data: income, deductions and exemptions)

Do the following:
- For the first individual, view the income throughout the years
- For the last individual, view their average exemptions
- For each individual, view their average income, deductions and exemptions

In [52]:
with open('taxable_data_3d_task2.npy', 'rb') as f:
    a = np.load(f)
print("Shape: ", a.shape)
print("Dim:   ", a.ndim)

Shape:  (5, 20, 3)
Dim:    3


In [53]:
# a: For the first individual, view the income throughout the years

a[:,0,0]

array([39215, 75746, 62578, 30135, 56652], dtype=int64)

In [55]:
# For the last individual, view their average exemptions

z = np.mean(a[:,-1,-1])
z

2924.8

In [81]:
# For each individual, view their average income, deductions and exemptions

s = np.mean(a, axis=0)
s

array([[52865.2,  5293. ,  3185.4],
       [53443. ,  5418. ,  2935.6],
       [60511.8,  5434.2,  2823.4],
       [61456. ,  3775.4,  1560.8],
       [62886.4,  4926.8,  2091.8],
       [64312.6,  5086. ,  2109.6],
       [57545.8,  5056.4,  2083.8],
       [52078.2,  5819. ,  3044. ],
       [59903. ,  6057.4,  1967.2],
       [73085.6,  4967. ,  2462.2],
       [55843.8,  5681.8,  2464.2],
       [62954.6,  4705.2,  2252.4],
       [60556.6,  5211.4,  2649.6],
       [70738.2,  4536. ,  2880.2],
       [65284.4,  5230. ,  2506.6],
       [89338.8,  6224.8,  2246.6],
       [55269.4,  4486. ,  1706.8],
       [53823.6,  4025.6,  2775.4],
       [77010. ,  6116.8,  2644.4],
       [62764.8,  3700.2,  2924.8]])

## Task 3

Imagine a 5D array where the dimensions represent taxpayers, tax years, income categories, deductions, and exemptions. Can you use NumPy to create a data cube that allows you to analyze tax data from multiple perspectives?

In this example, we've used random integer values to populate the array. Each dimension corresponds to specific aspects of the data:

- First dimension: Individuals (5 individuals)
- Second dimension: Tax years (5 years)
- Third dimension: Transaction types (2 types, e.g., stocks and real estate)
- Fourth dimension: Income components (2 components, e.g., selling price and purchase price)
- Fifth dimension: Deductions (2 deductions, e.g., transaction fees and capital improvement costs)

Do the following:
- View all stocks selling price from the first individual for the first year
- View all real estate selling price from the first individual for the last year
- For each individual, calculate total transaction fees for purchase price over the years

In [3]:
with open('taxable_data_5d_task3.npy', 'rb') as f:
    a = np.load(f)
print("Shape: ", a.shape)
print("Dim:   ", a.ndim)

Shape:  (5, 5, 2, 2, 2)
Dim:    5


In [6]:
# a: View all stocks selling price from the first individual for the first year

a[0,0,0,0,:]

array([2674, 9548], dtype=int64)

In [7]:
# b: View all real estate selling price from the first individual for the last year

a[0,-1,1,0,:]

array([9586, 7813], dtype=int64)

In [9]:
# c: For each individual, calculate total transaction fees for purchase price over the years

temp = a[:, #Individuals
         :, #years
         :, #Transaction types (2 types, e.g., stocks and real estate)
         1, #Income components (purchase price)
         0] #Deductions (transaction fees)

np.sum(temp, axis=2)

array([[ 6708,  9648,  8565,  5922, 10107],
       [ 9129, 10609, 11273,  7891, 11326],
       [10606, 15361, 16218,  8879,  8012],
       [16262, 13636, 13018, 10572, 15576],
       [12372,  6875, 15648, 15138,  6222]], dtype=int64)