# Numpy Exercices

All the exercises in this section must be 100% Numpy. Each exercise should be at most 1-3 lines of code (so you should use Numpy's functions).

In [None]:
import numpy as np

## Initialize a null vector (with zeroes) of length 20.

In [1]:
np.zeros(20)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

## Initialize a null matrix (with zeros) with size 3x2.

In [2]:
np.zeros((3,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

## Initialize a vector with 10 random numbers between 0 and 1 (continuous, e.g. 0.01, 0.54, etc.).

In [8]:
v = np.random.random(10)
v

array([0.57285348, 0.90757159, 0.16290208, 0.56451768, 0.72194142,
       0.69789873, 0.44909756, 0.10714073, 0.24303745, 0.97388981])

## Replace the minimum element of the previous vector with a 0.

In [10]:
v[v.argmin()] = 0
v

array([0.57285348, 0.90757159, 0.16290208, 0.56451768, 0.72194142,
       0.69789873, 0.44909756, 0.        , 0.24303745, 0.97388981])

## Create a vector with all consecutive values between 12 and 38.

In [6]:
np.arange(12, 39)

array([12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
       29, 30, 31, 32, 33, 34, 35, 36, 37, 38])

## Initialize a vector of length 10 with integer random numbers between 1 and 20.

In [11]:
v = np.random.randint(20, size=10)
v

array([ 8,  3,  7, 10, 17, 19, 11,  3,  2,  2])

a) Filter the elements that are greater than 3 and less than 12.

In [12]:
v[(v > 3) & (v < 12)]

array([ 8,  7, 10, 11])

b) Filter the elemetns that are less than or equal to 5 or greater than 15.

In [13]:
v[(v < 5) | (v > 15)]

array([ 3, 17, 19,  3,  2,  2])

## Create a 4x4 matrix with integer random numbers between 0 and 5.

In [15]:
np.random.randint(6, size=(4,4))

array([[4, 5, 0, 5],
       [5, 3, 2, 5],
       [3, 0, 3, 5],
       [5, 3, 5, 2]])

## Find the indices of the elements that are not 0 in the vector: [2, 1, 0, 0, 3, 1, 0].

In [20]:
np.where(np.array([2, 1, 0, 0, 3, 1, 0]) != 0)[0]

array([0, 1, 4, 5])

## Create a random vector of length 10 and find the minimum, maximum, mean and standard deviation.

In [24]:
v = np.random.random(size=10)
v.max(), v.min(), v.mean(), v.std()

(0.9528553516873413,
 0.07689562026138008,
 0.39214818535806034,
 0.265099059517726)

## Create a 10x10 matrix with zeroes inside and ones surrounding them, like the following:

```
[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
[1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]
```

In [26]:
V = np.ones((10,10))
V[1:-1, 1:-1] = 0
V

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

## Defina a function that, given a vector, returns all the elements greater than 10 replaced by 0.

Example: [4, 7, 15, 21, 3, 34] → [4, 7, 0, 0, 3, 0]

In [31]:
def sub10(v):
    v = np.array(v)
    v[v > 10] = 0
    return v

In [32]:
sub10([4, 7, 15, 21, 3, 34])

array([4, 7, 0, 0, 3, 0])

## Define a function that negates (changes the sign) all the elements of a vector between 2 and 6.

Example: [4, 7, 15, 21, 3, 34] → [-4,  7, -6, 21, -3, 34]

In [35]:
def reverse_sign(v):
    v = np.array(v)
    filt = (v >= 2) & (v <= 6)
    v[filt] = v[filt] * -1
    return v

In [38]:
reverse_sign([4, 7, 6, 21, 3, 34])

array([-4,  7, -6, 21, -3, 34])

## Define a function that returns the common elements between two vectors.

In [11]:
def intersection(v1, v2):
    return np.intersect1d(v1, v2)

In [12]:
intersection([1, 2, 3], [1, 2, 10])

array([1, 2])

## Get the dates for today, yesterday and tomorrow.

In [41]:
ahir = np.datetime64('today') - np.timedelta64(1)
avui = np.datetime64('today')
dema = np.datetime64('today') + np.timedelta64(1)

ahir, avui, dema

(numpy.datetime64('2022-10-26'),
 numpy.datetime64('2022-10-27'),
 numpy.datetime64('2022-10-28'))

## Get all dates in September 2023.

In [13]:
np.arange('2023-09', '2023-10', dtype='datetime64[D]')

array(['2023-09-01', '2023-09-02', '2023-09-03', '2023-09-04',
       '2023-09-05', '2023-09-06', '2023-09-07', '2023-09-08',
       '2023-09-09', '2023-09-10', '2023-09-11', '2023-09-12',
       '2023-09-13', '2023-09-14', '2023-09-15', '2023-09-16',
       '2023-09-17', '2023-09-18', '2023-09-19', '2023-09-20',
       '2023-09-21', '2023-09-22', '2023-09-23', '2023-09-24',
       '2023-09-25', '2023-09-26', '2023-09-27', '2023-09-28',
       '2023-09-29', '2023-09-30'], dtype='datetime64[D]')

## Create an array of dates starting from today and covering the next 7 days.

In [26]:
np.array([np.datetime64('today') + np.timedelta64(i, 'D') for i in range(8)])

array(['2023-08-15', '2023-08-16', '2023-08-17', '2023-08-18',
       '2023-08-19', '2023-08-20', '2023-08-21', '2023-08-22'],
      dtype='datetime64[D]')

## Given an array of dates, find the differences (in days) between each date and the first date in days.

In [28]:
dates = np.array(['2023-08-14', '2023-08-17', '2023-08-20'], dtype='datetime64')

In [27]:
dates[1:] - dates[0]

array([3, 6], dtype='timedelta64[D]')

## Create a random vector of length 10 with values between 0 and 1 (both included) and with 3 decimal points.

Note that by default `np.random.random()` does not include 1.

In [17]:
precision = 3
np.random.randint(0, 10 ** precision) / float(10 ** precision)

0.076

## Create a random vector of length 10 and replace the maximum element with a 1 and the minimum with a 0.

In [46]:
v = np.random.random(10)
v[v.argmin()] = 0
v[v.argmax()] = 10
v

array([ 0.48236927,  0.33532455,  0.42394466,  0.46622942, 10.        ,
        0.        ,  0.85299109,  0.3400673 ,  0.59281017,  0.67469525])

## Define a function that, given a number N and a vector, returns the element closest to N. In case of a tie return the previous element.

In [19]:
def closest(v, n):
    v = np.array(v)
    idx = np.abs(v - n).argmin()
    return v[idx]

In [50]:
closest([1, 2, 3, 4], 5)

4

In [51]:
closest([1, 2, 3, 4], 2)

2

In [23]:
closest([1, 2, 2, 3, 4], 2.6)

3

In [25]:
assert closest([1, 2, 3, 4], 2) == 2
assert closest([1, 2, 3, 4], 5) == 4
assert closest([1, 2, 3, 4], -3) == 1
assert closest([1, 2, 2, 3, 4], 2) == 2
assert closest([1, 2, 2, 3, 4], 2.4) == 2
assert closest([1, 2, 2, 3, 4], 2.6) == 3

## Define a function that returns the N greater elements from a vector.

In [57]:
def greater(v, n):
    v = np.array(v)
    v.sort()
    return v[::-1][:n]

In [58]:
greater([3, 7, 9, 3, 5], 2)

array([9, 7])

## Create a random vector of length 20 with values between 1 and 8. Which unique elements (without repetition) where created?

In [59]:
v = np.random.randint(1, 8, 20)
np.unique(v)

array([1, 2, 3, 4, 5, 6])

## What will be the result of the following statements?

First try to understand the concepts of `np.nan`, `np.inf` and `set`. Next, try to mentally solve the statements by using your logic. Finally, test if your expected results are correct. 

```python
0 * np.nan
np.nan == np.nan
np.inf > np.nan
np.nan - np.nan
np.nan in set([np.nan])
```

## Given a 10x10 matrix of random values between 0 and 1, find and print the row with the highest mean value.

In [29]:
matrix = np.random.rand(10, 10)
mean_values = matrix.mean(axis=1)
matrix[np.argmax(mean_values)]

array([0.88357048, 0.86021135, 0.86918851, 0.3697665 , 0.52902631,
       0.90608512, 0.29074085, 0.66909895, 0.51804262, 0.71613131])

## Take two vectors of size 10 with random integers between 1 and 20. Compute the outer product of the two vectors, then flatten the resultant matrix into a one-dimensional array.

In [30]:
vec1 = np.random.randint(1, 21, 10)
vec2 = np.random.randint(1, 21, 10)
outer_product = np.outer(vec1, vec2)
outer_product.flatten()

array([ 76,  44,  28,  64,  28,  40,  80,  60,   4,  64, 152,  88,  56,
       128,  56,  80, 160, 120,   8, 128, 133,  77,  49, 112,  49,  70,
       140, 105,   7, 112, 152,  88,  56, 128,  56,  80, 160, 120,   8,
       128,  57,  33,  21,  48,  21,  30,  60,  45,   3,  48, 171,  99,
        63, 144,  63,  90, 180, 135,   9, 144, 114,  66,  42,  96,  42,
        60, 120,  90,   6,  96, 171,  99,  63, 144,  63,  90, 180, 135,
         9, 144, 152,  88,  56, 128,  56,  80, 160, 120,   8, 128, 114,
        66,  42,  96,  42,  60, 120,  90,   6,  96])

## Create an array of 100 consecutive dates starting from today. From this array, filter out all the dates that fall on a weekend (Saturday and Sunday).

In [32]:
dates = np.arange(np.datetime64('today'), np.datetime64('today') + np.timedelta64(100, 'D'))
weekends = np.isin(dates.astype('datetime64[D]').view('int64') % 7, [5, 6])
dates[~weekends]

array(['2023-08-17', '2023-08-18', '2023-08-19', '2023-08-20',
       '2023-08-21', '2023-08-24', '2023-08-25', '2023-08-26',
       '2023-08-27', '2023-08-28', '2023-08-31', '2023-09-01',
       '2023-09-02', '2023-09-03', '2023-09-04', '2023-09-07',
       '2023-09-08', '2023-09-09', '2023-09-10', '2023-09-11',
       '2023-09-14', '2023-09-15', '2023-09-16', '2023-09-17',
       '2023-09-18', '2023-09-21', '2023-09-22', '2023-09-23',
       '2023-09-24', '2023-09-25', '2023-09-28', '2023-09-29',
       '2023-09-30', '2023-10-01', '2023-10-02', '2023-10-05',
       '2023-10-06', '2023-10-07', '2023-10-08', '2023-10-09',
       '2023-10-12', '2023-10-13', '2023-10-14', '2023-10-15',
       '2023-10-16', '2023-10-19', '2023-10-20', '2023-10-21',
       '2023-10-22', '2023-10-23', '2023-10-26', '2023-10-27',
       '2023-10-28', '2023-10-29', '2023-10-30', '2023-11-02',
       '2023-11-03', '2023-11-04', '2023-11-05', '2023-11-06',
       '2023-11-09', '2023-11-10', '2023-11-11', '2023-

# Python vs Numpy

 For each one of the following exercises implement the functions using only Python, without any external libraries. Then compare the execution time with the `%%timeit` magic command (see the first example).

## Maximum

Write a function that find the maximum value of a list. Do not use the Python's `max()` function.

In [1]:
def my_max(l):
    # Find the maximum of a list of numbers
    mx = l[0]
    for e in l[1:]:
        if e > mx:
            mx = e
    return mx

In [2]:
tmp = np.random.randint(10, size=10000)

In [5]:
%%timeit

# My max
my_max(tmp)

594 µs ± 6.11 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [7]:
%%timeit

# Python's max
max(tmp)

455 µs ± 4.74 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [6]:
%%timeit

# Numpy's max
np.max(tmp)

8.1 µs ± 31.8 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


## Mean

Write a function that returns the mean of a list.

In [1]:
def mean1(v):
    sum_ = 0
    count = 0
    for e in v:
        sum_ += e
        count += 1
    return sum_ / count

In [2]:
def mean2(v):
    return sum(v) / len(v)

In [None]:
tmp = np.random.randint(10, size=10000)

In [6]:
%%timeit

mean1(tmp)

1.2 ms ± 137 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [7]:
%%timeit

mean2(tmp)

753 µs ± 21.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [8]:
%%timeit

np.mean(tmp)

15.3 µs ± 340 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


## Argmax

Write a function that returns the index of the greatest element of a list.

In [6]:
def argmax(v):
    n_max = -np.inf
    i_max = None
    for i, n in enumerate(v):
        if n > n_max:
            n_max = n
            i_max = i
    return i_max

In [9]:
tmp = np.random.randint(10, size=10000)

In [10]:
%%timeit

argmax(tmp)

816 µs ± 47.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [11]:
%%timeit

np.argmax(tmp)

10.8 µs ± 257 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


## Scalar Product

Write a function that returns the scalar product of two vectors.

In [12]:
def scalar_prod(v1, v2):
    sm = 0
    for n1, n2 in zip(v1, v2):
        sm += n1 * n2
    return sm

In [13]:
tmp1 = np.random.randint(10, size=10000)
tmp2 = np.random.randint(10, size=10000)

In [14]:
%%timeit

scalar_prod(tmp1, tmp2)

1.49 ms ± 39 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [15]:
%%timeit

np.dot(tmp1, tmp2)

7.01 µs ± 245 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


# Bonus

##  One-hot Encoding

Implement a function that returns a matrix with the one-hot encoding of a vector.

- Example: 
    - Input: [0, 2, 1, 2, 0, 1]
    - Output:
```
[[1., 0., 0.],
[0., 0., 1.],
[0., 1., 0.],
[0., 0., 1.],
[1., 0., 0.],
[0., 1., 0.]]
```

In [62]:
def one_hot_encode(vector):
    # Identify the number of unique classes
    n_classes = np.unique(vector).shape[0]
    # Initialize an array of zeros with shape (length of vector, number of classes)
    one_hot = np.zeros((vector.shape[0], n_classes))
    # For each item in the vector, set the corresponding class index to 1
    for i, value in enumerate(vector):
        one_hot[i, value] = 1
    return one_hot

In [63]:
vector = np.array([0, 2, 1, 2, 0, 1])
one_hot_encode(vector)

array([[1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.]])

## Fruit Ninja

You are a Fruit Ninja master, slicing fruits with precision. You have sliced 100 fruits, each giving you a random score between 1 and 100. However, due to a ninja technique, every third score is doubled. Calculate your total score.

In [34]:
scores = np.random.randint(1, 101, 100)
scores[::3] *= 2
np.sum(scores)

6458

## Time Traveler

A historian time traveler wants to study major events in the past century. Create an array of dates starting from 1923-08-14 to 2023-08-14 in 10-year increments, then randomly select 3 dates to travel to.

In [39]:
dates = np.arange('1923-08-14', '2023-08-14', dtype='datetime64[10Y]')
# To ensure the dates, not just years
dates = dates.astype('datetime64[D]')
np.random.choice(dates, 3, replace=False)

array(['1950-01-01', '1920-01-01', '2010-01-01'], dtype='datetime64[D]')

## Aliens

Aliens are sending messages, but it seems they're padding the real messages with a lot of zeros. Their messages are arrays of 100 elements with values either 0 or 1, but the real messages start and end with `1`. Extract their real message.

Challenge: Give a solution with at most 3 lines of code.

In [10]:
def get_aliens_message():
    # Creating a message where majority is zeros
    message = np.zeros(100, dtype=int)

    # Introducing the real message
    start_index = np.random.randint(10, 40)
    end_index = np.random.randint(60, 90)

    message[start_index:end_index] = np.random.randint(2, size=end_index-start_index)
    message[start_index] = 1
    message[end_index] = 1
    return message

In [None]:
message = get_aliens_message()

In [48]:
# Extract real message
start, end = np.where(message == 1)[0][[0, -1]]
# Real message
message[start:end+1]

array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1])

## Temperature Log

You have a 365-day temperature log for a city, represented as an array of floats. Identify the days where the temperature is at least 2 standard deviations away from the mean.

In [56]:
def detect_temperature_anomalies(temperatures):
    mean_temp = np.mean(temperatures)
    std_temp = np.std(temperatures)
    anomalies = np.abs(temperatures - mean_temp) > 2 * std_temp
    return np.where(anomalies)[0]

In [57]:
# Simulated temperatures around 25°C with std deviation 5°C
temps = np.random.normal(25, 5, 365)
detect_temperature_anomalies(temps)

array([  6,  37,  81, 110, 146, 156, 161, 187, 192, 233, 263, 275, 318,
       336, 361])

## Seismic Activity

Given a sequence of data representing seismic activity, where the values are floats, identify "earthquake" events. An earthquake is defined as a sequence where the readings rise continuously for at least 5 readings, peak, and then fall continuously for at least 5 readings.

Hint: Utilize Numpy's `gradient()` function to determine the rising and falling patterns in the sequence.

In [58]:
def detect_earthquake(seismic_data):
    gradient = np.gradient(seismic_data)
    rise = gradient > 0
    fall = gradient < 0

    for i in range(len(seismic_data) - 11):  # Subtract 11 for 5 rise, 1 peak, 5 fall
        if np.all(rise[i:i+5]) and np.all(fall[i+6:i+11]):
            return True
    return False

In [59]:
seismic_data = np.array([0, 0, 1, 2, 3, 4, 5, 5.5, 5, 4, 3, 2, 1, 0, 0])
detect_earthquake(seismic_data)

True

## Coin Flipping

A magician flips a coin 100 times, but his coin is tricky. It has a 70% chance of landing on heads (represented by 1) and 30% chance of tails (represented by 0). He asks you to predict how many times in a row the coin landed on heads at least 3 times.

Count every occurrence of 3 consecutive heads, including overlapping sequences. For example, in the sequence [1,1,1,1,0], the count is 2.

In [53]:
flips = np.random.choice([0, 1], 100, p=[0.3, 0.7])
filter = np.array([1, 1, 1])
conv_result = np.convolve(flips, filter, mode='valid')
np.sum(conv_result == 3)

51

## The Game of Life

Implement "The Game of Live" using Numpy.

In [26]:
# Author: Nicolas Rougier

def iterate(Z):
    # Count neighbours
    N = (Z[0:-2,0:-2] + Z[0:-2,1:-1] + Z[0:-2,2:] +
         Z[1:-1,0:-2]                + Z[1:-1,2:] +
         Z[2:  ,0:-2] + Z[2:  ,1:-1] + Z[2:  ,2:])

    # Apply rules
    birth = (N==3) & (Z[1:-1,1:-1]==0)
    survive = ((N==2) | (N==3)) & (Z[1:-1,1:-1]==1)
    Z[...] = 0
    Z[1:-1,1:-1][birth | survive] = 1
    return Z

In [27]:
Z = np.random.randint(0, 2, (10,10))
for i in range(10):
    Z = iterate(Z)
    print(Z)
    print()

[[0 0 0 0 0 0 0 0 0 0]
 [0 1 0 1 1 0 1 1 1 0]
 [0 1 0 1 1 0 1 1 0 0]
 [0 1 0 0 0 0 0 0 1 0]
 [0 1 1 1 1 0 1 0 1 0]
 [0 0 0 0 0 0 1 0 1 0]
 [0 1 1 1 1 1 0 0 0 0]
 [0 0 0 1 1 1 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]

[[0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 1 0 1 0 1 0]
 [0 1 0 1 1 0 1 0 0 0]
 [0 1 0 0 0 0 1 0 1 0]
 [0 1 1 1 0 1 0 0 1 0]
 [0 0 0 0 0 0 1 0 0 0]
 [0 0 1 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 1 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]

[[0 0 0 0 0 0 0 0 0 0]
 [0 0 1 1 1 0 0 1 0 0]
 [0 0 0 1 1 0 1 0 0 0]
 [0 1 0 0 0 0 1 0 0 0]
 [0 1 1 0 0 1 1 0 0 0]
 [0 1 0 1 0 0 1 1 0 0]
 [0 0 0 0 0 0 1 1 0 0]
 [0 0 0 0 0 1 1 0 0 0]
 [0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]

[[0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 1 1 0 0 0 0]
 [0 0 0 0 1 0 1 1 0 0]
 [0 1 0 1 1 0 1 1 0 0]
 [0 1 0 0 0 1 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 1 0 0]
 [0 0 0 0 0 1 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]

[[0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 1 1 1 0 0 0]
 [0 0 1 0 0 0 0 1 0 0]
 [0