In [2]:
import numpy as np
print(np.__version__)

2.3.3


Numpy provides many functions to **create** arrays


In [3]:
a = np.zeros((2,2))   # Create an array of all zeros
print(a)              # Prints "[[ 0.  0.]
                      #          [ 0.  0.]]"

b = np.ones((1,2))    # Create an array of all ones
print(b)              # Prints "[[ 1.  1.]]"

c = np.full((2,2), 7)  # Create a constant array
print(c)               # Prints "[[ 7.  7.]
                       #          [ 7.  7.]]"

d = np.eye(2)         # Create a 2x2 identity matrix
print(d)              # Prints "[[ 1.  0.]
                      #          [ 0.  1.]]"

e = np.random.random((2,2))  # Create an array filled with random values
print(e)                     

[[0. 0.]
 [0. 0.]]
[[1. 1.]]
[[7 7]
 [7 7]]
[[1. 0.]
 [0. 1.]]
[[0.57971889 0.20687025]
 [0.28347431 0.03244191]]



**Slicing:** In addition to accessing list elements one at a time, Python provides concise syntax to access sublists; this is known as slicing.


In [4]:
nums = list(range(5))     # range is a built-in function that creates a list of integers
nums = np.array(nums)     # convert python list to numpy array
print(nums)               # Prints "[0, 1, 2, 3, 4]"
print(nums[2:4])          # Get a slice from index 2 to 4 (exclusive); prints "[2, 3]"
print(nums[2:])           # Get a slice from index 2 to the end; prints "[2, 3, 4]"
print(nums[:2])           # Get a slice from the start to index 2 (exclusive); prints "[0, 1]"
print(nums[:])            # Get a slice of the whole list; prints "[0, 1, 2, 3, 4]"
print(nums[:-1])          # Slice indices can be negative; prints "[0, 1, 2, 3]"
nums[2:4] = [8, 9]        # Create a new sub-array
print(nums)  
print(nums.shape) 

[0 1 2 3 4]
[2 3]
[2 3 4]
[0 1]
[0 1 2 3 4]
[0 1 2 3]
[0 1 8 9 4]
(5,)



Let us see some examples of **Matrix** **multiplication**


In [5]:
import time  # import module to calculate execution time


In [7]:
x = np.array([1, 0.01, 0.5, 0.78])
w = np.array([1, 2, 3, -3])
b = 0.5

start_time = time.time()
y = np.dot(w, x) + b  # linear regression
end_time = time.time()
print("y: {} ".format(y))
print("y.shape(): {} ".format(y.shape))

vec_time = end_time-start_time
print("Execution time with vectorization {} seconds".format(vec_time))

y: 0.6800000000000002 
y.shape(): () 
Execution time with vectorization 6.008148193359375e-05 seconds


The same result can be obtained with a less efficient for loop.

In [8]:
start_time = time.time()
x_n = x.shape[0]
y = 0
for i in range(x_n):
   y += w[i]*x[i] + b
end_time = time.time()

print(y)
print(y.shape)

print("Execution time without vectorization {} seconds".format(end_time-start_time))
print(vec_time/(end_time-start_time))   # print the increase in execution time 
                                        # warning: for small vectors it varies a lot between runs!

2.1799999999999997
()
Execution time without vectorization 0.00010704994201660156 seconds
0.5612472160356348


The matmul function allow to multiply N-Dimensional arrays - the behaviour 



In [9]:
a = np.random.random_sample(size=(5,3))  # generate random matrix with 5 rows and 3 columns
b = np.random.random_sample(size=(3,4))  # generate random matrix with 3 rows and 4 columns

print(a)
print(b)

c = np.matmul(a,b)
print(c)
print(c.shape)     # prints the shape of the resulting matrix, e.g. (5,4)

[[0.97128759 0.73624683 0.54235267]
 [0.51680016 0.10078186 0.79329108]
 [0.3028057  0.05563823 0.64583734]
 [0.55637999 0.19323646 0.49425259]
 [0.48848117 0.67839405 0.35022448]]
[[0.40305553 0.06440383 0.91429906 0.49245079]
 [0.27852363 0.89950697 0.47465555 0.46058444]
 [0.29718086 0.18451111 0.12783393 0.09043268]]
[[0.75772181 0.82488389 1.30684205 0.86646158]
 [0.47212022 0.27030891 0.62175608 0.37265664]
 [0.32947458 0.18871299 0.3858239  0.23314781]
 [0.42495537 0.30084564 0.66360072 0.40768806]
 [0.48991382 0.70630055 0.81339195 0.58468242]]
(5, 4)



What happens if the inner dimensions are not compatible?


In [10]:
print(np.matmul(b, a))

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 5 is different from 4)

Let us see another example of how a vectorized implementation is more efficient!


In [11]:
a = np.random.random_sample((5000,))    # generate a large vector

# vectorized calculation
start_time = time.time()
exp_a = np.exp(a)
end_time = time.time()
print("Execution time with vectorization {} seconds".format(end_time-start_time))

start_time = time.time()
x_n = a.shape[0]
exp_a = np.zeros(a.shape)
for i in range(x_n):
   exp_a[i] = np.exp(a[i])
end_time = time.time()
print("Execution time without vectorization {} seconds".format(end_time-start_time))

Execution time with vectorization 0.00030517578125 seconds
Execution time without vectorization 0.003253936767578125 seconds


---
**Exercise 1**


Given the amount of Carbs, Proteins, Fats in 100g of different foods, 
knowing that carbs and proteins provides 4 calories and fats 9 calories,
calculate the % of calories from carbs, proteins and fats for each food


In [None]:
grams = np.array([[ 27, 5.8, 41.5, 18.0 ], [ 0.7, 2.5, 8, 4 ],[ 0.2, 0.3, 1.2, 29.5 ]])
# righe: feature
# colonne: piatto
grams

array([[27. ,  5.8, 41.5, 18. ],
       [ 0.7,  2.5,  8. ,  4. ],
       [ 0.2,  0.3,  1.2, 29.5]])

In [26]:
#insert here the solution

#a = [4, 4, 9]
#w = np.array([a])
w = np.array([[4, 4, 9]])
print(w.shape)
w = w.T
print(w.shape)
w


(1, 3)
(3, 1)


array([[4],
       [4],
       [9]])

In [28]:
# converto quantità in calorie

# prodotto riga con riga. es: 4 per ogni colonna [27. ,  5.8, 41.5, 18.]
cal = grams * w
cal 

array([[108. ,  23.2, 166. ,  72. ],
       [  2.8,  10. ,  32. ,  16. ],
       [  1.8,   2.7,  10.8, 265.5]])

In [None]:
# per ogni piatto calorie tot
cal_piatto = np.sum(cal, axis=0)
# calcolo % di ogni feature rispetto a tot calorie del piatto
percentuale = cal / cal_piatto * 100

percentuale

array([[95.91474245, 64.62395543, 79.50191571, 20.36775106],
       [ 2.48667851, 27.8551532 , 15.3256705 ,  4.5261669 ],
       [ 1.59857904,  7.52089136,  5.17241379, 75.10608204]])

---
**Exercise 2**

Given two 1-D arrays **x** (real values) and **y** (discreet labels), and a constant parameter *m*, implement the following formula:

$z_i = \bigg \{ \begin{matrix} \parallel x_i \parallel ^ 2 \text{  if  } y_i =1 \\ \parallel m - x_i \parallel ^ 2 \text{  if  } y_i = 0 \end{matrix}$

In [33]:
x = np.array([0.08444168, 0.5717077,  0.86764178, 0.2427889,  0.44898618, 0.23330771,
 0.14876752, 0.41267104, 0.38951113, 0.60130308])
print(x)
y = np.array([0, 0, 1, 1, 0, 0, 0, 1, 1, 0])
print(y)


[0.08444168 0.5717077  0.86764178 0.2427889  0.44898618 0.23330771
 0.14876752 0.41267104 0.38951113 0.60130308]
[0 0 1 1 0 0 0 1 1 0]


In [41]:
# insert here the solution

m = 0.2 # constant parameter

# ragionamento quello di avere due X diversi in cui in uno tengo le y=0 mentre l'altro ragionamento opposto 1-y 
# e infine di sommare le due in modo da ottenere z

z1 = x * y # tengo solo le x dove ho y = 1
z1 = np.square(z1)
print(z1)

z2 = (m - x) * (1-y)
z2 = np.square(z2)
print(z2)

z = z1 + z2
z

[0.         0.         0.75280226 0.05894645 0.         0.
 0.         0.17029739 0.15171892 0.        ]
[0.01335373 0.13816661 0.         0.         0.06199412 0.0011094
 0.00262477 0.         0.         0.16104416]


array([0.01335373, 0.13816661, 0.75280226, 0.05894645, 0.06199412,
       0.0011094 , 0.00262477, 0.17029739, 0.15171892, 0.16104416])

In [42]:
# insert here the solution
m = 0.2
a = x * x                        
b = (m - x) * (m -x)             # broadcasting will expand m to the same size of x

z = y * a + (1 - y) * b          # observe that for any given binary y, only one term is non-zero
                                 # e.g. z will be equal to a if y == 1 and viceversa
print(z)

[0.01335373 0.13816661 0.75280226 0.05894645 0.06199412 0.0011094
 0.00262477 0.17029739 0.15171892 0.16104416]


---
**Exercise 3**

Given a matrix x of size M by N, where M is the number of samples and N is the number of features, write a vectorized expression to perform min-max scaling:
𝑥′=  (𝑥  − min⁡(𝑥))/(max⁡(𝑥)−min⁡(𝑥))



In [52]:
M = 3
N = 2
X = np.random.random((M, N))
print(X)

[[0.21487119 0.85760805]
 [0.03689141 0.73459284]
 [0.22183039 0.15010387]]


In [55]:
# SBAGLIAO: STIAMO NORMALIZZANDO non ha senso prendere della matrice intera dove ho feature diverse, 
#           la normalizzazione avviene per ogni campione, ergo min max per ogni sua feature
X_vectorized = (X - np.min(X)) / (np.max(X) - np.min(X))
print(X_vectorized)

print(np.max(X_vectorized, axis=0) )     # sanity check: after rescaling the range of each feature should be 0-1
print(np.min(X_vectorized, axis=0) )

[[0.21685898 1.        ]
 [0.         0.85011245]
 [0.22533841 0.13794342]]
[0.22533841 1.        ]
[0.         0.13794342]


In [56]:
# Sol Corretta
X_vectorized = (X - np.min(X, axis=0)) / (np.max(X, axis=0) - np.min(X, axis=0))
print(X_vectorized)

print(np.max(X_vectorized, axis=0) )     # sanity check: after rescaling the range of each feature should be 0-1
print(np.min(X_vectorized, axis=0) )

[[0.96237027 1.        ]
 [0.         0.82612794]
 [1.         0.        ]]
[1. 1.]
[0. 0.]


**Aggiunta di keepdim**

In sintesi:
- keepdims=False (default): riduce le dimensioni → (3, 4) → (3,)
- keepdims=True: mantiene le dimensioni → (3, 4) → (3, 1)

Serve per fare broadcasting pulito e prevedibile! 🎯

In [58]:
#different (BUT NOT EQUIVALENT) solution: sum along the second axis (axis = 1)
# in this vector, each feature vector is rescaled between 0 and 1, e.g.: 
# features with large values are pushed towards 1, features with small values are pushed towards 0
# in the correct solution, each feature is rescaled between 0 and 1 INDEPENDENTLY
np.min(X, axis=1, keepdims=True)
x_scaled = (X - np.min(X, axis=1, keepdims=True))/(np.max(X, axis=1, keepdims=True) - np.min(X, axis=1, keepdims=True))
print(x_scaled.shape)
print(x_scaled[0,:])   # notice how the recaled vector differs from the correct solution

(3, 2)
[0. 1.]


---
**Exercise 4**


Given a 1D array, calculate the average of each consecutive triplet


In [None]:
x = np.array([1, 3, 5, 10, 15, 12, 23, 5, 6]) # vettore perciò shape restituisce (#, ) ossia num elementi lista

# insert here the solution
x_shape = x.shape # tupla contente info (R, C)
print(x_shape)
if x.shape[0] % 3 != 0:
  print("The lenght of the array must be multiple of 3!") # alternatively, we could pad the array with 0
else:
  no_triplets =  x.shape[0] // 3 # calculate the number of triplets
  print(no_triplets)

  new_shape = (no_triplets , 3) # tupla
  print(new_shape)

  x_reshaped = x.reshape(new_shape) # eq np: x_reshaped = np.reshape(x, new_shape) 
  print(x_reshaped)

  x_reshaped_mean = np.mean(x_reshaped, axis=1) # calculate the sum of each triplet (row)
  print(x_reshaped_mean)
  

(9,)
3
(3, 3)
[[ 1  3  5]
 [10 15 12]
 [23  5  6]]
[ 3.         12.33333333 11.33333333]
