### **Python for Machine Learning**

#### [*Machine learnia video 12/30* **numpy Statistics and mathematics**][1]


[1]:https://www.youtube.com/watch?v=RwFiNlL4Q8g&list=PLO_fdPEVlfKqMDNmCFzQISI2H_nJcEDJq&index=13

##### **A. Classic methods of n-dimensional arrays (`ndarray`):**


In [None]:
import numpy as np

# Build a 10x10 array of random integers drawn from [0, 10).
A = np.random.randint(0, 10, [10, 10])

# Sum of every element of the array (no axis argument = reduce over all axes).
print(A.sum())

# Sum of each column (axis=0 collapses the rows): one value per column.
# Printed explicitly because a notebook only displays a cell's last expression.
print(A.sum(axis=0))

# Sum of each row (axis=1 collapses the columns): one value per row.
A.sum(axis=1)

In [None]:
# Other classic ndarray operations that accept an `axis` argument
# (axis=0 works down each column):
print(A.prod(axis=0))        # product
print(A.cumsum(axis=0))      # cumulative sum
print(A.cumprod(axis=0))     # cumulative product

print(A.min(axis=0))         # minimum
print(A.max(axis=0))         # maximum
print(A.argmin(axis=0))      # index of the minimum along this axis
print(A.argmax(axis=0))      # index of the maximum along this axis

# ndarray.sort() sorts IN PLACE and returns None, so print(A.sort(...)) would
# display "None" and silently reorder A for every later cell.
# np.sort returns a sorted copy and leaves A untouched.
print(np.sort(A, axis=0))    # each column sorted in ascending order
print(A.argsort())           # indices that would sort A along its last axis

##### **B. Basic Statistics with numpy :** 


In [None]:
# Extrema of A along axis 0, and the positions where they occur.
column_stats = (
    A.min,      # minimum
    A.max,      # maximum
    A.argmin,   # index of the minimum along this axis
    A.argmax,   # index of the maximum along this axis
)
for stat in column_stats:
    print(stat(axis=0))

In [None]:
# Mean along axis 1; with no axis argument the mean is taken over all elements.
A.mean(axis=1)

In [None]:
# Variance along axis 1; with no axis argument the variance is taken over all elements.
A.var(axis=1)

In [None]:
# Standard deviation along axis 1; with no axis argument it is taken over all elements.
A.std(axis=1)

##### **C. More advanced statistics: correlation with numpy**

In [33]:
# Model the correlations between the rows of A (np.corrcoef can also correlate
# A with other arrays). By default each row is treated as one variable, so
# entry [i, j] of the result is the correlation between row i and row j:
#
#   [L1/L1]   [L1/L2]  ...  [L1/Ln]
#   [L2/L1]   [L2/L2]  ...  [L2/Ln]
#   ...                ...
#   [Ln/L1]   [Ln/L2]  ...  [Ln/Ln]
correlations = np.corrcoef(A)

# Index the matrix to read one particular correlation. The matrix is
# symmetric, so [2, 0] and [0, 2] hold the same coefficient.
b = correlations[2, 0]
print(b)

0.2773500981126145


In [None]:
# np.unique returns the sorted distinct values found in the array;
# with return_counts=True it also returns, as a second array, how many times
# each of those values occurs.

np.unique(A, return_counts=True)

##### **D. Dealing with NaN:**

In [None]:

# NaN-aware helpers: np.nan<method_name>(array) returns the method's result
# while ignoring any NaN in the array.
nan_mask = np.isnan(A)        # boolean mask: True wherever A holds a NaN

print(np.nanmean(A))          # mean computed without the NaNs
print(nan_mask)
print(nan_mask.sum())         # number of NaNs (True counts as 1, False as 0)

# Ratio of NaNs in the array. `.sum` must be CALLED: dividing the bound
# method itself by A.size raises a TypeError.
print(nan_mask.sum() / A.size)

A[nan_mask] = 0               # replace every NaN by 0

##### **E. Linear Algebra with numpy (numpy.linalg):**

In [None]:
# Transpose: swap the rows and the columns of a matrix.

# 2x3 integer matrix holding 1..6 in row-major order.
A = np.arange(1, 7).reshape(2, 3)
print(A)
A.T  # the transpose of A (3x2)

In [None]:
# Matrix product.
# NOTE(review): assumes A is still the 2x3 matrix built in the transpose cell.

# B is A transposed with 1.0 added to every entry.
B = np.ones((3, 2)) + A.T

print(A @ B)  # matrix product A.B
print(B @ A)  # matrix product B.A

In [None]:
# Determinant and inverse of a random 4x4 integer matrix.

A = np.random.randint(0, 10, (4, 4))
print(A)

det_A = np.linalg.det(A)
print(det_A)

# A^-1 exists only when det(A) != 0. np.linalg.inv raises LinAlgError on a
# singular matrix, and a random draw can be singular, so check first.
# The determinant is computed in floating point, hence isclose instead of ==.
if np.isclose(det_A, 0.0):
    A_inv = None
    print("A is singular (det = 0): it has no inverse")
else:
    A_inv = np.linalg.inv(A)  # = A^-1

A_inv

In [None]:
# Eigen-decomposition of A: the first returned array holds the eigenvalues,
# the second holds the eigenvectors (column i of the second array is the
# eigenvector associated with eigenvalue i).
np.linalg.eig(A)

##### **Exercise :**
* _standardize each column of A (Z-score normalization, computed per column: **(A - A.mean()) / A.std()**)_ 
* _with the matrix A given by_
>_np.random.seed(0)_  
>_A = np.random.randint(0,100, [10,5])_


In [115]:
np.random.seed(0)
A = np.random.randint(0, 100, [10, 5])

# My method: standardize one column at a time.
A_float = A.astype(np.float32)  # float copy of A so the z-scores can be stored

# Iterate over however many columns A has instead of hard-coding 5.
# No explicit cast is needed: assigning into a float32 array already converts
# the values (the former np.float_ alias was removed in NumPy 2.0).
for col in range(A_float.shape[1]):
    column = A_float[:, col]
    A_float[:, col] = (column - column.mean()) / column.std()
print(A_float)


# Better method (from correction): axis=0 yields one mean and one standard
# deviation per column, and broadcasting applies them to every row at once.
A_standardized = (A - A.mean(axis=0)) / A.std(axis=0)
print(A_standardized)



[[-0.02206157  0.          0.13173823  0.7253925   0.10755797]
 [-1.5663712   1.6157963  -1.4867601  -0.3303431   0.9680218 ]
 [ 1.1251398   1.8402126   1.0350862  -1.1476868  -0.27965075]
 [ 0.9045242  -0.35906586  0.99744666  0.01021677  1.011045  ]
 [ 1.6104944  -0.44883233 -1.3362021   1.0659523   0.32267392]
 [-1.5663712  -1.2118473   0.73397017  0.7935045   0.6238363 ]
 [ 0.11030783  0.763015    0.80924916  1.8151841   1.011045  ]
 [ 0.1985541  -0.80789816 -1.5620391  -0.90929484 -2.172671  ]
 [-0.24267723 -0.67324847  0.16937773 -1.2498547  -0.32267392]
 [-0.5515391  -0.7181317   0.50813323 -0.77307093 -1.2691841 ]]
[[-0.02206157  0.          0.13173823  0.72539252  0.10755798]
 [-1.56637126  1.61579632 -1.48676006 -0.33034307  0.96802178]
 [ 1.12513992  1.84021247  1.03508612 -1.14768676 -0.27965074]
 [ 0.90452425 -0.35906585  0.99744662  0.0102168   1.01104497]
 [ 1.6104944  -0.44883231 -1.33620208  1.0659524   0.32267393]
 [-1.56637126 -1.21184724  0.73397016  0.7935045   0.6