### Numpy :- Problem Solving 

In [1]:
import numpy as np

In [2]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr1.shape

(2, 3)

In [3]:
arr2 = np.array([0, 1, 0])
arr2.shape

(3,)

In [4]:
arr1 + arr2

array([[1, 3, 3],
       [4, 6, 6]])

In [5]:
arr1 + [1]

array([[2, 3, 4],
       [5, 6, 7]])

In [6]:
arr1 + [1, 2]

ValueError: operands could not be broadcast together with shapes (2,3) (2,) 

In [None]:
arr1 + [[1], [2], [0]]

In [7]:
np.array([[1], [2], [0]]).shape

(3, 1)

## Broadcasting
**Rules :- Start Matching the dimenstion backwords (Right to Left)**
* Compatable :- If same number appears or if one of them is one (1).<br><br>
* Incompatable :- Otherwise

In [8]:
arr3 = np.array([[1, 2, 3], [4, 5, 6]])
arr4 = np.array([1, 2, 3])

In [9]:
arr3.shape, arr4.shape

((2, 3), (3,))

In [10]:
arr3 + arr4

array([[2, 4, 6],
       [5, 7, 9]])

In [11]:
arr5 = np.array([[1, 2, 3], [4, 5, 6]])
arr6 = np.array([[1], [2]])
arr5.shape, arr6.shape

((2, 3), (2, 1))

In [12]:
arr5 + arr6

array([[2, 3, 4],
       [6, 7, 8]])

In [13]:
arr_1 = np.array([[1, 2, 3], [4, 5, 6]])
arr_2 = np.array([1])
arr_1.shape, arr_2.shape

((2, 3), (1,))

In [14]:
arr_1 + arr2

array([[1, 3, 3],
       [4, 6, 6]])

In [15]:
arr_1 = np.array([1, 2, 3, 4, 5])
arr_2 = np.array([1, 2, 3, 4])
arr_1.shape, arr_2.shape

((5,), (4,))

In [16]:
arr_1 + arr_2

ValueError: operands could not be broadcast together with shapes (5,) (4,) 

In [17]:
arr_1 = np.array([[1], [2], [3], [4], [5]])
arr_2 = np.array([1, 2, 3, 4])
print(arr_1.shape, arr_2.shape)

(5, 1) (4,)


In [18]:
arr_1 + arr_2

array([[2, 3, 4, 5],
       [3, 4, 5, 6],
       [4, 5, 6, 7],
       [5, 6, 7, 8],
       [6, 7, 8, 9]])

In [19]:
arr_1 = np.array([[1], [2], [3], [4], [5]])
arr_2 = np.array([1, 2, 3, 4])
print(arr_1 * arr_2)

[[ 1  2  3  4]
 [ 2  4  6  8]
 [ 3  6  9 12]
 [ 4  8 12 16]
 [ 5 10 15 20]]


In [20]:
arr_1.dot(arr_2.reshape(1, -1))

array([[ 1,  2,  3,  4],
       [ 2,  4,  6,  8],
       [ 3,  6,  9, 12],
       [ 4,  8, 12, 16],
       [ 5, 10, 15, 20]])

In [21]:
np.matmul(arr_1, arr2.reshape(1, -1))

array([[0, 1, 0],
       [0, 2, 0],
       [0, 3, 0],
       [0, 4, 0],
       [0, 5, 0]])

### Question: Randomly generate a matrix of shape (1Million, 2) and perform below mentioned operations :

a. Find the distances between each 2-Dimensional data point from the centroid (i.e. mean) of the given dataset. Append the newly calculated distances as new column with the given dataset.

b. Given any data point, generate 3 closest neighbors from it.


In [2]:
# Step 1 :- Generate a random data
arr = np.random.rand(1000000, 2)

In [3]:
arr.shape

(1000000, 2)

In [4]:
# Step 2 :- Compute the centeroid
centeroid = np.mean(arr, axis=0)

In [5]:
centeroid.shape, centeroid

((2,), array([0.50030837, 0.49965002]))

In [6]:
# Step3 :- Compute the Distances btn each 2d datapoint and the centeroid
d = np.sqrt(np.sum((arr - centeroid) ** 2, axis=1))

In [7]:
d.shape

(1000000,)

In [8]:
d

array([0.52250972, 0.49817231, 0.39495643, ..., 0.50228219, 0.50392388,
       0.35281207])

In [9]:
arr

array([[0.84354182, 0.89361348],
       [0.22755154, 0.91651854],
       [0.89030265, 0.43723971],
       ...,
       [0.62668286, 0.98577437],
       [0.75101031, 0.06251409],
       [0.77863124, 0.71647421]])

In [10]:
# Reshaping the 
d.reshape(-1, 1).shape

(1000000, 1)

In [13]:
new_arr = np.hstack([arr, d.reshape(-1, 1)])

In [14]:
new_arr.shape

(1000000, 3)

In [18]:
# Args-sort will give the index 
d.argsort()[:3]

array([742987, 143773, 164494], dtype=int64)

In [16]:
new_arr[d.argsort()][:3]

array([[5.00532806e-01, 4.99767820e-01, 2.53473329e-04],
       [5.00779697e-01, 4.99682457e-01, 4.72441477e-04],
       [4.99782836e-01, 4.99801199e-01, 5.46848595e-04]])

In [35]:
# Concating the distance array in new array
new_arr = np.column_stack((arr, d))

In [36]:
# Find indices of 3 closest neighbors from centroid
closest_neighbors_indices = np.argsort(d)[1:4]

In [37]:
# Extract coordinates and distances of closest neighbors
closest_neighbors = arr[closest_neighbors_indices]
closest_neighbors_distances = d[closest_neighbors_indices]

In [38]:
print(closest_neighbors)
print(closest_neighbors_distances)

[[0.50008092 0.5003501 ]
 [0.49908634 0.50008257]
 [0.49939932 0.50094779]]
[0.00037914 0.00085374 0.00108945]
