# NumPy Pair Problem

For today's pair, we will use NumPy to complete the following tasks consecutively.

In [1]:
import numpy as np

# Set random seed so that results are reproducible
np.random.seed(0)

### 1. Generate three separate 1-dimensional arrays of 10,000 elements each.
* Array 1 should contain numbers randomly drawn from the uniform distribution between 1 and 100
* Array 2 should contain numbers randomly drawn from the normal distribution, with a mean of 0 and a standard deviation of 1
* Array 3 should contain numbers randomly drawn from the binomial distribution, where n=10 and p=0.5

In [57]:
array1 = np.random.uniform(1, 100, 10000)
array2 = np.random.normal(0, 1, 10000)
array3 = np.random.binomial(10, 0.5, 10000)
array3

array([4, 5, 5, ..., 5, 4, 4])

### 2. For each array, randomly sample 1,000 rows without replacement. (hint: `np.random.choice()`)


In [58]:
array1 = np.random.choice(array1, 1000, replace=False)
array2 = np.random.choice(array2, 1000, replace=False)
array3 = np.random.choice(array3, 1000, replace=False)
print(array1[1:10])
print(array2[1:10])
print(array3[1:10])

[22.82505784 72.23603848 73.69670482 29.80696432 40.46328307 95.02707208
 92.90497616  7.1161279  84.14966865]
[ 1.63920097 -1.08501398  0.06679293 -0.41540346  0.43090726  0.32312015
  1.53491933 -0.21782625  0.88094935]
[ 6  5  7  3  5  2 10  4  6]


### 3. For each of the three samples:  
- Calculate the mean, median, and standard deviation
- Find the index and the value of the largest element (hint: `np.argmax()`)  
- Find the value of the 75th percentile (hint: `np.percentile()`)  

In [59]:
mean1 = np.mean(array1)
mean2 = np.mean(array2)
mean3 = np.mean(array3)
print(mean1,mean2,mean3)

largest1 = np.argmax(array1)
largest2 = np.argmax(array2)
largest3 = np.argmax(array3)
print(largest1,largest2,largest3)


50.552213219585695 -0.033986719219710367 4.896
255 628 7


In [60]:
std1 = np.std(array1)
std2 = np.std(array2)
std3 = np.std(array3)
print(std1,std2,std3)

28.406698883921372 0.9747233951952765 1.562428878381349


In [61]:
median1 = np.median(array1)
median2 = np.median(array2)
median3 = np.median(array3)
print(median1,median2,median3)

50.00499476743026 -0.026937092247233604 5.0


In [62]:
largest1 = np.argmax(array1)
largest2 = np.argmax(array2)
largest3 = np.argmax(array3)
print(largest1,largest2,largest3)

255 628 7


In [63]:
thirdpercentile1 = np.percentile(array1,75)
thirdpercentile2 = np.percentile(array2,75)
thirdpercentile3 = np.percentile(array3,75)
print(thirdpercentile1,thirdpercentile2,thirdpercentile3)

75.20925502271464 0.6323050377198595 6.0


### 4. Sort each of the three samples in increasing order, then combine them together to create a 1,000 by 3 array. (hint: `np.sort()` and `np.stack()`). Now, check the shape of the array.

In [64]:
a =np.sort(array1)
b = np.sort(array2)
c = np.sort(array3)
combined_array = np.stack((a,b,c))
combined_array

array([[ 1.1110819 ,  1.13557152,  1.13695165, ..., 99.57570491,
        99.65852424, 99.83117493],
       [-3.69428529, -3.01787832, -2.92415314, ...,  2.52860191,
         2.55385416,  2.69956844],
       [ 1.        ,  1.        ,  1.        , ...,  9.        ,
        10.        , 10.        ]])

### 5. Replace all negative values with 0. Replace all values that exceed 10 with 10.

In [65]:
s =np.clip(combined_array,0,10)
s

array([[ 1.1110819 ,  1.13557152,  1.13695165, ..., 10.        ,
        10.        , 10.        ],
       [ 0.        ,  0.        ,  0.        , ...,  2.52860191,
         2.55385416,  2.69956844],
       [ 1.        ,  1.        ,  1.        , ...,  9.        ,
        10.        , 10.        ]])

### 6. Subtract 5 from all values that exceed 5. (hint: `np.where()`)

In [66]:
d = np.where(s > 5, s-5, s)
d = d.reshape(1000,3)
d.shape

(1000, 3)

### 7. Produce a fourth column that is the sum of the first two columns.  (hint: `np.hstack()`). Now check the new shape of the array.

In [67]:
# prompt: how to add fourth_col into d array

fourth_col = d[:, 0] + d[:, 1] # 1000,
fourth_col = fourth_col.reshape(-1,1) # Reshape to a column vector , 1000 x 1
print(fourth_col.shape)
d = np.hstack((d, fourth_col))
d

(1000, 1)


array([[1.1110819 , 1.13557152, 1.13695165, 2.24665342],
       [1.16469874, 1.19416691, 1.3488835 , 2.35886565],
       [2.04798822, 2.12234098, 2.26413372, 4.1703292 ],
       ...,
       [4.        , 4.        , 4.        , 8.        ],
       [4.        , 4.        , 4.        , 8.        ],
       [4.        , 5.        , 5.        , 9.        ]])

### 8. Use broadcasting to add 1 to each element of the first column, 2 to each element of the second column, 3 to each element of the third column, and 4 to each element of the fourth column.

In [69]:
# prompt: Use broadcasting to add 1 to each element of the first column, 2 to each element of the second column, 3 to each element of the third column, and 4 to each element of the fourth column.

add_values = np.array([1, 2, 3, 4])
d[:, :4] = d[:, :4] + add_values
d


array([[ 2.1110819 ,  3.13557152,  4.13695165,  6.24665342],
       [ 2.16469874,  3.19416691,  4.3488835 ,  6.35886565],
       [ 3.04798822,  4.12234098,  5.26413372,  8.1703292 ],
       ...,
       [ 5.        ,  6.        ,  7.        , 12.        ],
       [ 5.        ,  6.        ,  7.        , 12.        ],
       [ 5.        ,  7.        ,  8.        , 13.        ]])

2

### 9. Matrix multiply this 1000x4 array by a 4x1 array of ones. How many unique values are in the resulting product?  (hint: `np.unique()`)

In [70]:
# prompt: Matrix multiply this 1000x4 array by a 4x1 array of ones. How many unique values are in the resulting product? (hint: np.unique())

ones_array = np.ones((4, 1))
product_array = np.dot(d, ones_array)
num_unique = len(np.unique(product_array))
num_unique

202

# Thank You