# NumPy Pair Problem

For today's pair exercise, we will use NumPy to complete the following tasks in order.

In [2]:
import numpy as np

# Seed NumPy's global random generator with a fixed value so that the random
# draws made through np.random.* below are reproducible across fresh runs.
np.random.seed(0)

### 1. Generate three separate 1-dimensional arrays of 10,000 elements each.
* Array 1 should contain numbers randomly drawn from the uniform distribution between 1 and 100
* Array 2 should contain numbers randomly drawn from the normal distribution, with a mean of 0 and a standard deviation of 1
* Array 3 should contain numbers randomly drawn from the binomial distribution, where n=10 and p=0.5

In [3]:
# Draw 10,000 values from each of the three distributions the exercise asks for.
array1 = np.random.uniform(low=1, high=100, size=10000)  # uniform between 1 and 100
array2 = np.random.normal(loc=0, scale=1, size=10000)    # normal, mean 0, std 1
array3 = np.random.binomial(n=10, p=0.5, size=10000)     # binomial, n=10, p=0.5

# Preview the first five entries of each array.
print('array1', array1[:5])
print('array2', array2[:5])
print('array3', array3[:5])

array1 [55.33253689 71.80374727 60.67357423 54.94343512 42.94182513]
array2 [-0.72582032  0.56347552 -0.43563209 -0.10455255 -2.32127283]
array3 [4 5 5 3 8]


### 2. For each array, randomly sample 1,000 elements without replacement. (hint: `np.random.choice()`)


In [4]:
# Take 1,000 draws from each source array; replace=False samples without replacement.
ch1 = np.random.choice(array1, size=1000, replace=False)
ch2 = np.random.choice(array2, size=1000, replace=False)
ch3 = np.random.choice(array3, size=1000, replace=False)

# Preview the first five sampled values of each.
print('ch1', ch1[:5])
print('ch2', ch2[:5])
print('ch3', ch3[:5])

ch1 [25.78942307 22.82505784 72.23603848 73.69670482 29.80696432]
ch2 [-2.33277117  1.63920097 -1.08501398  0.06679293 -0.41540346]
ch3 [6 6 5 7 3]


### 3. For each of the three samples:  
- Calculate the mean, median, and standard deviation 
- Find the index and the value of the largest element (hint: `np.argmax()`)  
- Find the value of the 75th percentile (hint: `np.percentile()`)  

In [5]:

# Gather the three sampled arrays in one list so they can be processed uniformly.
samples = [ch1, ch2, ch3]
 
def calc(element):
    """Compute summary statistics for a sequence of numbers.

    Intended for 1-D input, because the position returned by ``np.argmax``
    is used to index ``element`` directly.

    Returns a 6-tuple, in this order: mean, median, standard deviation
    (``np.std`` with its default arguments), index of the largest element,
    the largest element itself, and the 75th percentile.
    """
    peak_idx = np.argmax(element)
    return (
        np.mean(element),
        np.median(element),
        np.std(element),
        peak_idx,
        element[peak_idx],
        np.percentile(element, 75),
    )
 
# Compute the statistics once per sample; `lst` holds one result tuple per
# sample, in the same order as `samples`.
lst = [calc(sample) for sample in samples]

# Report each sample's statistics, numbering the samples from 1.
# The tuple is unpacked into names instead of being read through magic indices.
for number, stats in enumerate(lst, start=1):
    mean, median, std_dev, mx_ind, mx_val, percentile_75 = stats
    print(f"Sample {number}:")
    print(f"Mean: {mean}")
    print(f"Median: {median}")
    print(f"Standard Deviation: {std_dev}")
    print(f"Largest Index: {mx_ind}")  # label previously misspelled as "Indax"
    print(f"Largest Element: {mx_val}")
    print(f"75th Percentile: {percentile_75}")
    print()


Sample 1:
Mean: 50.552213219585695
Median: 50.00499476743026
Standard Deviation: 28.406698883921372
Largest Indax: 255
Largest Element: 99.83117493278014
75th Percentile: 75.20925502271464

Sample 2:
Mean: -0.033986719219710367
Median: -0.026937092247233604
Standard Deviation: 0.9747233951952765
Largest Indax: 628
Largest Element: 2.699568441630259
75th Percentile: 0.6323050377198595

Sample 3:
Mean: 4.896
Median: 5.0
Standard Deviation: 1.562428878381349
Largest Indax: 7
Largest Element: 10
75th Percentile: 6.0



### 4. Sort each of the three samples in increasing order, then combine them together to create a 1,000 by 3 array. (hint: `np.sort()` and `np.stack()`). Now, check the shape of the array.

In [6]:
# Sort every sample in increasing order; np.sort returns a sorted copy.
sortarr = list(map(np.sort, samples))

# Put the sorted samples side by side as columns (one column per sample).
# For 1-D inputs the last axis of the stacked result is axis 1.
comarr = np.stack(sortarr, axis=-1)

arrshap = comarr.shape
print(arrshap)

(1000, 3)


### 5. Replace all negative values with 0. Replace all values that exceed 10 with 10.

In [7]:
# Clamp every value into [0, 10] in place: negatives become 0 and values
# above 10 become 10. Writing into `out=comarr` keeps the modification in place.
np.clip(comarr, 0, 10, out=comarr)
comarr.shape

(1000, 3)

### 6. Subtract 5 from all values that exceed 5. (hint: `np.where()`)

In [9]:
# Build a new array: entries greater than 5 are reduced by 5, all others are
# copied unchanged. `comarr` itself is left untouched.
subarr = np.where(
    comarr > 5,   # condition
    comarr - 5,   # value used where the condition holds
    comarr,       # value used elsewhere
)
subarr.shape

(1000, 3)

### 7. Produce a fourth column that is the sum of the first two columns.  (hint: `np.hstack()`). Now check the new shape of the array.

In [15]:
# Row-wise sum of the first two columns.
fourth_col = subarr[:, 0] + subarr[:, 1]

# np.hstack joins along the column axis, so the 1-D sums are reshaped into a
# single column before being appended to the existing columns.
newarr = np.hstack((subarr, fourth_col.reshape(-1, 1)))

# A bare `newarr.shape` in the middle of a cell is evaluated but never shown
# (only a cell's last expression is displayed), so print the shape explicitly.
print(newarr.shape)

print(newarr[:5])


[[1.1110819  0.         1.         1.1110819 ]
 [1.13557152 0.         1.         1.13557152]
 [1.13695165 0.         1.         1.13695165]
 [1.16469874 0.         1.         1.16469874]
 [1.19416691 0.         1.         1.19416691]]


### 8. Use broadcasting to add 1 to each element of the first column, 2 to each element of the second column, 3 to each element of the third column, and 4 to each element of the fourth column.

In [14]:
# One offset per column: 1, 2, 3, 4.
addval = np.arange(1, 5)

# The length-4 offsets broadcast across every row of `newarr`.
# NOTE: this updates `newarr` in place, so re-running this cell without first
# re-creating `newarr` adds the offsets a second time.
np.add(newarr, addval, out=newarr)
newarr.shape

(1000, 4)

### 9. Matrix multiply this 1000x4 array by a 4x1 array of ones. How many unique values are in the resulting product?  (hint: `np.unique()`)

In [16]:
# Multiplying by a 4x1 column of ones sums each row of `newarr`.
ones_arr = np.ones(shape=(4, 1))
res = np.dot(newarr, ones_arr)

# np.unique flattens its input and returns the sorted distinct values.
unique_val = np.unique(res)
num_unique = unique_val.size

print(f"num of unique: {num_unique}")

num of unique: 571


# Thank You