# Clipping and sampling

In [1]:
import numpy as np
import numpy.random as npr

In [3]:
# Draw 100 integers uniformly from the half-open range [-10, 10):
# -10 can occur, 10 cannot.
x = npr.randint(low=-10, high=10, size=100)
print(x)

[ -6  -6   2  -5   6   4  -8   6  -6  -4   0   2  -3 -10  -5  -8  -9   7
  -3   0   7  -4   2   7  -2  -2   3  -6 -10  -8  -5  -3   4   9  -5   8
   6   3 -10   3   6   4   4  -1  -5   2   8  -6   5  -8   2  -4   8  -5
  -8  -1  -5 -10   1  -2  -5   3   7  -1   4  -5   5  -2  -3 -10   8   0
  -5   5 -10   7  -4  -7   9   6  -5  -2  -2   9   2  -2   2   7  -3   0
   2   5  -7   9  -8  -1   0   1   1   0]


### Limit observations to a lower and upper bound (values outside are set to the nearest bound)

In [4]:
# Limit every value to the closed interval [-4, 0]: anything below -4 becomes -4,
# anything above 0 becomes 0. A new array is returned; x itself is left unchanged.
np.clip(x, a_min=-4, a_max=0)

array([-4, -4,  0, -4,  0,  0, -4,  0, -4, -4,  0,  0, -3, -4, -4, -4, -4,
        0, -3,  0,  0, -4,  0,  0, -2, -2,  0, -4, -4, -4, -4, -3,  0,  0,
       -4,  0,  0,  0, -4,  0,  0,  0,  0, -1, -4,  0,  0, -4,  0, -4,  0,
       -4,  0, -4, -4, -1, -4, -4,  0, -2, -4,  0,  0, -1,  0, -4,  0, -2,
       -3, -4,  0,  0, -4,  0, -4,  0, -4, -4,  0,  0, -4, -2, -2,  0,  0,
       -2,  0,  0, -3,  0,  0,  0, -4,  0, -4, -1,  0,  0,  0,  0])

### Replace observations below a threshold with the threshold value

In [5]:
# Only a lower bound is given: negative values are raised to 0,
# everything else passes through untouched.
x.clip(min=0)

array([0, 0, 2, 0, 6, 4, 0, 6, 0, 0, 0, 2, 0, 0, 0, 0, 0, 7, 0, 0, 7, 0,
       2, 7, 0, 0, 3, 0, 0, 0, 0, 0, 4, 9, 0, 8, 6, 3, 0, 3, 6, 4, 4, 0,
       0, 2, 8, 0, 5, 0, 2, 0, 8, 0, 0, 0, 0, 0, 1, 0, 0, 3, 7, 0, 4, 0,
       5, 0, 0, 0, 8, 0, 0, 5, 0, 7, 0, 0, 9, 6, 0, 0, 0, 9, 2, 0, 2, 7,
       0, 0, 2, 5, 0, 9, 0, 0, 0, 1, 1, 0])

### Random sampling

In [6]:
# Population to sample from: 100 integers drawn uniformly from [0, 1000).
y = npr.randint(low=0, high=1000, size=100)
print(y)

[151 958 813 416 806 766 592 911  74 226 562  10 420 881 620 851 218 135
 177 570 296 270 901 567 927 779 644 980 153 127  86 602 143 233  27 694
 283 818 664 944 192 203 630  50  56 107 106 728 237 209 420 786 247  79
 661 623 817 239 922 883 595 996 958 575 983 170 390 304 861 448 876 757
 812  76 563  72 174 572 241 315 543 323 979 733 905 277 547 490 284 796
 311 221 752 340 150 347 944 498 852 643]


In [9]:
# Pick 20 entries of y without replacement, i.e. no position of y is drawn twice.
npr.choice(y, size=20, replace=False)

array([135, 543, 944, 241, 416, 630, 786, 851, 996, 592, 664, 911, 595,
       296, 143, 806, 304, 177, 661,  76])

### Choice can also be used to sample from a non-uniform probability distribution

In [13]:
# Draw a single value from 1..6, where p lists the probability of each value
# in order (the six weights sum to 1).
npr.choice(a=np.arange(1, 7), size=1, p=[0.2, 0.1, 0.3, 0.15, 0.05, 0.2])

array([3])

### Stable sample: one random ordering reused to index several arrays

In [14]:
# 100 uniform draws from [0, 1). The indices that would sort them form a
# random ordering of 0..99, which can then be used to index other arrays.
z = npr.random(size=100)
sample = z.argsort()

In [15]:
# Display the index array produced by argsort on z.
sample

array([29, 84, 31, 21, 68, 97, 65,  7, 90, 86,  8, 71,  2, 67, 85, 23, 37,
       26, 17, 63, 79, 20, 74, 10, 64, 49, 39, 43, 36, 22, 30, 52, 46, 78,
       76, 35, 94, 77, 25, 40,  4, 18, 69, 11, 87, 93, 72, 70, 89,  5,  1,
       55, 16, 33,  0, 15, 12, 75,  6, 96, 81, 47, 13, 50,  9, 38, 19,  3,
       80, 60, 34, 99, 56, 44, 14, 27, 24, 28, 54, 73, 91, 45, 88, 61, 83,
       66, 92, 82, 62, 42, 98, 51, 95, 57, 58, 53, 48, 32, 41, 59])

In [16]:
# Reorder z by the argsort indices, which lists its values in ascending order.
z.take(sample)

array([4.11646199e-05, 1.38797792e-03, 1.57890197e-02, 2.15385523e-02,
       2.18725338e-02, 2.21223787e-02, 2.68250478e-02, 3.08124512e-02,
       3.66287600e-02, 4.09762621e-02, 4.93181731e-02, 5.11455831e-02,
       5.80826263e-02, 5.97992980e-02, 7.18833939e-02, 8.81623046e-02,
       1.00579394e-01, 1.09078921e-01, 1.22416221e-01, 1.24284335e-01,
       1.29855828e-01, 1.30630030e-01, 1.37988773e-01, 1.42786730e-01,
       1.45708532e-01, 1.51587759e-01, 1.57811909e-01, 1.59287137e-01,
       1.61626143e-01, 1.62804706e-01, 1.76906452e-01, 1.99140169e-01,
       2.01889248e-01, 2.16856826e-01, 2.36638885e-01, 2.52196896e-01,
       2.66459558e-01, 2.70466555e-01, 2.82108162e-01, 3.02002800e-01,
       3.04360915e-01, 3.08243121e-01, 3.23417796e-01, 3.26409848e-01,
       3.29577331e-01, 3.32303523e-01, 3.48535296e-01, 3.50762913e-01,
       3.63827798e-01, 3.64539238e-01, 3.65431351e-01, 3.72798221e-01,
       4.13218610e-01, 4.28414995e-01, 4.33376639e-01, 4.45893722e-01,
      

In [17]:
# The 20 smallest values of z: trim the index array first, then index once.
z[sample[:20]]

array([4.11646199e-05, 1.38797792e-03, 1.57890197e-02, 2.15385523e-02,
       2.18725338e-02, 2.21223787e-02, 2.68250478e-02, 3.08124512e-02,
       3.66287600e-02, 4.09762621e-02, 4.93181731e-02, 5.11455831e-02,
       5.80826263e-02, 5.97992980e-02, 7.18833939e-02, 8.81623046e-02,
       1.00579394e-01, 1.09078921e-01, 1.22416221e-01, 1.24284335e-01])

In [18]:
# Apply the same first 20 indices to y, selecting the entries of y at the
# positions where z holds its 20 smallest values.
y[sample[:20]]

array([127, 905, 602, 270, 861, 498, 170, 911, 311, 547,  74, 757, 813,
       304, 277, 567, 818, 644, 135, 575])