## **Numpy Recap on May 11**

In [1]:
import numpy as np

In [2]:
l = list(range(1,10))
l

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [3]:
# Baseline: square 10 million integers one at a time with a pure-Python list comprehension.
l = range(10000000)
%timeit [i**2 for i in l]

744 ms ± 136 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [4]:
# Same computation on a NumPy array built from `l`: the whole array is squared in one vectorized operation.
arr = np.array(l)
%timeit arr**2

18.4 ms ± 495 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [5]:
np.array([1,2,3,4,5,6])

array([1, 2, 3, 4, 5, 6])

In [6]:
# Flat array of the integers 0..15, built from a plain Python list.
a = [*range(16)]
arr1 = np.array(a)
# Display a 4x4 arrangement; the result is not assigned, so arr1 stays one-dimensional.
arr1.reshape((4, 4))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [7]:
arr1

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

### **2D Array**

In [8]:
# Keep the 4x4 arrangement of arr1 under its own name.
arr2 = np.reshape(arr1, (4, 4))
arr2

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [9]:
arr2.shape, arr2.ndim

((4, 4), 2)

### **3D Array**

In [10]:
# 27 consecutive integers arranged as three 3x3 blocks.
arr = np.arange(27)
arr3 = arr.reshape((3, 3, 3))
arr3

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]],

       [[18, 19, 20],
        [21, 22, 23],
        [24, 25, 26]]])

In [11]:
arr3.shape, arr3.ndim

((3, 3, 3), 3)

In [12]:
arr3

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]],

       [[18, 19, 20],
        [21, 22, 23],
        [24, 25, 26]]])

In [13]:
# arange accepts a fractional step; the stop value (10) is excluded, so the last element is 9.5.
arr4 = np.arange(1,10,0.5)
arr4

array([1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. ,
       7.5, 8. , 8.5, 9. , 9.5])

In [14]:
arr4.shape, arr4.ndim

((18,), 1)

### **Array Type Conversion**

In [15]:
# Mixing an int, a str, a float and a bool: the array gets a single dtype,
# so every element is converted to a string (see the '<U32' dtype in the output).
arr5 = np.array([1,"Chaitu",3.5,True])
arr5

array(['1', 'Chaitu', '3.5', 'True'], dtype='<U32')

In [16]:
# With no string present, the int and the bool are upcast to float (True becomes 1.0).
arr6 = np.array([1,4.5,True])
arr6

array([1. , 4.5, 1. ])

In [17]:
# An int and a bool together give an integer array (True becomes 1).
arr7 = np.array([1,True])
arr7

array([1, 1])

### **DType & Astype**

In [18]:
a = np.array([1,2,3,4])
a.dtype

dtype('int64')

In [19]:
# Request the element type explicitly instead of letting NumPy infer it from the integers.
b = np.array([1, 2, 3, 4], dtype=np.float64)
b

array([1., 2., 3., 4.])

In [20]:
b.astype("str")

array(['1.0', '2.0', '3.0', '4.0'], dtype='<U32')

### **Indexing**

In [21]:
a = np.arange(0,19)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18])

In [22]:
# A single position (a[0], a[9]) returns one element; a list of positions returns an array
# of the elements at those positions, and negative positions count from the end.
a[0],a[9], a[[2,4]],a[[-1,1,-2,2]]

(0, 9, array([2, 4]), array([18,  1, 17,  2]))

### **Slicing**

In [23]:
a[2:5]

array([2, 3, 4])

In [24]:
a[4:9]

array([4, 5, 6, 7, 8])

In [25]:
# Negative bounds count from the end; the stop position (-1, the last element) is excluded.
a[-5:-1]

array([14, 15, 16, 17])

In [26]:
a < 6

array([ True,  True,  True,  True,  True,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False])

In [27]:
a[a<6]

array([0, 1, 2, 3, 4, 5])

In [28]:
# Two independent draws of 10 random integers from 1 to 99 (the upper bound, 100, is exclusive).
# NOTE(review): no seed is set in the cells shown, so these values change on every run.
a = np.random.randint(1,100,10)
b = np.random.randint(1,100,10)

In [29]:
# Element-wise equality: yields one boolean per position rather than a single True/False
a == b

array([False, False, False, False, False, False, False, False, False,
       False])

In [30]:
# Elements at even positions (0, 2, 4, ...) and at odd positions (1, 3, 5, ...), using a slice step of 2
a[::2], a[1::2]

(array([58, 11, 97, 82, 84]), array([72, 56, 15, 18, 49]))

In [31]:
a[[0,2,4]]

array([58, 11, 97])

### **Reading file**

In [32]:
# Load the survey scores from a text file in the working directory; the values come back as floats.
score = np.loadtxt("survey.txt")
score

array([ 7., 10.,  5., ...,  5.,  9., 10.])

In [33]:
# % of promoters (scores above 8)
promoters = score[score > 8]
print("% of promoters",len(promoters)/len(score) * 100)
# % of detractors (scores of 6 or below)
# NOTE: the variable is spelled `dectractors`; the spelling is kept because the NPS cell below reads this name.
dectractors = score[score <= 6]
print("% of dectractors",len(dectractors)/len(score) * 100)

% of promoters 52.185089974293064
% of dectractors 28.449014567266495


In [34]:
# Net Promoter Score: percentage of promoters minus percentage of detractors.
promoter_pct = len(promoters) / len(score) * 100
detractor_pct = len(dectractors) / len(score) * 100
NPS = promoter_pct - detractor_pct
NPS

23.73607540702657

### **Reshape**

In [35]:
arr = np.arange(1, 19)
arr.shape

(18,)

In [36]:
arr1 = arr.reshape(3,6)
arr1

array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12],
       [13, 14, 15, 16, 17, 18]])

In [37]:
# -1 lets NumPy infer that dimension: 18 elements / 3 rows = 6 columns.
arr2 = arr.reshape(3, -1)
arr2

array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12],
       [13, 14, 15, 16, 17, 18]])

In [38]:
# Here the row count is inferred: 18 elements / 2 columns = 9 rows.
arr3 = arr.reshape(-1,2)
arr3

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10],
       [11, 12],
       [13, 14],
       [15, 16],
       [17, 18]])

In [39]:
# Three dimensions: a single 3x6 block (note the extra pair of brackets in the output).
arr4 = arr.reshape(1,3,6)
arr4

array([[[ 1,  2,  3,  4,  5,  6],
        [ 7,  8,  9, 10, 11, 12],
        [13, 14, 15, 16, 17, 18]]])

### **Transpose**

In [40]:
# 4x4 grid holding 1..16, filled row by row.
a = np.arange(1, 17).reshape(4, 4)
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16]])

In [41]:
a.T

array([[ 1,  5,  9, 13],
       [ 2,  6, 10, 14],
       [ 3,  7, 11, 15],
       [ 4,  8, 12, 16]])

In [42]:
np.transpose(a)

array([[ 1,  5,  9, 13],
       [ 2,  6, 10, 14],
       [ 3,  7, 11, 15],
       [ 4,  8, 12, 16]])

### **Indexing in 2D Array**

In [43]:
a 

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16]])

In [44]:
a[2] # Entire row at index 2 (the 3rd row)

array([ 9, 10, 11, 12])

In [45]:
a[:,2] # Entire column at index 2 (the 3rd column): ":" keeps every row

array([ 3,  7, 11, 15])

In [46]:
a [2,3] # specific element: row index 2, column index 3

12

In [47]:
a[2][3] # Get the 3rd row first, then the 4th element of that row

12

In [48]:
a[[1,2]] # get the 2nd and 3rd rows (row indices 1 and 2)

array([[ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [49]:
# The two index lists are paired element by element: this picks a[1,2] and a[2,3],
# i.e. two single elements, NOT the block formed by rows 1-2 and columns 2-3.
a[[1,2],[2,3]] # elements at (row 1, col 2) and (row 2, col 3)

array([ 7, 12])

In [50]:
# Row slice and column slice in one indexing operation: rows 1-2 and columns 1-2.
a[1:3,1:3]

array([[ 6,  7],
       [10, 11]])

In [51]:
# Two row slices in sequence: a[1:3] keeps rows 1-2, then [1:3] slices the rows of that result.
a[1:3][1:3]

array([[ 9, 10, 11, 12]])

#### Why difference?

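`a[1:3, 1:3]` applies both slices in a single indexing operation: rows 1–2 and columns 1–2, which gives a 2×2 block.

`a[1:3][1:3]` is two row slices applied one after the other: `a[1:3]` first produces an intermediate array holding rows 1–2, and the second `[1:3]` then slices the rows of that intermediate array, so only its row at index 1 (row 2 of the original) remains.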

### **Masking**

In [52]:
# 15 random integers from 1 to 99, and a boolean mask marking the ones greater than 15.
r = np.random.randint(1,100, 15)
mask = r > 15

In [53]:
mask

array([ True, False,  True,  True,  True,  True,  True,  True,  True,
       False,  True,  True,  True,  True,  True])

In [54]:
r[mask]

array([74, 50, 93, 49, 52, 99, 80, 92, 23, 73, 69, 27, 46])

In [55]:
# 16 random integers from 1 to 49, arranged as a 4x4 array.
r2 = np.random.randint(1,50,16).reshape(4,4)
r2

array([[11, 20,  3, 33],
       [ 5,  5, 48, 11],
       [48, 48, 15, 39],
       [18, 40,  9, 28]])

In [56]:
mask = r2<7
mask

array([[False, False,  True, False],
       [ True,  True, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [57]:
r2[mask]

array([3, 5, 5])

In [58]:
# Assigning through the mask overwrites every selected element at once; r2 itself is modified.
r2[mask] = 9999 # assigning at once
r2

array([[  11,   20, 9999,   33],
       [9999, 9999,   48,   11],
       [  48,   48,   15,   39],
       [  18,   40,    9,   28]])

### **Aggregate functions**

In [59]:
a = np.arange(16)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [60]:
# Whole-array aggregates, written with the equivalent ndarray methods.
print("Sum", a.sum())
print("Average: ", a.mean())
print("Min: ", a.min())
print("Max: ", a.max())

Sum 120
Average:  7.5
Min:  0
Max:  15


In [61]:
b = np.random.randint(1,200, (4,4))
b

array([[ 44,  27, 120,  89],
       [112,  17,  36,  28],
       [116,  80,  82,  84],
       [111,  61,  69,  17]])

In [62]:
print("Sum: ", np.sum(b))
print("Mean: ", np.mean(b))
print("Min: ",np.min(b))
print("Max: ", np.max(b))

Sum:  1093
Mean:  68.3125
Min:  17
Max:  120


In [63]:
# axis=0 collapses the rows: one result per column (column-wise aggregates)
print("Sum: ", np.sum(b, axis = 0))
print("Mean: ", np.mean(b, axis = 0))
print("Min: ",np.min(b, axis = 0))
print("Max: ", np.max(b, axis = 0))

Sum:  [383 185 307 218]
Mean:  [95.75 46.25 76.75 54.5 ]
Min:  [44 17 36 17]
Max:  [116  80 120  89]


In [64]:
# axis=1 collapses the columns: one result per row (row-wise aggregates)
print("Sum: ", np.sum(b, axis = 1))
print("Mean: ", np.mean(b, axis = 1))
print("Min: ",np.min(b, axis = 1))
print("Max: ", np.max(b, axis = 1))

Sum:  [280 193 362 258]
Mean:  [70.   48.25 90.5  64.5 ]
Min:  [27 17 80 17]
Max:  [120 112 116 111]


### **Logical Functions**

In [65]:
mask1 = [True, False, True, True]
np.any(mask1), np.all(mask1)

(True, False)

In [66]:
prices = np.array([50, 45, 25, 20, 35, 70])
budget = 40
# Is at least one price below the budget?
(prices < budget).any()

True

In [67]:
# All prices less than 40
np.all(prices < budget)

False

In [68]:
arr = np.random.randint(-20, 20, 10)
arr

array([-13,   7,  10,  -8,  -5,   4,  -4,  15,  12,   9])

In [69]:
# Replace negative values with the label "invalid"; because a string is mixed in,
# the remaining numbers come back as strings too (see the dtype in the output).
np.where(arr < 0, "invalid",arr)

array(['invalid', '7', '10', 'invalid', 'invalid', '4', 'invalid', '15',
       '12', '9'], dtype='<U21')

In [70]:
# Scale values above 5 by 0.9 and keep the others; the result is a float array
# because one of the two branches (arr * 0.9) is float.
np.where(arr > 5, arr *0.9,arr)

array([-13. ,   6.3,   9. ,  -8. ,  -5. ,   4. ,  -4. ,  13.5,  10.8,
         8.1])

### **Fitbit**

In [71]:
# Load the table as strings: the file mixes dates, numbers and text labels,
# so every field is read as str and the numeric columns are converted in a later cell.
fit = np.loadtxt("fit.txt", dtype = 'str')
fit

array([['06-10-2017', '5464', 'Neutral', '181', '5', 'Inactive'],
       ['07-10-2017', '6041', 'Sad', '197', '8', 'Inactive'],
       ['08-10-2017', '25', 'Sad', '0', '5', 'Inactive'],
       ['09-10-2017', '5461', 'Sad', '174', '4', 'Inactive'],
       ['10-10-2017', '6915', 'Neutral', '223', '5', 'Active'],
       ['11-10-2017', '4545', 'Sad', '149', '6', 'Inactive'],
       ['12-10-2017', '4340', 'Sad', '140', '6', 'Inactive'],
       ['13-10-2017', '1230', 'Sad', '38', '7', 'Inactive'],
       ['14-10-2017', '61', 'Sad', '1', '5', 'Inactive'],
       ['15-10-2017', '1258', 'Sad', '40', '6', 'Inactive'],
       ['16-10-2017', '3148', 'Sad', '101', '8', 'Inactive'],
       ['17-10-2017', '4687', 'Sad', '152', '5', 'Inactive'],
       ['18-10-2017', '4732', 'Happy', '150', '6', 'Active'],
       ['19-10-2017', '3519', 'Sad', '113', '7', 'Inactive'],
       ['20-10-2017', '1580', 'Sad', '49', '5', 'Inactive'],
       ['21-10-2017', '2822', 'Sad', '86', '6', 'Inactive'],
       ['22-10

In [72]:
type(fit)

numpy.ndarray

In [73]:
fit.shape, fit.size, fit.ndim

((96, 6), 576, 2)

In [74]:
fit[0]

array(['06-10-2017', '5464', 'Neutral', '181', '5', 'Inactive'],
      dtype='<U10')

In [75]:
fit.T

array([['06-10-2017', '07-10-2017', '08-10-2017', '09-10-2017',
        '10-10-2017', '11-10-2017', '12-10-2017', '13-10-2017',
        '14-10-2017', '15-10-2017', '16-10-2017', '17-10-2017',
        '18-10-2017', '19-10-2017', '20-10-2017', '21-10-2017',
        '22-10-2017', '23-10-2017', '24-10-2017', '25-10-2017',
        '26-10-2017', '27-10-2017', '28-10-2017', '29-10-2017',
        '30-10-2017', '31-10-2017', '01-11-2017', '02-11-2017',
        '03-11-2017', '04-11-2017', '05-11-2017', '06-11-2017',
        '07-11-2017', '08-11-2017', '09-11-2017', '10-11-2017',
        '11-11-2017', '12-11-2017', '13-11-2017', '14-11-2017',
        '15-11-2017', '16-11-2017', '17-11-2017', '18-11-2017',
        '19-11-2017', '20-11-2017', '21-11-2017', '22-11-2017',
        '23-11-2017', '24-11-2017', '25-11-2017', '26-11-2017',
        '27-11-2017', '28-11-2017', '29-11-2017', '30-11-2017',
        '01-12-2017', '02-12-2017', '03-12-2017', '04-12-2017',
        '05-12-2017', '06-12-2017', '07-

In [76]:
fit.T.shape

(6, 96)

In [77]:
# Unpacking an array iterates over its first axis: each of the 4 rows lands in its own variable.
a,b,c,d = np.array([[1],[2],[3],[4]])
a,b,c,d

(array([1]), array([2]), array([3]), array([4]))

In [78]:
# fit.T has one row per original column (shape (6, 96)), so unpacking it yields one array per field.
date, step_count,mood, calories_burned, hours_sleep, activity_status = fit.T

In [79]:
date.dtype, step_count.dtype,mood.dtype, calories_burned.dtype, hours_sleep.dtype, activity_status.dtype

(dtype('<U10'),
 dtype('<U10'),
 dtype('<U10'),
 dtype('<U10'),
 dtype('<U10'),
 dtype('<U10'))

In [80]:
# The file was loaded with dtype='str', so the numeric fields are still strings; convert them to integers.
step_count = step_count.astype("int64")
hours_sleep = hours_sleep.astype("int64")
calories_burned = calories_burned.astype("int64")

In [81]:
date.dtype, step_count.dtype,mood.dtype, calories_burned.dtype, hours_sleep.dtype, activity_status.dtype

(dtype('<U10'),
 dtype('int64'),
 dtype('<U10'),
 dtype('int64'),
 dtype('int64'),
 dtype('<U10'))

In [82]:
np.unique(mood)

array(['Happy', 'Neutral', 'Sad'], dtype='<U10')

In [83]:
# The boolean mask built from the mood column selects the matching rows of the full table.
fit[mood == "Happy"]

array([['18-10-2017', '4732', 'Happy', '150', '6', 'Active'],
       ['29-10-2017', '330', 'Happy', '10', '6', 'Inactive'],
       ['31-10-2017', '4550', 'Happy', '150', '8', 'Active'],
       ['01-11-2017', '4435', 'Happy', '141', '5', 'Inactive'],
       ['02-11-2017', '4779', 'Happy', '156', '4', 'Inactive'],
       ['03-11-2017', '1831', 'Happy', '57', '5', 'Inactive'],
       ['04-11-2017', '2255', 'Happy', '72', '4', 'Inactive'],
       ['05-11-2017', '539', 'Happy', '17', '5', 'Active'],
       ['06-11-2017', '5464', 'Happy', '181', '4', 'Inactive'],
       ['08-11-2017', '4068', 'Happy', '131', '2', 'Inactive'],
       ['09-11-2017', '4683', 'Happy', '154', '9', 'Inactive'],
       ['10-11-2017', '4033', 'Happy', '137', '5', 'Inactive'],
       ['11-11-2017', '6314', 'Happy', '193', '6', 'Active'],
       ['12-11-2017', '614', 'Happy', '19', '4', 'Active'],
       ['13-11-2017', '3149', 'Happy', '101', '5', 'Active'],
       ['14-11-2017', '4005', 'Happy', '139', '8', 'Active']

In [84]:
step_count[mood == "Happy"]

array([4732,  330, 4550, 4435, 4779, 1831, 2255,  539, 5464, 4068, 4683,
       4033, 6314,  614, 3149, 4005, 4880, 4136,  705,  269, 4275, 5999,
       4421, 6930, 5195,  546,  493,  995, 3608,  774, 1421, 4064, 2725,
       5934, 1867, 7422, 5537, 5376,  153, 2203])

In [85]:
# Mean step count over the days whose mood is "Happy"
np.mean(step_count[mood == "Happy"])

3392.725

In [86]:
np.mean(step_count[mood == "Sad"])

2103.0689655172414

### **Sorting Array**

In [87]:
a = np.random.randint(1,100,10)
a

array([54, 33, 30, 93, 45, 72, 57, 65, 74, 43])

In [89]:
# np.sort returns a sorted copy; the original array `a` is left unchanged (compare the two outputs)
np.sort(a),a

(array([30, 33, 43, 45, 54, 57, 65, 72, 74, 93]),
 array([54, 33, 30, 93, 45, 72, 57, 65, 74, 43]))

In [90]:
# The sort method works in place: it reorders `a` itself and displays no result.
a.sort()

In [91]:
a

array([30, 33, 43, 45, 54, 57, 65, 72, 74, 93])

#### **2D Array**

**Why This Makes Sense**

    Axis Definition: In NumPy, axis=0 always refers to the first dimension (rows in 2D), and axis=1 refers to the second dimension (columns in 2D). Sorting along an axis sorts the elements in that dimension while grouping by the other dimension.
    Consistency: This convention is consistent with other NumPy operations (e.g., sum, mean):
        axis=0: Collapses the row dimension (moves down each column), producing one result per column.
        axis=1: Collapses the column dimension (moves across each row), producing one result per row.
    Higher Dimensions: The logic extends to higher-dimensional arrays. For a 3D array, axis=0 sorts along the first dimension (depth), axis=1 along the second (rows), and axis=2 along the third (columns).

**For a 2D array:**

    axis=0 (column-wise): Sorts each column independently, moving values along rows.
    axis=1 (row-wise): Sorts each row independently, moving values along columns.

In [92]:
# 16 random integers from 1 to 99, viewed as a 4x4 grid for the sorting examples below.
a = np.random.randint(1,100,16)
b = a.reshape(4,4)
b

array([[11, 81, 78, 71],
       [ 1, 14, 12,  6],
       [50, 96, 49, 42],
       [85, 90, 71, 27]])

In [93]:
# With no axis argument np.sort uses the last axis, so each row of a 2D array is sorted (same result as axis=1).
np.sort(b)

array([[11, 71, 78, 81],
       [ 1,  6, 12, 14],
       [42, 49, 50, 96],
       [27, 71, 85, 90]])

In [94]:
# axis=0: each column is sorted independently (values move up and down between rows).
np.sort(b, axis = 0)

array([[ 1, 14, 12,  6],
       [11, 81, 49, 27],
       [50, 90, 71, 42],
       [85, 96, 78, 71]])

In [95]:
# axis=1: each row is sorted independently (values move left and right between columns).
np.sort(b, axis = 1)

array([[11, 71, 78, 81],
       [ 1,  6, 12, 14],
       [42, 49, 50, 96],
       [27, 71, 85, 90]])

#### **3D Arrays**

In [96]:
# Three 5x5 blocks of random integers from 1 to 999.
a = np.random.randint(1, 1000,(3,5,5))
a

array([[[234, 527, 238, 281, 765],
        [785,  50, 580, 737, 654],
        [874, 138, 263, 754, 133],
        [375, 918, 915, 849,  77],
        [208, 304, 983, 756, 776]],

       [[180, 799, 846, 154,  84],
        [442, 641,  99, 716, 102],
        [901, 573, 430, 440, 476],
        [ 33, 280, 144, 688,  72],
        [283, 919, 530, 745, 282]],

       [[796, 745, 969, 602, 404],
        [648, 443, 569,  19, 885],
        [259, 750, 891, 103,  21],
        [858, 225, 193, 396,  96],
        [770, 726, 512, 556,  71]]])

In [98]:
# axis=0: sorts across the three blocks; for each (row, column) position the three values are put in order.
np.sort(a, axis = 0)

array([[[180, 527, 238, 154,  84],
        [442,  50,  99,  19, 102],
        [259, 138, 263, 103,  21],
        [ 33, 225, 144, 396,  72],
        [208, 304, 512, 556,  71]],

       [[234, 745, 846, 281, 404],
        [648, 443, 569, 716, 654],
        [874, 573, 430, 440, 133],
        [375, 280, 193, 688,  77],
        [283, 726, 530, 745, 282]],

       [[796, 799, 969, 602, 765],
        [785, 641, 580, 737, 885],
        [901, 750, 891, 754, 476],
        [858, 918, 915, 849,  96],
        [770, 919, 983, 756, 776]]])

In [99]:
# axis=1: inside each block, every column is sorted independently.
np.sort(a, axis = 1)

array([[[208,  50, 238, 281,  77],
        [234, 138, 263, 737, 133],
        [375, 304, 580, 754, 654],
        [785, 527, 915, 756, 765],
        [874, 918, 983, 849, 776]],

       [[ 33, 280,  99, 154,  72],
        [180, 573, 144, 440,  84],
        [283, 641, 430, 688, 102],
        [442, 799, 530, 716, 282],
        [901, 919, 846, 745, 476]],

       [[259, 225, 193,  19,  21],
        [648, 443, 512, 103,  71],
        [770, 726, 569, 396,  96],
        [796, 745, 891, 556, 404],
        [858, 750, 969, 602, 885]]])

In [100]:
# axis=2: inside each block, every row is sorted independently.
np.sort(a, axis = 2)

array([[[234, 238, 281, 527, 765],
        [ 50, 580, 654, 737, 785],
        [133, 138, 263, 754, 874],
        [ 77, 375, 849, 915, 918],
        [208, 304, 756, 776, 983]],

       [[ 84, 154, 180, 799, 846],
        [ 99, 102, 442, 641, 716],
        [430, 440, 476, 573, 901],
        [ 33,  72, 144, 280, 688],
        [282, 283, 530, 745, 919]],

       [[404, 602, 745, 796, 969],
        [ 19, 443, 569, 648, 885],
        [ 21, 103, 259, 750, 891],
        [ 96, 193, 225, 396, 858],
        [ 71, 512, 556, 726, 770]]])