In [218]:
import numpy as np
np.__version__

'1.23.5'

## Differences between lists and NumPy Arrays
* An array's size is immutable.  You cannot append, insert or remove elements, like you can with a list.
* All of an array's elements must be of the same [data type](https://docs.scipy.org/doc/numpy-1.14.0/user/basics.types.html).
* A NumPy array behaves in a Pythonic fashion.  You can `len(my_array)` just like you would with a list.


In [219]:
# GPAs held in an ordinary Python list.
gpas_as_list = [
    4.0,
    3.286,
    3.5,
    4.0,
]

In [220]:
# Convert the Python list into a one-dimensional NumPy array.
gpas = np.array(gpas_as_list)

In [221]:
# 2-D array of GPAs, one row per student.  float16 is requested explicitly, so
# each value takes 2 bytes and is rounded to half precision when stored.
students_gpas = np.array(
    [[4.0, 3.286, 3.5, 4.0],
     [3.2, 3.8, 4.0, 4.0],
     [3.96, 3.92, 4.0, 4.0]],
    dtype=np.float16,
)
students_gpas

array([[4.   , 3.285, 3.5  , 4.   ],
       [3.2  , 3.8  , 4.   , 4.   ],
       [3.96 , 3.92 , 4.   , 4.   ]], dtype=float16)

In [222]:
# Number of dimensions (axes) of the array.
students_gpas.ndim

2

In [223]:
# Length of each dimension, as a tuple: (rows, columns).
students_gpas.shape

(3, 4)

In [224]:
# Total number of elements across all dimensions.
# (Not displayed: a cell only echoes the value of its last expression.)
students_gpas.size
# Number of rows (students, in this case): len() gives the length of the first axis.
len(students_gpas)

3

In [225]:
# Number of bytes used to store each element.
students_gpas.itemsize

2

In [226]:
# Total bytes used by the element data: bytes per element times number of elements.

students_gpas.itemsize * students_gpas.size

24

In [227]:
%whos ndarray

Variable        Type       Data/Info
------------------------------------
fake_log        ndarray    100: 100 elems, type `uint16`, 200 bytes
gpas            ndarray    4: 4 elems, type `float64`, 32 bytes
index           ndarray    2x2: 4 elems, type `int32`, 16 bytes
students_gpas   ndarray    3x4: 12 elems, type `float16`, 24 bytes
study_minutes   ndarray    100: 100 elems, type `uint16`, 200 bytes
tester          ndarray    3x100: 300 elems, type `uint16`, 600 bytes


In [228]:
# Another way of viewing the array's metadata (shape, strides, itemsize, type, ...).
np.info(students_gpas)

class:  ndarray
shape:  (3, 4)
strides:  (8, 2)
itemsize:  2
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x221ecdad720
byteorder:  little
byteswap:  False
type: float16


In [229]:
# Index along the first axis: the third row (index 2).
students_gpas[2]

array([3.96, 3.92, 4.  , 4.  ], dtype=float16)

In [230]:
# Chained indexing: take the third row, then that row's third element.
students_gpas[2][2]

4.0

## About data types


* By choosing the proper [data type](https://docs.scipy.org/doc/numpy-1.14.0/user/basics.types.html) you can greatly reduce the size required to store objects
* Data types are maintained by wrapping values in a [scalar representation](https://docs.scipy.org/doc/numpy-1.14.0/reference/arrays.scalars.html)
* `np.zeros` is a handy way to create a new array of a given size with every element initialized to zero.

In [231]:
# 100 zero-initialized slots stored as unsigned 16-bit integers.
study_minutes = np.zeros(shape=100, dtype=np.uint16)
study_minutes

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint16)

In [232]:
%whos


Variable            Type           Data/Info
--------------------------------------------
fake_log            ndarray        100: 100 elems, type `uint16`, 200 bytes
first_day_minutes   uint16         150
gpas                ndarray        4: 4 elems, type `float64`, 32 bytes
gpas_as_list        list           n=4
index               ndarray        2x2: 4 elems, type `int32`, 16 bytes
np                  module         <module 'numpy' from 'c:\<...>ges\\numpy\\__init__.py'>
os                  module         <module 'os' from 'C:\\Pr<...>s\\Python39\\lib\\os.py'>
rand                RandomState    RandomState(MT19937)
students_gpas       ndarray        3x4: 12 elems, type `float16`, 24 bytes
study_minutes       ndarray        100: 100 elems, type `uint16`, 200 bytes
sys                 module         <module 'sys' (built-in)>
tester              ndarray        3x100: 300 elems, type `uint16`, 600 bytes


In [233]:
# Presumably the number of minutes in a day (60 minutes x 24 hours), i.e. an upper
# bound for one day's entry; 1440 fits easily in a uint16 (max 65535).
60 * 24

1440

In [234]:
# Read the first element.
study_minutes[0]

0

In [235]:
# Assign by index; the array's size does not change, only the stored value.
study_minutes[0] = 150

In [236]:
# Reading a single element yields a NumPy scalar rather than a plain Python int.
first_day_minutes = study_minutes[0]

In [237]:
# Display the value that was just read.
first_day_minutes

150

In [238]:
# The scalar keeps the array's data type (numpy.uint16).
type(first_day_minutes)

numpy.uint16

In [239]:
# Set the second entry.
study_minutes[1] = 60

In [240]:
# Display the array after the two assignments.
study_minutes

array([150,  60,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0], dtype=uint16)

In [241]:
# Slice assignment: positions 2 through 5 are filled, in order, from the list.
study_minutes[2:6] = [80, 60, 30, 90]

## Multidimensional Arrays

* The data structure is actually called `ndarray`, representing any **n**umber of **d**imensions
* Arrays can have multiple dimensions, you declare them on creation
* Dimensions help define what each element in the array represents.  A two dimensional array is just an array of arrays
* **Rank** defines how many dimensions an array contains 
* **Shape** defines the length of each of the array's dimensions
* Each dimension is also referred to as an **axis**, and they are zero-indexed. Multiples are called **axes**.
* A 2d array is AKA **matrix**.

In [242]:
# Section 2 of the tutorial


### Creation
* Random State
* Appending rows

## Indexing
* Shortcut (tuple)
* Fancy Indexing

In [243]:

# Build a 2-D array by stacking two equal-length rows -- no nested loops needed.
# The second row is derived from study_minutes with np.zeros_like, so its length
# and dtype always match the first row instead of repeating 100 / np.uint16 by hand.
tester = np.array([study_minutes, np.zeros_like(study_minutes)])
# tester.shape is (2, 100): two rows of 100 elements each.
# IMPORTANT: rows of different lengths cannot form a regular 2-D array; you would
# have to ask for an object array explicitly, e.g.
#   np.array([study_minutes, np.zeros(99, np.uint16)], dtype=object)
tester

array([[150,  60,  80,  60,  30,  90,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   

In [244]:
# Element assignment via chained indexing: tester[1] selects the second row,
# then [0] selects that row's first element.
tester[1][0]= 60

# NOTE: the triple-quoted text below is a string expression, not a comment.
# Because it is the last expression in the cell, its value is echoed as the
# cell's output.
"""when you have nnumbers that are seperated in by comma inside [], a tuple is assumeed so...
tester[1,0]
really means this.. 
tester[(1, 0)]
"""




'when you have nnumbers that are seperated in by comma inside [], a tuple is assumeed so...\ntester[1,0]\nreally means this.. \ntester[(1, 0)]\n'

In [245]:
# Fancy indexing

# Generate reproducible fake data: a RandomState seeded with 42 always produces
# the same sequence.  Draw 100 integers from 30 (inclusive) to 180 (exclusive).
rand = np.random.RandomState(seed=42)
fake_log = rand.randint(low=30, high=180, size=100, dtype=np.uint16)
fake_log

array([132, 122, 128,  44, 136, 129, 101,  95,  50, 132, 151,  64, 104,
       175, 117, 146, 139, 129, 133, 176,  98, 160, 179,  99,  82, 142,
        31, 106, 117,  56,  98,  67, 121, 159,  81, 170,  31,  50,  49,
        87, 179,  51, 116, 177, 118,  78, 171, 117,  88, 123, 102,  44,
        79,  31, 108,  80,  59, 137,  84,  93, 155, 160,  67,  80, 166,
       164,  70,  50, 102, 113,  47, 131, 161, 118,  82,  89,  81,  43,
        81,  38, 119,  52,  82,  31, 159,  57, 113,  71, 121, 140,  91,
        70,  37, 106,  64, 127, 110,  58,  93,  79], dtype=uint16)

In [246]:
# Two separate element lookups collected into a Python list.
[fake_log[3], fake_log[8]]
# Same two values, but the bare comma builds a tuple instead of a list.
fake_log[3], fake_log[8]

# Fancy indexing: the outer [] is the indexing operation and the inner [0, 99] is a
# list of positions.  The result is a new array holding the elements at those positions.
fake_log[[0,99]]

array([132,  79], dtype=uint16)

In [247]:
# A position may be repeated in the index list; the element is returned once per occurrence.
fake_log[[0,0]]

array([132, 132], dtype=uint16)

In [248]:
# 2x2 array of positions, used as a fancy index.
index = np.array([[3, 8], [0, 1]])

In [249]:
# Indexing with an integer array: the result takes the shape of the index array (2x2),
# with each position replaced by the element found there.
fake_log[index]

array([[ 44,  50],
       [132, 122]], dtype=uint16)

In [259]:

# With axis=0, np.append needs the new values to have the same number of dimensions
# as tester, so fake_log is wrapped in a list to turn it into a single row.
# np.append returns a new array rather than modifying tester, hence the reassignment;
# note that re-running this cell appends another copy of the row.
tester = np.append(tester, [fake_log], axis=0 )
tester

In [260]:
# Display tester after the append.
tester



array([[150,  60,  80,  60,  30,  90,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0],
       [ 60,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   

In [281]:

# This is NOT fancy indexing: the comma shortcut builds a tuple, which addresses
# a single item (row 1, column 1).
tester[1,1] = 360 # equivalent to tester[(1,1)]

# Remember: on a 1-D array, fancy indexing needs double brackets, e.g. fake_log[[0,1]].
# The inner brackets are a list of positions, so an array of the selected elements comes back.

# Look at what sits directly inside the outer brackets: a tuple addresses one item
# across dimensions, while a list selects several items along one dimension.

array([132, 122, 128,  44], dtype=uint16)

## RECAP

## Creation 
* You can create a random but bound grouping of values using the `np.random` package.  
  * `RandomState` lets you seed your randomness in a way that is repeatable.
* You can append a row in a couple of ways
   * You can use the `np.append` function.  Make sure the new row has the same number of dimensions as the array and the same length as its existing rows.
   * You can create/reassign a new array by including the existing array as part of the iterable in creation.


## Indexing
* You can use an indexing shortcut by separating dimensions with a comma.  
* You can index using a `list` or `np.array`.  Values will be pulled out at that specific index.  This is known as fancy indexing.
  * Resulting array shape matches the index array layout.  Be careful to distinguish between the tuple shortcut and fancy indexing.


In [280]:
# New section: Boolean indexing

In [286]:
# As in pandas, comparing an array with a scalar is applied element by element and
# produces a boolean array: True wherever the value is less than 60.
fake_log < 60

array([44, 50, 31, 56, 31, 50, 49, 51, 44, 31, 59, 50, 47, 43, 38, 52, 31,
       57, 37, 58], dtype=uint16)

In [287]:
# A boolean array can be used to index an array of the same size.  Here it filters
# the very array it was derived from, keeping only the entries where the mask is True.
fake_log[fake_log < 60]

array([44, 50, 31, 56, 31, 50, 49, 51, 44, 31, 59, 50, 47, 43, 38, 52, 31,
       57, 37, 58], dtype=uint16)

In [289]:
# Long-hand equivalent of the boolean-index expression above, written as an explicit loop.

results = []
for value in fake_log:
    if not value < 60:
        continue  # skip anything that is 60 or more
    results.append(value)
np.array(results)

array([44, 50, 31, 56, 31, 50, 49, 51, 44, 31, 59, 50, 47, 43, 38, 52, 31,
       57, 37, 58], dtype=uint16)

In [291]:
# Boolean indexing on a 2-D array returns the matching values as a one-dimensional array.
tester[tester < 60]

array([30,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0, 44, 50, 31, 56, 31, 50, 49, 51, 44, 31, 59,
       50, 47, 43, 38, 52, 31, 57, 37, 58], dtype=uint16)

In [292]:
# Combine two boolean arrays with the bitwise operator "&".
# The arrays are compared item by item, position by position.
# A position is True in the result only when it is True in both inputs.
np.array([False, True, True]) & np.array([True, False, True])

array([False, False,  True])

In [293]:
# Mind the order of operations: & binds more tightly than > and <, so each
# comparison must be wrapped in its own parentheses.
tester[(tester > 0) & (tester < 60)]

array([30, 44, 50, 31, 56, 31, 50, 49, 51, 44, 31, 59, 50, 47, 43, 38, 52,
       31, 57, 37, 58], dtype=uint16)

In [297]:
# A boolean index can also be the target of an assignment: every element that meets
# the condition is overwritten in place ("select and update").
tester[tester < 60] = 0

In [298]:
# Display tester after the masked assignment.
tester

array([[150,  60,  80,  60,   0,  90,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0],
       [ 60, 360,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   

In [311]:
# (2) is just the integer 2 -- parentheses without a comma do not make a tuple --
# so this is the same as tester[2]: the third row.
tester[(2)]

array([132, 122, 128,   0, 136, 129, 101,  95,   0, 132, 151,  64, 104,
       175, 117, 146, 139, 129, 133, 176,  98, 160, 179,  99,  82, 142,
         0, 106, 117,   0,  98,  67, 121, 159,  81, 170,   0,   0,   0,
        87, 179,   0, 116, 177, 118,  78, 171, 117,  88, 123, 102,   0,
        79,   0, 108,  80,   0, 137,  84,  93, 155, 160,  67,  80, 166,
       164,  70,   0, 102, 113,   0, 131, 161, 118,  82,  89,  81,   0,
        81,   0, 119,   0,  82,   0, 159,   0, 113,  71, 121, 140,  91,
        70,   0, 106,  64, 127, 110,   0,  93,  79], dtype=uint16)

## Boolean Array Indexing
* You can create a boolean array by using comparison operators on an array.
  * You can use boolean arrays for fancy indexing.
  * Boolean arrays can be compared by using bitwise operators (`&`, `|`)
      * Do not use the `and` keyword.
      * Remember to mind the order of operations when combining
* Even though boolean indexing returns a new array, you can update an existing array using a boolean index.

fruit = ["Banana", "StrawBerr", "Watermelon", "Pinapple", "Kiwi", ]

In [314]:
# A plain Python list, used to contrast list slicing with ndarray slicing.
fruit = [
    "Banana",
    "StrawBerr",
    "Watermelon",
    "Pinapple",
    "Kiwi",
]

In [317]:
# Slicing examples on a plain Python list

# From index 0 up to, but not including, index 4.
fruit[:4]
# From index 4 (inclusive) through the end of the list.
fruit[4:]
# A full slice of a list creates a new list, so changing it leaves the original untouched.
fruitCopy = fruit[:]
fruitCopy[3] = "Cherry"
print(fruit)
print(fruitCopy)

# Step of 3: every third element, starting at index 0.
fruit[::3]

['Banana', 'Pinapple']

In [320]:
# Print both lists: only the copy shows "Cherry"; the original is unchanged.
print(fruit)
print(fruitCopy)

['Banana', 'StrawBerr', 'Watermelon', 'Pinapple', 'Kiwi']
['Banana', 'StrawBerr', 'Watermelon', 'Cherry', 'Kiwi']


In [337]:
# NumPy slicing examples

# The integers 0 through 41 in a flat, one-dimensional array.
practise = np.arange(0, 42)
# Fancy indexing on the flat array: the elements at positions 3 and 8.
practise[[3, 8]]

# Reshape in place into 7 rows x 6 columns (an array of arrays).  Assigning to
# .shape changes this same array object rather than producing a new one.
practise.shape = 7, 6

In [345]:
# ndarray indexing follows the familiar Python conventions.
# Only the final expression's value is displayed by the cell.
# The third row.
practise[2]
# Third row, second column: a single value.
practise[2,1]
# A range of rows: indices 2, 3 and 4.
practise[2:5]
# The same rows, but only the fourth column (index 3).
practise[2:5, 3]
# The same rows, from the fourth column through the last column.
practise[2:5, 3:]
# The same rows, every other column starting at the fourth.
practise[2:5, 3::2]



array([[15, 17],
       [21, 23],
       [27, 29]])

In [346]:
# Unlike list slicing, slicing an ndarray does NOT return a copy -- it returns a view.
not_copied= practise[:]
not_copied[0, 0 ] = 90210
# Both arrays show the change: not_copied refers to the same data as practise.
practise, not_copied

(array([[90210,     1,     2,     3,     4,     5],
        [    6,     7,     8,     9,    10,    11],
        [   12,    13,    14,    15,    16,    17],
        [   18,    19,    20,    21,    22,    23],
        [   24,    25,    26,    27,    28,    29],
        [   30,    31,    32,    33,    34,    35],
        [   36,    37,    38,    39,    40,    41]]),
 array([[90210,     1,     2,     3,     4,     5],
        [    6,     7,     8,     9,    10,    11],
        [   12,    13,    14,    15,    16,    17],
        [   18,    19,    20,    21,    22,    23],
        [   24,    25,    26,    27,    28,    29],
        [   30,    31,    32,    33,    34,    35],
        [   36,    37,    38,    39,    40,    41]]))

In [348]:
# .base is None for an array that owns its own data, and refers to the source
# array for a view.  Only the last comparison below is displayed by the cell.
practise.base is None 
not_copied.base is None
# The view's base is the array it was sliced from.
not_copied.base is practise

True

In [349]:
# The flags property can be indexed like a dict; the 'OWNDATA' entry tells whether
# an array owns its memory or borrows it from another object.
practise.flags['OWNDATA'], not_copied.flags['OWNDATA']

(True, False)

## Slicing
* Works a lot like normal list slicing.
* You can use commas to separate each dimension slice.
* Always returns a data view
* You can access the base object using the `ndarray.base` property