# DATA ANALYSIS WITH PYTHON


In [1]:
import numpy as np

In [2]:
dat = {i: np.random.randn() for i in range(7)}

In [3]:
dat

{0: 0.49042022535801333,
 1: 0.2431145213381427,
 2: 0.8367838696275055,
 3: 0.23424384998313216,
 4: -0.5151739166655485,
 5: 0.7770222835447722,
 6: -0.4244864885047254}

In [4]:
print(dat)

{0: 0.49042022535801333, 1: 0.2431145213381427, 2: 0.8367838696275055, 3: 0.23424384998313216, 4: -0.5151739166655485, 5: 0.7770222835447722, 6: -0.4244864885047254}


#### DOCSTRINGS

In [5]:
def add_numbers(a, b):
    """
    Add two numbers together
    Returns
    -------
    the_sum : type of arguments
    """
    return a + b

In [6]:
getattr('foo', 'split')

<function str.split(sep=None, maxsplit=-1)>

In [7]:
f = '''
jsjhsfhdhgfh
hjdsfhgdshgfh
'''

In [8]:
f.count('\n')

3

**STRING FORMATTING**
=====================

In [9]:
draft = '{0:.3f} {1:s} is {2:d}'.format(3.45468903, 'Maswa',345)
print(draft)

3.455 Maswa is 345


### ENCODING AND DECODING STRINGS TO AND FROM UNICODE

In this bl

In [10]:
STR = 'español'
encoded = STR.encode('utf-8')

In [11]:
encoded

b'espa\xc3\xb1ol'

In [12]:
type(encoded)

bytes

In [13]:
decoda = encoded.decode('utf-8')

In [14]:
decoda

'español'

In [15]:
print(decoda.encode('latin1'))

b'espa\xf1ol'


In [16]:
print(decoda.encode('utf-16'))

b'\xff\xfee\x00s\x00p\x00a\x00\xf1\x00o\x00l\x00'


In [17]:
print(decoda.encode('utf-16le'))

b'e\x00s\x00p\x00a\x00\xf1\x00o\x00l\x00'


TERNARY OPERATORS
===================
are used to shorten an if-else statement that produces a value into a single line, eg. 

In [18]:
c='smile'
'EXCITED' if c == 'smile' else 'SAD'

'EXCITED'

TUPLES
------------
are immutable but if an object in a tuple such as a list is mutable then you do the following;

In [19]:
tup = (4,'marcy',77,[4,6,7],'sassy')
tup[3].append(34)
tup

(4, 'marcy', 77, [4, 6, 7, 34], 'sassy')

see that the value in the list was changed cause it is mutable.

Unpacking tuples
-------------------------

In [20]:
a,b,c,d,e = tup
a

4

In [21]:
b

'marcy'

In [22]:
c

77

In [23]:
d

[4, 6, 7, 34]

In [24]:
e

'sassy'

Notice all the values in the tuple has not unpacked to the variables in their respective position in the tuple.

In [25]:
a_seq = [(3,4,5),(3,6,8)]
for i,j,k in a_seq:
    print('i={0} j={1} k={2}'.format(i,j,k))

i=3 j=4 k=5
i=3 j=6 k=8


UNPACKING TUPLE VALUES WITH *
----------------

In [26]:
a_tup = (6,7,4,66,44,7,4,0)
a,b,*_ = a_tup
print(a)
print(b)
print(_) #* automatically assigns the remaining values in the tup in a list-like format to the variable following *

6
7
[4, 66, 44, 7, 4, 0]


In [27]:
a_tup.count(4) #counting the number of times 4 appears in the tuple

2

### SORTING LISTS

In [28]:
a_list = ['ama','zipper','bag','tree','socrito']
a_list.sort(key=len)     #re-orders the list according to the length of each word
a_list

['ama', 'bag', 'tree', 'zipper', 'socrito']

In [29]:
a_list.sort()    #re-orders the list alphabetically
a_list

['ama', 'bag', 'socrito', 'tree', 'zipper']

BINARY SEARCH AND MAINTAINING SORTED LIST
===================

In [30]:
import bisect
an_arr = [4,5,3,88,5,7,9,4,3,5,7,5]
bisect.bisect(an_arr,7)    #finds the location where a value would be inserted in the list

3

In [31]:
bisect.insort(an_arr, 3)     #inserts a value to a list in that location
an_arr 

[3, 4, 5, 3, 88, 5, 7, 9, 4, 3, 5, 7, 5]

### LIST

In [32]:
seq = [7, 2, 3, 7, 5, 6, 0, 1]
seq[::-2]    #reverse count in two's

[1, 6, 7, 2]

In [33]:
seq[::-1]     #reverse list

[1, 0, 6, 5, 7, 3, 2, 7]

PYTHON ENUMERATE
==
#### used to keep track of the index of a value during iteration.

In [34]:
for i,v in enumerate(range(8)):          #the first value is used as the index value.
    print(v)
print('indexed',i)

0
1
2
3
4
5
6
7
indexed 7


In [35]:
a_dict={}
_list = ['gre','foo','bar','api']
for idx,val in enumerate(_list):
    a_dict[val]=idx
a_dict

{'gre': 0, 'foo': 1, 'bar': 2, 'api': 3}

### SORTED
*RETURNS A NEW SORTED LISTED OF VALUES IN ANY SEQUENCE*

In [36]:
    sorted(_list)

['api', 'bar', 'foo', 'gre']

In [37]:
sorted('Raphael Bambi')

[' ', 'B', 'R', 'a', 'a', 'a', 'b', 'e', 'h', 'i', 'l', 'm', 'p']

In [38]:
sorted([3,7,4,0,8,3,7,6])

[0, 3, 3, 4, 6, 7, 7, 8]

ZIP
---
### combines values in tuplues,lists in their exact position into list of tuples

In [39]:
list_one = ['max','diff','sum','pen']
list_two = ['den','soy','add','axe']
a_zip = zip(list_one,list_two)
this = list(a_zip)
this

[('max', 'den'), ('diff', 'soy'), ('sum', 'add'), ('pen', 'axe')]

In [40]:
#using zip and enumerate to iterate through multiple sequences
for i,(a,b) in enumerate(zip(list_one,list_two)):
    print('{0}: --- {1} --- {2}'.format(i,a,b))

0: --- max --- den
1: --- diff --- soy
2: --- sum --- add
3: --- pen --- axe


In [41]:
#unzip zipped values
one, two = zip(*this)

In [42]:
one

('max', 'diff', 'sum', 'pen')

In [43]:
two

('den', 'soy', 'add', 'axe')

In [44]:
dd ={'a': 'some value',
'b': [1, 2, 3, 4],
7: 'an integer',
'dummy': 'another value'}
red = dd.pop('dummy')     #this removes dummy from the dict and assigns its value to the variable, 'red'
red


'another value'

In [45]:
dd

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}

In [46]:
dd.update({'dee': [3, 4],
'rrr': 'ande',})     #merge dictionaries using the update function.

In [47]:
dd

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an integer',
 'dee': [3, 4],
 'rrr': 'ande'}

USING ZIP TO MAP KEY AND VALUES FROM TWO LISTS
=======

In [48]:
key = dd.keys()
values = dd.values()
mapping={}
for k,x in zip(key, values):
    mapping[k]=x
mapping

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an integer',
 'dee': [3, 4],
 'rrr': 'ande'}

In [49]:
mapp = dict(zip(range(8), reversed(range(8))))    
"""
this creates two lists for numbers between 0 and 8
in the first list, and the reverse order of numbers
between 0-8 in the second list, then maps them together
in their respective position and makes them key&value
pairs of the dict then assigns it to mapp.
"""
mapp


{0: 7, 1: 6, 2: 5, 3: 4, 4: 3, 5: 2, 6: 1, 7: 0}

SETS
--------
### are unordered collections of unique elements like the dict but with no value pair used in mathematical operations like the mathematically sets 

In [50]:
at = {1, 2, 3, 4, 5,9}
bt = {4,5,6,7,8,1, 2, 3, 4, 5}

### UNION OF SETS
combining the values of sets into one 

In [51]:
at|bt

{1, 2, 3, 4, 5, 6, 7, 8, 9}

In [52]:
at.union(bt)

{1, 2, 3, 4, 5, 6, 7, 8, 9}

### INTERSECTION OF SETS
consists of similar values in sets

In [53]:
at & bt

{1, 2, 3, 4, 5}

In [54]:
at.intersection(bt)

{1, 2, 3, 4, 5}

### DIFFERENCE OF SETS
picks elements in at that are not in bt

In [55]:
at.difference(bt)  #where there is no difference it returns set()

{9}

In [56]:
at-bt

{9}

In [57]:
at.issubset(bt)    #checks that the elements in at are all in bt 

False

In [58]:
at.issuperset(bt)   #checks that the elements in bt are contained in at

False

In [59]:
at.isdisjoint(bt)    #checks for the occurrence of common elements in at & bt and returns where there are none 

False

In [60]:
at^bt   #returns elements that are not in bt but at and vice versa

{6, 7, 8, 9}

In [61]:
at.symmetric_difference_update(bt)    #sets the values in at to the values returned from at^bt

In [62]:
at

{6, 7, 8, 9}

In [63]:
ct = at.copy()
ct

{6, 7, 8, 9}

LIST COMPREHENSION
========
### [expr for val in collection if condition]

In [64]:
#This
string_vals = ['ama', 'bag', 'tree', 'zipper', 'socrito','coco','at']
a_list = []
for x in string_vals:
    if len(x) > 3:
        a_list.append(x.upper())
a_list

['TREE', 'ZIPPER', 'SOCRITO', 'COCO']

In [65]:
#can be re-written as;
[x.upper() for x in string_vals if len(x)>3]

['TREE', 'ZIPPER', 'SOCRITO', 'COCO']

SIMPLE IMPLEMENTATION OF DATA CLEANING
-----------------------

In [66]:
states = ['Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda', 'south carolina##', 'West virginia?']

In [67]:
import re
def clean_strings(strings):
    result = []
    for value in strings:
        value = value.strip()     #remove whitespaces
        value = re.sub('[!#?]', '', value)     #replace the occurrences of !#? with nothing
        value = value.title()   #change case to titlecase
        result.append(value)     #append the values to result
    return result

clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South Carolina',
 'West Virginia']

In [68]:
def remove_punctuation(value):
    '''
    takes in values and removes occurrences of the
    following characters and returns the new value
    '''
    return re.sub('[!#?]', '', value)

clean_ops = [str.strip, remove_punctuation, str.title]  
'''
Calls the remove_punctuation function, then
changes the case to title case 
then removes whitespaces and returns a new value saved to clean_ops
'''

def clean_strings(strings, ops):
    '''
    takes in a list of strings and a function 
    then for each string it calls the function on the string
    appends the new value to the result list and returns the result
    '''
    result = []
    for value in strings:
        for function in ops:
            value = function(value)
        result.append(value)
    return result

clean_strings(states, clean_ops) #uses the array,'states' and the clean_ops function as args for the clean_strings function


['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South Carolina',
 'West Virginia']

LAMBDA
---------

#### so-called anonymous or lambda functions, which are a way of writing functions consisting of a single statement, the result of which is the return value. They are defined with the lambda keyword, which has no meaning other than declaring an anonymous function

In [69]:
equiv_lamba = lambda x: x * 2    #would be same as;
def lamba_example(x):
    return x*2

In [70]:
def apply_to_list(some_list, f):
    return [f(x) for x in some_list]
ints = [4, 0, 1, 5, 6]
apply_to_list(ints, lambda x: x * 2)

[8, 0, 2, 10, 12]

In [71]:
stringd = ['fan','babaand','badadaass','cook','ping','pies','darkchoco']
stringd.sort(key=len)     #sorts the list using their len
stringd

['fan', 'cook', 'ping', 'pies', 'babaand', 'badadaass', 'darkchoco']

In [72]:
"""
suppose you wanted to sort a collection of strings by the number
of distinct letters in each string
"""
stringd.sort(key=lambda x: len(set(list(x))))
stringd

['fan', 'cook', 'ping', 'pies', 'babaand', 'badadaass', 'darkchoco']

CURRYING 
-----------------
### means deriving new functions from already existing ones by
***partial argument application***

In [73]:
def addition(x,y):
    """
this returns a function.
Here the addition is curried
"""
    return x+y
five_add = lambda x: addition(x,5)


In [74]:
"""
the above can be repeated using the functools module
"""
from functools import partial
five_add = partial(addition, 5)
five_add

functools.partial(<function addition at 0x7f1706632ca0>, 5)

GENERATORS
-----------

#### A python iterator protocol provides consistent/generic ways of iterating over a sequence like objects in a list or lines.
#### An iterator is an object that yields objects to the python interpreter when used on a sequence of objects.
#### Most methods expecting lists or list-like sequence use any iteerable object, built-in methods like min,max,sum and type constructors like list and tuple.

Generators 
------------
#### unlike the regular python functions that return a single result per time, generators provides a way to construct a new iterable object that returns a sequence of multiple results lazily, pausing after each one until the next one is requested. They are created using the yield keyword inplace of the usual return used in functions.

In [75]:
def example_gen(n=9):
    """
when the example_gen function is called, 
no code is executed but it returns an object
"""
    print('Generating cubes from 1 to {0}'.format(n ** 3))
    for x in range(1, n + 1):
        yield x ** 3
example_gen()

<generator object example_gen at 0x7f1706652ac0>

In [76]:
"""
but by requesting elements from the generator the code is executed
for example,
"""
for y in example_gen(): 
    print(y, end=" ")

Generating cubes from 1 to 729
1 8 27 64 125 216 343 512 729 

Generator expressions 
-------------
just like with list, set and dict comprehension, generators can be created in a more concise way using generator expressions by enclosing similar to list comprehension values within parentheses instead of square brackets.


In [77]:
generator = (x**3 for x in range(40))
generator
#like said earlier to execute the code, request elements from generators, therefore
[y for y in generator]

[0,
 1,
 8,
 27,
 64,
 125,
 216,
 343,
 512,
 729,
 1000,
 1331,
 1728,
 2197,
 2744,
 3375,
 4096,
 4913,
 5832,
 6859,
 8000,
 9261,
 10648,
 12167,
 13824,
 15625,
 17576,
 19683,
 21952,
 24389,
 27000,
 29791,
 32768,
 35937,
 39304,
 42875,
 46656,
 50653,
 54872,
 59319]

In [78]:
'''
the above code, is therefore equivalent to
'''
def _gen_ex():
    for x in range(40):
        yield x**3

gen_list=[]
for x in _gen_ex():
    gen_list.append(x)
print(gen_list)

[0, 1, 8, 27, 64, 125, 216, 343, 512, 729, 1000, 1331, 1728, 2197, 2744, 3375, 4096, 4913, 5832, 6859, 8000, 9261, 10648, 12167, 13824, 15625, 17576, 19683, 21952, 24389, 27000, 29791, 32768, 35937, 39304, 42875, 46656, 50653, 54872, 59319]


In [79]:
'''
other examples of using list comprehensions with builtin methods
like sum and type constructors like dict
'''
sum(((k+5)*3) for k in range(30))

1755

In [80]:
dict((d, d **3) for d in range(30))

{0: 0,
 1: 1,
 2: 8,
 3: 27,
 4: 64,
 5: 125,
 6: 216,
 7: 343,
 8: 512,
 9: 729,
 10: 1000,
 11: 1331,
 12: 1728,
 13: 2197,
 14: 2744,
 15: 3375,
 16: 4096,
 17: 4913,
 18: 5832,
 19: 6859,
 20: 8000,
 21: 9261,
 22: 10648,
 23: 12167,
 24: 13824,
 25: 15625,
 26: 17576,
 27: 19683,
 28: 21952,
 29: 24389}

ITERTOOLS MODULE
-----------
### has a collection of generators for many common data algorithms. For example, groupby takes any sequence and a function, grouping consecutive elements in the sequence by return value of the function, FOR EXAMPLE;

In [81]:
import itertools
'''
this lambda function is same as 
def afunc(x):
    return x[0].
    
The groupby groups closely situated words with same first letter together
in a list, while lambda function returns the first letter of each group
'''
first_letter = lambda x: x[0]   #this checks for the first letter and returns it
names = ['Raphael','Noriode','Alan','Adler','Brux','Simeon','Cassie','Charles','Mary','Mavy']
for letter, name in itertools.groupby(names, first_letter):
    print(letter, list(name))   #note: name is a generator and has to be requested for execution.



R ['Raphael']
N ['Noriode']
A ['Alan', 'Adler']
B ['Brux']
S ['Simeon']
C ['Cassie', 'Charles']
M ['Mary', 'Mavy']


### Note: when handing errors/exceptions, in cases where you want some code to be executed regardless of whether the code in the try block succeeds or not, use finally;


In [82]:
try:
    open_file = open('afile.txt','w')
    open_file.write('some_string')
except:
    print('error')
else:
    print('Done')
finally:
    open_file.close()
 #the file would get closed whether the code fails or succeeds

Done


Numpy array
---------------


In [83]:
import numpy as np
dataset =  np.random.randn(3,5)
dataset

array([[-0.42534378,  1.67101612, -0.91334337, -0.62054435, -1.03031462],
       [ 0.13800081, -0.00722204, -1.23886814,  0.06670894, -2.03428087],
       [ 0.51372117, -0.69034939, -0.2139756 , -0.81948025,  0.49791958]])

In [84]:
[x for y in dataset for x in y]

[-0.4253437780335604,
 1.6710161202202745,
 -0.9133433659693954,
 -0.6205443529745375,
 -1.0303146190362462,
 0.13800081173299586,
 -0.007222041117679059,
 -1.2388681392637755,
 0.0667089408499451,
 -2.034280868537741,
 0.5137211706995911,
 -0.6903493915671436,
 -0.2139755975428382,
 -0.8194802470814829,
 0.49791957653006164]

ndarray
-----
### is a generic multidimensional container for data of the same type, that has a .shape, a tuple that determines its' dimensions and a dtype that shows its' data type.

In [85]:
#for example;
dataset.shape

(3, 5)

In [86]:
dataset.dtype

dtype('float64')

### Creating ndarrays
#### ndarrays are created using the array function which accepts any sequence like object like lists.
for example;

In [87]:
"""
this creates a multidimensional numpy array
note: all nested arrays must be of the same dimension
else an error would be raised"""
arr = [[4,6,4],[4,3,3],[44,65,4],[5,6,8]]
nd_arr = np.array(arr)
nd_arr

array([[ 4,  6,  4],
       [ 4,  3,  3],
       [44, 65,  4],
       [ 5,  6,  8]])

In [88]:
"""
to know the number of arrays created, you can use 
.ndim to view this"""
nd_arr.shape

(4, 3)

In [89]:
nd_arr.ndim

2

In [90]:
nd_arr.dtype

dtype('int64')

CREATING NUMPY ARRAYS USING ZEROS, ONES, arange AND EMPTY FUNCTIONS.
-----------------
- numpy zeros function creates an ndarray of 0's.
- numpy ones creates an array of 1s.
- arange is an array-valued version of the python range function.
- empty creates an array withput initializing its value.

In [91]:
np.zeros(9)  #this creates a one-dimensional array of 0's.

array([0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [92]:
np.ones((3,5))   #this creates a multi-dimensional array of ones  

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [93]:
np.empty((2, 3, 2))   #creates two multi-dimensional arrays with no particular datatype

array([[[4.68170696e-310, 0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000]],

       [[0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000]]])

In [94]:
np.arange(12)   #this creates a list from 0 - 11

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

Specifying the datatype of ndarrays using dtype
----------------
- dtypes are a source of NumPy’s flexibility for interacting with data coming from other systems, they provide a mapping directly onto an underlying disk or memory representation, which makes it easy to read and write binary streams of data to disk and also to connect to code written in a low-level language like C or Fortran.
- The numerical dtypes are named the same way: a type name, like float or int , followed by a number indicating the number of bits per element.


In [95]:
arr_one = np.array([4,6,4,3,5,5,6], dtype = np.float64)

In [96]:
arr_one.dtype

dtype('float64')

EXPLICITLY CONVERTING AN ARRAY FROM ONE TYPE TO ANOTHER USING .astype()
-------------------------
- Calling astype always creates a new array (a copy of the data), even if the new dtype is the same as the old dtype

In [97]:
int_arr = arr_one.astype(np.int64)
int_arr.dtype

dtype('int64')

In [98]:
"""
casting a float to int truncates the decimal part,
that is, the integer part only is in the array
for example;
"""
disArr = np.array([3.44,4.6,6,7.7,3.5], dtype = np.float64)
disArr = disArr.astype(np.int64)
disArr.dtype

dtype('int64')

In [99]:
disArr

array([3, 4, 6, 7, 3])

CONVERTING AN ARRAY OF NUMERICAL STRING TO NUMERIC FORM
------------------------

In [100]:
"""to convert an array of numerical strings to numeric form
you use the astype() function
for example;"""

num_str = np.array(['44','44.0','4.6','6.5','9.0'], dtype = np.string_)
num_str

array([b'44', b'44.0', b'4.6', b'6.5', b'9.0'], dtype='|S4')

In [101]:
num_str.astype(np.float64)   #using float inplace of np.float64 causes Numpy to use the python type equivalent to its dtype.

array([44. , 44. ,  4.6,  6.5,  9. ])

USING THE DTYPE OF AN ANOTHER ARRAY ON A DIFFERENT ARRAY
----------------------

In [102]:
int_arr = np.arange(9)
anoda_arr = np.array(['44','44.0','4.6','6.5','9.0'], dtype = np.float64)
int_arr.dtype

dtype('int64')

In [103]:
int_arr = int_arr.astype(anoda_arr.dtype)  #this converts the type of the int_arr to float the dtype of anoda_arr
int_arr

array([0., 1., 2., 3., 4., 5., 6., 7., 8.])

In [104]:
#to view the dtype of the int_arr
int_arr.dtype

dtype('float64')

In [105]:
"""
Using shorthand code strings torefer to a 
dtype

u8 specifies an unsigned 8-bit(1 byte) integer type"""

ones=np.ones(5, dtype = 'u8')
ones

array([1, 1, 1, 1, 1], dtype=uint64)

ARITHMETIC OPERATIONS WITH NUMPY ARRAYS
------------------
### Vectorization: is the ability of arrays to perform batch operations on data without the for-loop.

In [106]:
"""
Any arithmetic
operations between
equal-size arrays applies the operation element-wise
"""
een_arr = np.array([[4,5,6],[6,4,9],[3,5,5]])
een_arr

array([[4, 5, 6],
       [6, 4, 9],
       [3, 5, 5]])

In [107]:
een_arr * een_arr

array([[16, 25, 36],
       [36, 16, 81],
       [ 9, 25, 25]])

In [108]:
een_arr /een_arr

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [109]:
"""performing arithmetic operations with scalars result
in scalar args to the elements in the array, example;
"""
1 / een_arr

array([[0.25      , 0.2       , 0.16666667],
       [0.16666667, 0.25      , 0.11111111],
       [0.33333333, 0.2       , 0.2       ]])

In [110]:
"""Comparing arrays of same size yield
boolean arrays, example;
"""
vijf_arr= np.array([[0.25,67, 0.16666667],[9, 0.25, 40],[0.33333333, 0.2,0.2]])
k = een_arr > vijf_arr
k, k.dtype

(array([[ True, False,  True],
        [False,  True, False],
        [ True,  True,  True]]),
 dtype('bool'))

INDEXING AND SLICING IN NUMPY ARRAYS
-----------
Just like with python arrays, numpy arrays also can be sliced.

In [111]:
this_arr = np.arange(11)
this_arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [112]:
this_arr[4:9]

array([4, 5, 6, 7, 8])

In [113]:
"""
one value can be assigned to a slice and cause a slice is a view of the array,
the changes are also effected in the array, this is known as BROADCASTING. example;
"""
this_arr[4:9] = 33
this_arr

array([ 0,  1,  2,  3, 33, 33, 33, 33, 33,  9, 10])

In [114]:
"""
the values in the sliced array can also be altered and this 
would also affect the original array,
cause all slices are views to the actual array
"""
change_arr = this_arr[4:9]
change_arr[3] = 7085
this_arr

array([   0,    1,    2,    3,   33,   33,   33, 7085,   33,    9,   10])

In [115]:
"""inorder to actually get a copy f a slice from a numpy array, 
you would have to explicitly copy the array with the .copy() method
for example,"""
change_arr = this_arr[4:9].copy()
change_arr[3]=30098
this_arr    #notice that this doesn't change the actual array cause change_arr is a copy and not a view

array([   0,    1,    2,    3,   33,   33,   33, 7085,   33,    9,   10])

In [116]:
"""
but the change_arr was actually altered
as seen here"""
change_arr

array([   33,    33,    33, 30098,    33])

SLICING MULTI-DIMENSIONAL ARRAY
----------
conventionally elements in python multi-dimensionsal arrays, are accessed recursively for example;

In [117]:
arrd = np.array([[1, 2, 3], [4, 5, 6],[5,8,4],[9,4,7]])
#to access the third element in the second array,6 you would;

arrd[1][2]

6

In [118]:
'''
But with numpy arrays, slicing can be done with
comma seperated values for example;
'''
arrd[1, 2]


6

#### Muilti-dimensional when put in tabular form can thought of like axis of a table where axis A is "rows" and axis B is "columns".


In [119]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d[0,1,1]

5

In [120]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [121]:
"""
Just like done in one-dimensional array, the elements in a
multi-dimensional can be sliced/indexed and changed
as seen here.
To change the elements of the second array in the arrd
"""

arr3d[0] = 33   #changes all elements in the first array
arr3d[0]

array([[33, 33, 33],
       [33, 33, 33]])

In [122]:
"""this alters the content of the first multi-dimensional array 
cause the array was mirrored and not copied, the initial array
"""
arr3d

array([[[33, 33, 33],
        [33, 33, 33]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [123]:
"""
to change the value without altering the inital array 
you copy the array or element and make the changes
"""
copyarr = arr3d[0].copy()
copyarr 

array([[33, 33, 33],
       [33, 33, 33]])

In [124]:
copyarr[0] = 43
copyarr

array([[43, 43, 43],
       [33, 33, 33]])

In [125]:
#notice the values of an array the initial does not change
arr3d

array([[[33, 33, 33],
        [33, 33, 33]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

Indexing multi-dimensional arrays
------------------
just like python arrays, numpy arrays can also be indexed.

In [126]:
arr4d = np.array([[[3,5,3,2], [34,5,6,1],[23,6,44,8]],
        [[44,3,5,7],[18,9,21,12],[78,5,6,14]]])
arr4d.shape

(2, 3, 4)

In [127]:
arr4d[:1]    #this returns the first multi-dimensional 

array([[[ 3,  5,  3,  2],
        [34,  5,  6,  1],
        [23,  6, 44,  8]]])

In [128]:
"""
all conditions applying to previous arrays apply to this 
that is, to copy, .copy() method is used"""
arr4d[:1, :, :2] = 564
arr4d

array([[[564, 564,   3,   2],
        [564, 564,   6,   1],
        [564, 564,  44,   8]],

       [[ 44,   3,   5,   7],
        [ 18,   9,  21,  12],
        [ 78,   5,   6,  14]]])

BOOLEAN INDEXING OF NDARRAYS
----------------


In [129]:
"""
Here we would specify an array of random numbers
and an array of words, then check for corresponding occurrences
of a word, then index the arr of numbers using the boolean array
returned from the check.
"""
rand_arr = np.random.randn(7, 3)   #a 3 x 5 array
name_arr = np.array(['Bob','Foo','Bob','Bob','Sam','Joe','Rex'])
#to check elements corresponding to 'Bob' in nam_arr
name_arr == 'Bob'   #this returns an array of boolean values, true values for all correspoding cases
#and false for all negative cases

array([ True, False,  True,  True, False, False, False])

In [130]:
"""
the length of the arr must be same as the array indexing it
here the returned array is 7 in length, which is why I initialized
a 7 x 3 array, else this results in an error.
"""
rand_arr

array([[ 0.0916526 , -0.04189856, -0.61770741],
       [ 1.40271386,  0.43379107,  1.16571544],
       [-0.18208768,  0.91992037, -0.1926417 ],
       [ 1.62845325,  1.93125694, -0.29817573],
       [ 1.72086647, -0.73101699, -0.45257566],
       [ 2.60302456,  0.34744254,  0.5996749 ],
       [ 1.75175897,  0.66898376,  1.45171949]])

In [131]:
"""
what happens here is that the boolean values are arranged across each
length of values and the values where the True values are these
values are returned, this is done from the x-axis"""
rand_arr[name_arr == 'Bob']

array([[ 0.0916526 , -0.04189856, -0.61770741],
       [-0.18208768,  0.91992037, -0.1926417 ],
       [ 1.62845325,  1.93125694, -0.29817573]])

In [132]:
"""
The result of the above can also like an array be indexed
for example, the comma seperated indexed value serves as the 
indexing slice
This would return elements from positions 0 - 1
in both x and y axis 
"""
rand_arr[name_arr == 'Bob', :2]

array([[ 0.0916526 , -0.04189856],
       [-0.18208768,  0.91992037],
       [ 1.62845325,  1.93125694]])

In [133]:
rand_arr[name_arr == "Bob", 1]  #returns the second array of elements on the y-axis

array([-0.04189856,  0.91992037,  1.93125694])

NEGATING BOOLEAN CONDITIONS
--------------
TO NEGATE THE BOOLEAN CONDITION, WE USE != and ~(boolean condition) when using it as a slice directly.



In [134]:
name_arr != 'Bob' #returns the negated form of the boolean array

array([False,  True, False, False,  True,  True,  True])

In [135]:
"""
inorder to directly use negated/inverted boolean arrays directly as 
slices for another array, you do the following;
"""
"""
notice the values retrieved earlier are completely different from these ones
this is because the boolean conditions changed. Therefore, where there
was a True value there is a false value and vice versa
"""
rand_arr[~(name_arr == "Bob")] #the ~ sign before the condition check in ()
                         #automatically negates the boolean values to be used as slices

array([[ 1.40271386,  0.43379107,  1.16571544],
       [ 1.72086647, -0.73101699, -0.45257566],
       [ 2.60302456,  0.34744254,  0.5996749 ],
       [ 1.75175897,  0.66898376,  1.45171949]])

In [136]:
"""
the above can also be sliced like other arrays
"""
rand_arr[~(name_arr == "Bob"), :1]   #this returns all arrays in position 0


array([[1.40271386],
       [1.72086647],
       [2.60302456],
       [1.75175897]])

COMBINING MULTIPLE BOOLEAN CONDITIONS USING |(or) AND &(and)
------------------------
#### Note: the and/or boolean keywords are not used in numpy arrays but their symbols for example;


In [137]:
(name_arr == "Bob")

array([ True, False,  True,  True, False, False, False])

In [138]:
(name_arr == "Foo")

array([False,  True, False, False, False, False, False])

Combining conditions using |
---------

In [139]:
"""
here the two arrays gotten from each array check is compared
against the other and a new array is generated in which its values
are true where both elements are true and 
any element is true in that position of the arrays
and false where both are false"""

(name_arr == "Bob") | (name_arr == "Foo")

array([ True,  True,  True,  True, False, False, False])

In [140]:
"""
we can also slice and negate the array from the combined 
condition check as done before using (), ~ 
for example; in comparison with the above array,
you would notice the array is inverted """

~((name_arr == "Bob") | (name_arr == "Foo"))

array([False, False, False, False,  True,  True,  True])

In [141]:
"""
we could also use this in slicing the random array generated earlier
for example,"""

rand_arr[~((name_arr == "Bob") | (name_arr == "Foo"))]

array([[ 1.72086647, -0.73101699, -0.45257566],
       [ 2.60302456,  0.34744254,  0.5996749 ],
       [ 1.75175897,  0.66898376,  1.45171949]])

Combining conditions using &
---------------

In [142]:
"""
here the two arrays gotten from each array check is compared
against the other and a new array is generated in which its values
are only true where both elements are true, else it is false
"""

(name_arr == "Bob") & (name_arr == "Foo")

array([False, False, False, False, False, False, False])

In [143]:
#now let's invert this 
~((name_arr == "Bob") & (name_arr == "Foo"))

array([ True,  True,  True,  True,  True,  True,  True])

In [144]:
#let's use it as a slice to rand_arr
rand_arr[~((name_arr == "Bob") | (name_arr == "Foo"))]

array([[ 1.72086647, -0.73101699, -0.45257566],
       [ 2.60302456,  0.34744254,  0.5996749 ],
       [ 1.75175897,  0.66898376,  1.45171949]])

#### *Note: the result gotten from using | and & can also be sliced like the other arrays indexed earlier*


ALTERING SPECIFIC VALUES IN A NUMPY ARRAY
--------------

In [145]:
"""
lets change all negative values in the rand_arr to 3
to do this we would do the following
"""
"""
so in a case where you don't want your initial array 
altered, first copy the array to a variable using .copy()
then make your changes
"""

rand_arr[rand_arr < 0] = 3
rand_arr   #notice that all negative values have been replaced by 3

array([[0.0916526 , 3.        , 3.        ],
       [1.40271386, 0.43379107, 1.16571544],
       [3.        , 0.91992037, 3.        ],
       [1.62845325, 1.93125694, 3.        ],
       [1.72086647, 3.        , 3.        ],
       [2.60302456, 0.34744254, 0.5996749 ],
       [1.75175897, 0.66898376, 1.45171949]])

In [146]:
"""
this can also be done to all positive values
Also note that this alters the original rand_arr
so in a case where you don't want your initial array 
altered, first copy the array to a variable using .copy()
then make your changes
"""

rand_arr[rand_arr > 0] = 8900
rand_arr  #all the values in the array got altered cause the initial
          #changes made, replaced the negative values by 3.

array([[8900., 8900., 8900.],
       [8900., 8900., 8900.],
       [8900., 8900., 8900.],
       [8900., 8900., 8900.],
       [8900., 8900., 8900.],
       [8900., 8900., 8900.],
       [8900., 8900., 8900.]])

In [147]:
"""
We can also use boolean arrays to alter the values in another
array as seen below;
let's define an array of random numbers then use another array of
character values with a boolean condition to index and change
true values
"""
arandArr = np.random.randn(8,3)
alphaNumArr = np.array(['Vier','Twee','Eend','Vier','Twee', 'Meisje','Drie','Vier'])

arandArr[alphaNumArr == 'Vier'] = 4

arandArr

array([[ 4.        ,  4.        ,  4.        ],
       [ 0.49911328,  1.24688141,  0.12797602],
       [-0.05386412,  0.14824178,  0.03622516],
       [ 4.        ,  4.        ,  4.        ],
       [ 1.09003916, -0.49333916,  0.1953586 ],
       [ 1.77377584, -0.34575555,  0.81595626],
       [ 0.77435457, -1.19903317,  1.13743822],
       [ 4.        ,  4.        ,  4.        ]])

FANCY INDEXING
----------

In [148]:
"""
let's create an array of numbers
"""
numArr = np.zeros((7,4))
for x in range(7):
    numArr[x] = x+3
numArr

array([[3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.],
       [8., 8., 8., 8.],
       [9., 9., 9., 9.]])

In [149]:
"""
To select a particular subset of data
from the array
"""
"""
that is, it enters the first array
then it picks the array from the positions 3,5,6,0
"""
numArr[[3,5,6,0]] #this returns the arrays in positions 3,5,6


array([[6., 6., 6., 6.],
       [8., 8., 8., 8.],
       [9., 9., 9., 9.],
       [3., 3., 3., 3.]])

In [150]:
numArr[[-1, -5, -3]] #the array can also be sliced using negative indexes

array([[9., 9., 9., 9.],
       [5., 5., 5., 5.],
       [7., 7., 7., 7.]])

In [151]:
"""
Let's create an array of numbers
then re-shape into multi-dimensional arrays
"""
numba = np.arange(40).reshape((8,5))
numba

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39]])

In [152]:
"""
we can slice the array using the following:
it sorts the array into tuples like;
the first array represents the columns,
the second array represents the rows.
red=(4,2),purple=(0,2),green=(3,4)
"""
numba[[4,0,3],[2,2,4]]

array([22,  2, 19])

![vvv.png](attachment:vvv.png)

Image explaining more about slicing the array above.
------

In [153]:
"""
just like the above let's redo this with
different slice values
"""
numba[[1,3,3],[2,1,0]]
#(1,2),(3,1),(3,0)

array([ 7, 16, 15])

In [154]:
"""
this takes each value and creates an array against the second array
like (1)[0, 3, 1, 2], (4)[0, 3, 1, 2], (0)[0, 3, 1, 2] (2)[0, 3, 1, 2]
and creates a multidimensional array
"""

numba[[1, 4, 0, 2]][:, [0, 3, 1, 2]]

array([[ 5,  8,  6,  7],
       [20, 23, 21, 22],
       [ 0,  3,  1,  2],
       [10, 13, 11, 12]])

In [155]:
"""
notice that fancy indexing copies the data
into a new array
"""
numba[[1, 4, 0, 2]][:, [0, 3, 1, 2]] = 7809800
numba

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39]])

TRANSPOSING ARRAYS AND SWAPPING AXES
----------------
#### reshapes an array by interchanges the axes of an array and creates a new array, it does not copy the array. 
#### Example: a 3x4 array becomes a 4x3

In [156]:
"""
lets creates a multi-dimensional array
"""
marr = np.arange(48).reshape(6, 8)
marr

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29, 30, 31],
       [32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47]])

In [157]:
"""
now let's create a transpose of this array
using .T
Notice that the axes have been swapped for each array
"""
marrTranspose = marr.T
marrTranspose

array([[ 0,  8, 16, 24, 32, 40],
       [ 1,  9, 17, 25, 33, 41],
       [ 2, 10, 18, 26, 34, 42],
       [ 3, 11, 19, 27, 35, 43],
       [ 4, 12, 20, 28, 36, 44],
       [ 5, 13, 21, 29, 37, 45],
       [ 6, 14, 22, 30, 38, 46],
       [ 7, 15, 23, 31, 39, 47]])

In [158]:
"""
when performing matrix computations, using np.dot
for example, computing the inner matrix product 
the transpose comes in matric
lets see an example;
let's create a new array of random numbers, get its transpose and
use it as parameters for np.dot to get the inner matrix product
"""

arrNum = np.random.randn(5,3) #create a multi-dimensional array 
arrNum

array([[ 0.01071251,  1.77516685, -0.75750393],
       [-0.43361015,  0.75780628, -0.2485985 ],
       [-0.11824496,  2.15888942,  0.16001012],
       [-0.08503624, -0.0516372 , -1.12711161],
       [ 0.43769052, -0.33444752,  0.9101091 ]])

In [159]:
np.dot(arrNum.T, arrNum) #computation for the inner matrix product

array([[ 0.40091854, -0.70684725,  0.57495112],
       [-0.70684725,  8.50081277, -1.43382405],
       [ 0.57495112, -1.43382405,  2.75989581]])

CREATING AND DERIVING THE TRANSPOSE OF HIGHER MULTIDIMENSIONAL ARRAYS
----------------

In [160]:
"""
this creates 3 multi-dimensional arrays with 
3x4 dimensions
"""
highArr = np.arange(36).reshape((3,3,4))
highArr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]],

       [[24, 25, 26, 27],
        [28, 29, 30, 31],
        [32, 33, 34, 35]]])

In [161]:
"""
to derive the transpose of this array created above, the transpose
requires a tuple of axis values.
the values to be used must be values within the range of one lesser
than the axes value of the column.
It does not taken in repetitive values like (0,0,3) is wrong
for example;
"""

highArr.transpose((0,1,2))  #this returns the same matrix

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]],

       [[24, 25, 26, 27],
        [28, 29, 30, 31],
        [32, 33, 34, 35]]])

In [162]:
"""
this returns a matrix of values derived from picking
values from columns and interchanging them with the row values
for each multi-dimensional array
"""
highArr.transpose((0,2,1))

array([[[ 0,  4,  8],
        [ 1,  5,  9],
        [ 2,  6, 10],
        [ 3,  7, 11]],

       [[12, 16, 20],
        [13, 17, 21],
        [14, 18, 22],
        [15, 19, 23]],

       [[24, 28, 32],
        [25, 29, 33],
        [26, 30, 34],
        [27, 31, 35]]])

In [163]:
"""
this returns a matrix of values derived from picking
values from the first rows of each multi-dimensional values
and creates a new array of it and also does the same for the following
row values.
"""
highArr.transpose((1,0,2))

array([[[ 0,  1,  2,  3],
        [12, 13, 14, 15],
        [24, 25, 26, 27]],

       [[ 4,  5,  6,  7],
        [16, 17, 18, 19],
        [28, 29, 30, 31]],

       [[ 8,  9, 10, 11],
        [20, 21, 22, 23],
        [32, 33, 34, 35]]])

SWAPAXES
-----
### Ndarray has the method *swapaxes* , which takes a pair of axis numbers and switches the indicated axes to rearrange the data. It also returns a view on the data without making a copy.
for example;

In [164]:
highArr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]],

       [[24, 25, 26, 27],
        [28, 29, 30, 31],
        [32, 33, 34, 35]]])

In [165]:
highArr.swapaxes(2,1) #interchanges the position of the values on their initial axes

array([[[ 0,  4,  8],
        [ 1,  5,  9],
        [ 2,  6, 10],
        [ 3,  7, 11]],

       [[12, 16, 20],
        [13, 17, 21],
        [14, 18, 22],
        [15, 19, 23]],

       [[24, 28, 32],
        [25, 29, 33],
        [26, 30, 34],
        [27, 31, 35]]])

UNIVERSAL FUNCTIONS(ufunc)
--------
are functions that perform operations on individual elements in an array and return a new array of the result from the operation, which is mostly a single array.
They are classified into the following;
- Unary Ufunc: are ufuncs that take in one array and performs operations then returns a new array of its resultant values.
- Binary Ufunc: are ufuncs that take in two arrays and perform operations then return a single array.


UNARY UNIVERSAL  FUNCTIONS
-------

In [166]:
"""
Let's define a new array
"""
newArr = np.arange(17)
np.sqrt(newArr)    #gets the sqrt of values in an array.

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ,
       3.16227766, 3.31662479, 3.46410162, 3.60555128, 3.74165739,
       3.87298335, 4.        ])

In [167]:
np.exp(newArr)   #gets the exp of values in an array

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03, 2.20264658e+04, 5.98741417e+04,
       1.62754791e+05, 4.42413392e+05, 1.20260428e+06, 3.26901737e+06,
       8.88611052e+06])

In [168]:
np.square(np.sqrt(newArr))  #returns the square of elements in an array.

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16.])

In [169]:
np.sign(np.random.randn(9))  #returns the sign of each element 
                             #1 (positive), 0 (zero), or –1 (negative)

array([-1.,  1., -1.,  1., -1.,  1., -1.,  1.,  1.])

In [170]:
np.sqrt(np.array([5,7,9,3,6,8]))

array([2.23606798, 2.64575131, 3.        , 1.73205081, 2.44948974,
       2.82842712])

In [171]:
"""
returns the smallest integer > or = the number
"""
np.ceil(np.sqrt(np.array([5,7,9,3,6,8])))

array([3., 3., 3., 2., 3., 3.])

In [172]:
np.floor(np.sqrt(np.array([5,7,9,3,6,8,34]))) #returns the integer part of a floating point number

array([2., 2., 3., 1., 2., 2., 5.])

Binary Ufunc
-------

In [173]:

"""
the size of the two arrays to be compared must be same
else a valueError would be thrown
"""
x = np.random.randn(9)
y = np.random.randn(9)
x

array([-0.15302519, -0.58998211,  0.477254  , -0.73175764,  0.45316217,
        1.39497762,  0.85909883, -1.01967759, -0.79898593])

In [174]:
y

array([-0.1167239 ,  0.54547968, -0.27344299,  1.53655756,  0.15217984,
        0.4685714 ,  0.57018843, -0.17001839,  0.37206014])

In [175]:
"""
this compares elements in each array against it's corresponding
element in the other array and returns a new single array of 
the max values
"""
np.maximum(x,y)

array([-0.1167239 ,  0.54547968,  0.477254  ,  1.53655756,  0.45316217,
        1.39497762,  0.85909883, -0.17001839,  0.37206014])

In [176]:
np.add(x,y)

array([-0.26974909, -0.04450243,  0.20381101,  0.80479992,  0.605342  ,
        1.86354902,  1.42928727, -1.18969597, -0.42692579])

In [177]:
np.divide(x,y)

array([ 1.31100134, -1.08158403, -1.74535103, -0.47623185,  2.9778069 ,
        2.97708658,  1.50669284,  5.99745485, -2.14746446])

In [178]:
np.subtract(x,y)

array([-0.03630129, -1.13546178,  0.75069698, -2.2683152 ,  0.30098233,
        0.92640622,  0.2889104 , -0.8496592 , -1.17104607])

In [179]:
np.multiply(x,y)

array([ 0.0178617 , -0.32182325, -0.13050176, -1.12438774,  0.06896214,
        0.65364662,  0.48984822,  0.17336394, -0.29727082])

### Ufunc can also return multiple arrays, for example, when using the numpy .modf method, it returns both the fractional and remainder part of a floating point array.

In [180]:
arrVal = np.multiply(x,y)
remainder,whole = np.modf(arrVal)
remainder

array([ 0.0178617 , -0.32182325, -0.13050176, -0.12438774,  0.06896214,
        0.65364662,  0.48984822,  0.17336394, -0.29727082])

In [181]:
whole

array([ 0., -0., -0., -1.,  0.,  0.,  0.,  0., -0.])

In [182]:
"""
Ufuncs accept an optional out argument that allows them to operate in-place on
arrays.
Noter the arrays entered must be of the same kind else an error would 
be thrown
"""

np.sqrt(x, x)

  np.sqrt(x, x)


array([       nan,        nan, 0.69083572,        nan, 0.67317321,
       1.18109171, 0.92687585,        nan,        nan])

In [183]:
points = np.arange(5, 35, 9)   #creates a range of numbers between 5 and -5 spaced with 0.68
points

array([ 5, 14, 23, 32])

In [184]:
"""
creates a multi-dimensional array of the same length of the
initial array, organizes the elements in the array against the axis
"""
xs,ys = np.meshgrid(points, points)     
xs          

array([[ 5, 14, 23, 32],
       [ 5, 14, 23, 32],
       [ 5, 14, 23, 32],
       [ 5, 14, 23, 32]])

In [185]:
ys

array([[ 5,  5,  5,  5],
       [14, 14, 14, 14],
       [23, 23, 23, 23],
       [32, 32, 32, 32]])

In [186]:
xs + ys

array([[10, 19, 28, 37],
       [19, 28, 37, 46],
       [28, 37, 46, 55],
       [37, 46, 55, 64]])

In [187]:
z = np.sqrt(xs ** 2 + ys ** 2)
z

array([[ 7.07106781, 14.86606875, 23.53720459, 32.38826948],
       [14.86606875, 19.79898987, 26.92582404, 34.92849839],
       [23.53720459, 26.92582404, 32.52691193, 39.40812099],
       [32.38826948, 34.92849839, 39.40812099, 45.254834  ]])

Expressing Conditional Logic as Array Operations
----------------


In [188]:
"""
this returns a value from the warr when the value in cond is
True, else value from yarr"""
warr = np.array([3.4, 3.3, 4.0, 5.8, 7.5])
yarr = np.array([6.5, 4.6, 5.5, 6.0, 9.9])
cond = np.array([True, False, True, True, False])
resut = [(x if c else y) for x, y, c in zip(warr, yarr, cond)]
resut

[3.4, 4.6, 4.0, 5.8, 9.9]

In [189]:
"""
The above can be achieved simply using np.where
"""
resut = np.where(cond, warr, yarr)
resut

array([3.4, 4.6, 4. , 5.8, 9.9])

In [190]:
"""
We change the positive values and negative values in an array
by simply using np.where. np.where accepts arrays or non-arrays as its'
second and third values.
Let's create an array of random values and replace all negative
values 8 and the positive values with 5.
"""

randomArr = np.random.randn(5,5)
randomArr

array([[ 4.39096128, -1.33903468,  0.35002582, -1.54420141,  0.2567759 ],
       [-0.72555777, -0.05626163,  0.56771219, -0.68650939,  0.51732535],
       [ 0.01790029,  0.8289801 ,  1.46919441, -1.02236035,  0.12237406],
       [-0.34539493, -0.94784684,  1.67464821, -1.6245256 ,  0.7046263 ],
       [-0.04121273,  1.68960426, -0.52867567, -0.44455024, -0.12117   ]])

In [191]:
"""this checks for values > 0 and replaces them to 5
else it replaces such value with 8
"""
np.where(randomArr > 0, 5,8)

array([[5, 8, 5, 8, 5],
       [8, 8, 5, 8, 5],
       [5, 5, 5, 8, 5],
       [8, 8, 5, 8, 5],
       [8, 5, 8, 8, 8]])

In [192]:
"""we can also replace only the negative values by doing
as follows;
Notice the change in the boolean operator.
Also the 3rd value is an array, this causes the array value to be
returned where the element is > 0
"""
np.where(randomArr < 0, 6, randomArr)

array([[4.39096128, 6.        , 0.35002582, 6.        , 0.2567759 ],
       [6.        , 6.        , 0.56771219, 6.        , 0.51732535],
       [0.01790029, 0.8289801 , 1.46919441, 6.        , 0.12237406],
       [6.        , 6.        , 1.67464821, 6.        , 0.7046263 ],
       [6.        , 1.68960426, 6.        , 6.        , 6.        ]])

AGGREGATIONS
-------
#### are mathematical functions that compute statistics about an entire array or about the data along an axis are accessible as methods of the array class like sum, mean and std.

In [193]:
aggArr = np.random.randn(6,4)
aggArr

array([[ 1.98099817,  0.76469481, -0.53343891,  1.37113694],
       [-1.21309437,  0.63875354, -0.12242404, -0.39740518],
       [-0.12967748,  0.48387414, -1.33651354, -0.13433715],
       [-0.93761995, -1.48257636,  0.35399552, -0.2402182 ],
       [-0.66251316, -1.64999333,  1.57098628, -0.69282818],
       [ 1.43617686,  0.78104832, -0.41034964, -0.21524969]])

In [194]:
aggArr.mean()
#or
np.mean(aggArr)   #returns the mean of all values in the multi-dimensional array

-0.03235727560481796

In [195]:
"""
the mean and sum of the rows and columns
by passing in axis = 0 for computing across the rows and 
axis = 1 for computing across the columns resulting in a 
a one-dimensional array of elements. 
"""

aggArr.mean(axis = 1)

array([ 0.89584775, -0.27354251, -0.27916351, -0.57660475, -0.3585871 ,
        0.39790646])

In [196]:
"""
.cumsum performs the operations of the fibonacci series
and returns an array of the values. For example;
"""

fibArr = np.arange(17)
fibArr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])

In [197]:
fibArr.cumsum()  #performs the fibonnaci operations

array([  0,   1,   3,   6,  10,  15,  21,  28,  36,  45,  55,  66,  78,
        91, 105, 120, 136])

In [205]:
""" 
for multi-dimensional arrays, the axis to perform the operation 
can be passed as an argument to the cumsum method,
like .cumsum(axis = 0)
"""

miArr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])


In [199]:
miArr.cumsum(axis=0)  #performs operations along the columns
miArr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [200]:
miArr.cumprod(axis=0)  #returns the fibo prod. on the columnsb

array([[ 0,  1,  2],
       [ 0,  4, 10],
       [ 0, 28, 80]])

In [206]:
miArr.argmin(axis=1) #indices of minimum and maximum elements

array([0, 0, 0])

METHODS FOR BOOLEAN ARRAYS
-----------
some boolean array methods include the following:
- .sum(): returns the total number of a check value in an array
- .any(): checks if any True values exists in an array, and returns True if any exists.
- .all(): checks if all the values in an array are True, and returns either True or false.

In [208]:
"""
let's create a random array of values and calculate
the total number of negative number in the array
"""
somArr = np.random.randn(3,3)
somArr

array([[ 0.47938005,  1.41754545, -1.011917  ],
       [-0.69516123,  0.378273  , -0.38225419],
       [ 1.08507796,  0.5721494 , -0.19108405]])

In [209]:
(somArr < 0).sum()

4

In [211]:
"""
lets specify a boolean array of random bool values
then check use the any and all to check against it
"""
boolArr = np.array([True, False, False, True, True, True])
boolArr

array([ True, False, False,  True,  True,  True])

In [213]:
boolArr.any()   #checks for existence of True values

True

In [214]:
boolArr.all()    #checks if all values are True

False

In [226]:
"""numpy arrays can also be sorted using the .sort() method
to sort multi-dimensional arrays the axis args can also
be passed
np.sort returns a sorted copy of an array instead of modifying
the array in-place
"""
sort_Arr = np.random.randn(5,3)
sort_Arr

array([[ 0.56672017, -0.5125413 ,  1.14856768],
       [ 1.24023883, -0.51558193, -0.51843521],
       [ 0.72426321,  0.062162  , -0.72446492],
       [-0.96296775, -0.94716709, -0.30690826],
       [ 0.06081638, -0.80457062,  1.72085636]])

In [228]:
sort_Arr.sort(0)   #sorts along the columns
sort_Arr

array([[-0.96296775, -0.94716709, -0.72446492],
       [ 0.06081638, -0.80457062, -0.51843521],
       [ 0.56672017, -0.51558193, -0.30690826],
       [ 0.72426321, -0.5125413 ,  1.14856768],
       [ 1.24023883,  0.062162  ,  1.72085636]])

In [241]:
oneDimArr= np.random.randn(4)
oneDimArr.sort()
oneDimArr

array([-0.87416408, -0.39286307, -0.05175211,  1.00130016])

In [262]:
"""
sorting of arrays can be used to get quantiles of an array
let's say we want to get the 7th quantile of sortedArr
we can simply do the following using slicing on the sorted
to get the 7th quantile
"""
sortedArr = np.random.randn(10)
sortedArr.sort()
sortedArr

array([-1.43166416, -0.02424425,  0.02120963,  0.8213947 ,  1.0955762 ,
        1.11657552,  1.30050728,  1.46684367,  1.57205496,  1.67834364])

In [264]:
sortedArr[int(0.7 * len(sortedArr))] #returns the 7th element in the sorted array

1.4668436722313778

Unique and Other Set Logic
--------
NumPy has some basic set operations for one-dimensional ndarrays like np.unique, np.in1d.

In [265]:
"""
This sorts an array and returns unique elements in the array
just like the python, code seen below on the next block would do, and
it returns the dtype of the array
"""
char = np.array(['Gold', 'Drag', 'Goat', 'Drag', 'Gold','Goat'])
np.unique(char) 

array(['Drag', 'Goat', 'Gold'], dtype='<U4')

In [266]:
sorted(set(char))  #the set() returns unique elements then sorted() sorts in place the values returned

['Drag', 'Goat', 'Gold']

In [267]:
vals = np.array([4,5,6,7,0,4,3,3,7,6,9,7,0])
np.in1d(vals,[3,9]) #this checks the val arr for occurrences of 3 or 9 and returns true if found else false

array([False, False, False, False, False, False,  True,  True, False,
       False,  True, False, False])

In [271]:
#### other array set operations
eenArr = np.arange(8)
tweeArr = np.array([3,7,67,4,44,0,4,8,9,16,33,90])
np.union1d(eenArr,tweeArr)  #returns a new array containing sorted union elements of both arrays

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 16, 33, 44, 67, 90])

In [273]:
np.intersect1d(eenArr, tweeArr) #returns a sorted array of similar elements in both arrays

array([0, 3, 4, 7])

In [275]:
np.setxor1d(eenArr, tweeArr)  #removes all similar elements and sorts the array

array([ 1,  2,  5,  6,  8,  9, 16, 33, 44, 67, 90])

In [277]:
np.setdiff1d(eenArr, tweeArr) #returns a sorted array of the difference in elements of both arrays

array([1, 2, 5, 6])

File input and output with arrays
-----------
#### Numpy can save and load data to and from disk using its np.load and np.save functions

In [278]:
"""to save the content of saveArr to the arrFile,
you pass in the filename and the content/file to be saved as args
to np.save, np.save takes in these  2 args.
this creates an arrFile with the .npy extension and
adds the content of saveArr to it.
Once this is done check your current working directory for this file
"""
saveArr = np.arange(35)
np.save('arrFile',saveArr) 

In [279]:
"""
to load and view the contents of this file 
we use np.load() passing in file name to open as args
in this case, "arrFile.npy"
"""
np.load('arrFile.npy')

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34])

In [283]:
"""
multiple arrays can also be saved in an
uncompressed archive using np.savez which saves arrays as dict-like objects,
Here the arrays are passed in as args after the filename, and
assigning the arr to value which when saved becomes the key, while
the array becomes the value, just like with dictionaries
current working directory.
let's save the last 4 arrays we created earlier
"""
np.savez('largeArrFile',a=saveArr, b=eenArr, c=tweeArr, d=vals)

In [294]:
"""
Now let's load this file and view its' content.
Note that files with .npz extension when loaded
return a dict-like object.
so we would load it into a variable
"""
var = np.load("largeArrFile.npz")

In [287]:
"""
now get the actual values of the arrays, like with dict
we would use the keys we passed in initially.
let's view the contents in c.
"""
var['c']

array([ 3,  7, 67,  4, 44,  0,  4,  8,  9, 16, 33, 90])

In [291]:
"""
all the contents of this file can be viewed just like with the
python dictionaries with .items() method"""
[(k,v) for k,v in var.items()]

[('a',
  array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
         17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
         34])),
 ('b', array([0, 1, 2, 3, 4, 5, 6, 7])),
 ('c', array([ 3,  7, 67,  4, 44,  0,  4,  8,  9, 16, 33, 90])),
 ('d', array([4, 5, 6, 7, 0, 4, 3, 3, 7, 6, 9, 7, 0]))]

In [295]:
"""
multiple arrays can also be saved in a compressed format using
np.savez_compressed() function.
for example, let's resave he last 4 arrays.
check your current working directory and compare their sizes
Mine is;
largeArrFile.npz = 1.5kilobytes
compressedFile.npz = 876bytes
"""
np.savez_compressed('compressedFile',a=saveArr, b=eenArr, c=tweeArr, d=vals)

LINEAR ALGEBRA
---
