## Basic: Using numpy vectorisations
These examples follow Anmol's tutorial [Say Goodbye to Loops in Python, and Welcome Vectorization!](https://medium.com/codex/say-goodbye-to-loops-in-python-and-welcome-vectorization-e8b0172b9581), timing plain Python loops against their vectorized NumPy/pandas equivalents.

In [None]:
## Use Case 1: Finding the Sum of numbers
import time
import numpy as np

# How many consecutive integers (0 .. n_numbers - 1) both approaches add up.
n_numbers = 1500000

# Loop: sum the numbers one at a time in pure Python.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations. The prints are kept outside the timed region so that only
# the summation itself is measured.
python_start = time.perf_counter()
total = 0
for item in range(0, n_numbers):
    total = total + item
python_end = time.perf_counter()
python_total_time = python_end - python_start
print('python sum is:'+str(total))
print('Python iterative sum time: {:.6f} seconds'.format(python_total_time))

print('---vs----')

# Vectorisation: let NumPy build the range and add it up in native code.
# dtype=np.int64 keeps the result (about 1.1e12) from overflowing on
# platforms where NumPy's default integer is 32 bits wide.
numpy_start = time.perf_counter()
total_sum = np.sum(np.arange(n_numbers, dtype=np.int64))
numpy_end = time.perf_counter()
numpy_total_time = numpy_end - numpy_start
# Report the NumPy result itself (not the loop's `total`).
print('numpy is:'+str(total_sum))
print('NumPy vectorized sum time: {:.6f} seconds'.format(numpy_total_time))
print('-------')

time_saving = python_total_time - numpy_total_time
percentage_savings = (time_saving / python_total_time) * 100
print('Percentage savings by using NumPy: {:.2f}%'.format(percentage_savings))

# "N times faster" is the ratio of the two run times. Guard against a
# zero-length measurement so the cell never dies on a ZeroDivisionError.
if numpy_total_time > 0:
    time_multiple = python_total_time / numpy_total_time
else:
    time_multiple = float('inf')
print('Using NumPy is {:.2f} times faster than using the Python loop.'.format(time_multiple))


In [None]:
## Use Case 2: Mathematical Operations (on DataFrame)
import time
import numpy as np
import pandas as pd


def add_ratio_loop(df):
    """Add a 'ratio' column equal to 100 * d / c, one row at a time.

    This is the slow baseline of the benchmark: it walks the frame with
    ``iterrows`` and writes each cell individually with ``df.at``.

    Args:
        df: DataFrame with numeric columns 'c' and 'd'. Modified in place.

    Returns:
        The same DataFrame, for convenience.
    """
    for idx, row in df.iterrows():
        df.at[idx, 'ratio'] = 100 * (row['d'] / row['c'])
    return df


def add_ratio_vectorized(df):
    """Add a 'ratio' column equal to 100 * d / c with one column expression.

    Args:
        df: DataFrame with numeric columns 'c' and 'd'. Modified in place.

    Returns:
        The same DataFrame, for convenience.
    """
    df['ratio'] = 100 * (df['d'] / df['c'])
    return df


# The guard keeps the multi-minute benchmark from running when the two
# functions above are merely imported; in a notebook or when run as a script
# __name__ is '__main__', so the cell behaves as before.
if __name__ == '__main__':
    # create dataframe: 5 million rows of random integers in [0, 50).
    # Column 'c' can contain 0, so some ratios come out as inf or NaN.
    df = pd.DataFrame(np.random.randint(0, 50, size=(5000000, 4)), columns=('a', 'b', 'c', 'd'))

    # Loop: create a new column 'ratio' for the ratio of d to c.
    python_start = time.perf_counter()
    add_ratio_loop(df)
    python_end = time.perf_counter()
    python_total_time = python_end - python_start
    print('Python time: {:.6f} seconds'.format(python_total_time))

    print('---vs----')

    # Take the loop's column out of the frame so the vectorized run starts
    # from the same four-column frame, and keep it for the comparison below.
    loop_ratio = df.pop('ratio')

    # Vectorized: create the same column in a single expression.
    numpy_start = time.perf_counter()
    add_ratio_vectorized(df)
    numpy_end = time.perf_counter()
    numpy_total_time = numpy_end - numpy_start
    print('NumPy time: {:.6f} seconds'.format(numpy_total_time))

    # Confirm both approaches computed the same values (NaN == NaN here).
    print('Results match: {}'.format(np.allclose(loop_ratio, df['ratio'], equal_nan=True)))


In [None]:
## Use Case 3: If else on dataframe
import time
import numpy as np
import pandas as pd


def add_conditional_column_loop(df):
    """Add column 'e' by applying an if/elif/else rule to every row.

    Rule per row: ``e = d`` when ``a == 0``; ``e = b + c`` when ``a >= 25``;
    otherwise ``e = b - c``. This is the slow baseline of the benchmark.

    Args:
        df: DataFrame with numeric columns 'a', 'b', 'c', 'd'. Modified in place.

    Returns:
        The same DataFrame, for convenience.
    """
    for idx, row in df.iterrows():
        if row.a == 0:
            df.at[idx, 'e'] = row.d
        elif row.a >= 25:
            df.at[idx, 'e'] = row.b + row.c
        else:
            df.at[idx, 'e'] = row.b - row.c
    return df


def add_conditional_column_vectorized(df):
    """Add column 'e' using the same rule as the loop, on whole columns.

    The nested ``np.where`` mirrors the if/elif/else order: the ``a == 0``
    test is evaluated first, then ``a >= 25``, then the fallback.

    Args:
        df: DataFrame with numeric columns 'a', 'b', 'c', 'd'. Modified in place.

    Returns:
        The same DataFrame, for convenience.
    """
    df['e'] = np.where(
        df['a'] == 0,
        df['d'],
        np.where(df['a'] >= 25, df['b'] + df['c'], df['b'] - df['c']),
    )
    return df


# The guard keeps the multi-minute benchmark from running when the two
# functions above are merely imported; in a notebook or when run as a script
# __name__ is '__main__', so the cell behaves as before.
if __name__ == '__main__':
    # create dataframe: 5 million rows of random integers in [0, 50).
    df = pd.DataFrame(np.random.randint(0, 50, size=(5000000, 4)), columns=('a', 'b', 'c', 'd'))

    # Loop: iterate through the dataframe to create column 'e'.
    python_start = time.perf_counter()
    add_conditional_column_loop(df)
    python_end = time.perf_counter()
    python_total_time = python_end - python_start
    print('Python time: {:.6f} seconds'.format(python_total_time))

    print('---vs----')

    # Take the loop's column out of the frame so the vectorized run starts
    # from the same four-column frame, and keep it for the comparison below.
    loop_e = df.pop('e')

    # Vectorization: create the same column 'e' without a Python-level loop.
    numpy_start = time.perf_counter()
    add_conditional_column_vectorized(df)
    numpy_end = time.perf_counter()
    numpy_total_time = numpy_end - numpy_start
    print('NumPy time: {:.6f} seconds'.format(numpy_total_time))

    # Confirm both approaches computed the same values.
    print('Results match: {}'.format(np.array_equal(loop_e.to_numpy(), df['e'].to_numpy())))


In [None]:
# Use Case 4: Solving Multi Regression problems
import time
import numpy as np
import pandas as pd


def predict_loop(x, m):
    """Compute the weighted sum of every row of ``x`` with nested Python loops.

    For each row ``i`` this accumulates ``sum_j x[i][j] * m[0][j]``. The row
    and feature counts are taken from ``x.shape`` rather than hard-coded.

    Args:
        x: 2-D array of inputs, one sample per row.
        m: 2-D array with a single row of coefficients, one per column of ``x``.

    Returns:
        1-D float array with one value per row of ``x``.
    """
    n_rows, n_features = x.shape
    # Store each row's result so the loop's output can be compared with the
    # vectorized one instead of being thrown away.
    out = np.empty(n_rows)
    for i in range(n_rows):
        total = 0
        for j in range(n_features):
            total = total + x[i][j]*m[0][j]
        out[i] = total
    return out


def predict_vectorized(x, m):
    """Compute the same weighted sums as ``predict_loop`` with one dot product.

    Args:
        x: 2-D array of inputs, one sample per row.
        m: 2-D array with a single row of coefficients, one per column of ``x``.

    Returns:
        2-D array with one row per row of ``x`` and a single column.
    """
    return np.dot(x, m.T)


# The guard keeps the long-running benchmark from running when the two
# functions above are merely imported; in a notebook or when run as a script
# __name__ is '__main__', so the cell behaves as before.
if __name__ == '__main__':
    # create the data: one row of 5 coefficients
    m = np.random.rand(1, 5)
    # input values for 5 million samples
    x = np.random.rand(5000000, 5)

    # perf_counter() measures elapsed wall-clock time, matching the other use
    # cases; process_time() reports CPU time for the whole process instead.
    python_start = time.perf_counter()
    # Loop: multiply and accumulate element by element
    loop_result = predict_loop(x, m)
    python_end = time.perf_counter()
    python_total_time = python_end - python_start
    print('Python iterative computation time: {:.6f} seconds'.format(python_total_time))

    print('---vs----')

    numpy_start = time.perf_counter()
    # Vectorisation: a single matrix product
    numpy_result = predict_vectorized(x, m)
    numpy_end = time.perf_counter()
    numpy_total_time = numpy_end - numpy_start
    print('NumPy computational time: {:.6f} seconds'.format(numpy_total_time))

    # Confirm both approaches computed the same values (up to float rounding).
    print('Results match: {}'.format(np.allclose(loop_result, numpy_result.ravel())))


