# Python Vectorization

## Use Case 1: Finding the Sum of Numbers

### Using Loops

In [1]:
import time
# Baseline: add up the integers 0..1,499,999 one at a time in pure Python
# and report how long the interpreter-level loop takes.
start = time.time()

total = 0
for number in range(1500000):
    total += number

message = "sum is : {}".format(total)
print(message)
end = time.time()
print(end - start)

sum is : 1124999250000
0.16914677619934082


### Using Vectorization

In [2]:
import numpy as np

# Vectorized: build the integers once and let NumPy reduce them in compiled code.
# dtype=np.int64 is spelled out because the platform-default integer is 32-bit
# on some builds (e.g. Windows with NumPy < 2.0), where this sum
# (1,124,999,250,000) would silently overflow.
start = time.time()
vector_total = np.sum(np.arange(1500000, dtype=np.int64))
print(vector_total)
end = time.time()
print(end - start)

1124999250000
0.007483959197998047


## Use Case 2: Mathematical Operations (on DataFrame)

In [8]:
import numpy as np
import pandas as pd

# Seed the generator so the benchmark data (and the preview below) is
# identical on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# 500,000 rows x 4 integer columns, values drawn uniformly from [0, 50).
# NOTE: column 'c' can be 0, so any d / c ratio computed from this frame
# may contain inf or NaN.
df = pd.DataFrame(
    rng.integers(0, 50, size=(500000, 4)),
    columns=['a', 'b', 'c', 'd'],
)
print(df.shape)
df.head()

(500000, 4)


Unnamed: 0,a,b,c,d
0,35,22,41,35
1,34,49,28,36
2,14,37,21,13
3,39,16,45,45
4,35,43,19,49


### Using Loops

In [9]:
import time

# Row-by-row baseline: walk the frame with iterrows() and write a single
# 'ratio' cell per iteration (100 * d / c for that row).
start = time.time()

for row_label, record in df.iterrows():
    numerator = record["d"]
    denominator = record["c"]
    df.at[row_label, 'ratio'] = 100 * (numerator / denominator)

end = time.time()
print(end - start)

  df.at[idx, 'ratio'] = 100 * (row["d"]/row["c"])
  df.at[idx, 'ratio'] = 100 * (row["d"]/row["c"])


62.439393043518066


### Using Vectorization

In [10]:
# Column-wise version: divide the whole 'd' column by the whole 'c' column
# in one operation, then scale the result to a percentage.
start = time.time()
d_over_c = df["d"] / df["c"]
df["ratio"] = 100 * d_over_c
end = time.time()
print(end - start)

0.009186029434204102


## Use Case 3: If-else Statement (on DataFrame)

### Using Loops

In [11]:
import time

# Row-by-row baseline for the conditional column 'e':
#   a == 0        -> d
#   0 < a <= 25   -> b - c
#   otherwise     -> b + c
start = time.time()
for row_label, record in df.iterrows():
    a_value = record.a
    if a_value == 0:
        new_e = record.d
    elif 0 < a_value <= 25:
        new_e = record.b - record.c
    else:
        new_e = record.b + record.c
    # Single write per row; the branch above only decides the value.
    df.at[row_label, 'e'] = new_e

end = time.time()
print(end - start)

75.22597885131836


### Using Vectorization

In [12]:
# Vectorized if / elif / else for column 'e'.
# np.select takes the first condition that is True for each row, so the
# condition list reads exactly like the loop's branches:
#   a == 0        -> d
#   0 < a <= 25   -> b - c
#   otherwise     -> b + c   (the default)
# Spelling out `a > 0` keeps this in agreement with the loop version for any
# value of 'a', instead of relying on the order of successive overwrites.
start = time.time()
a_values = df['a'].to_numpy()
df['e'] = np.select(
    [a_values == 0, (a_values > 0) & (a_values <= 25)],
    [df['d'].to_numpy(), (df['b'] - df['c']).to_numpy()],
    default=(df['b'] + df['c']).to_numpy(),
)
end = time.time()
print(end - start)

0.03017115592956543


## Use Case 4: Computing Weighted Sums (Dot Products) in Deep Learning Networks

In [13]:
import numpy as np

# Seeded generator so m and x are identical on every Restart & Run All.
rng = np.random.default_rng(42)

# setting initial values of m: a 1 x 5 array of floats drawn from [0, 1)
m = rng.random((1, 5))

# input values for 500,000 rows: a 500,000 x 5 array of floats drawn from [0, 1)
x = rng.random((500000, 5))

### Using Loops

In [14]:
import numpy as np
import time

# Loop baseline: for every row of x, accumulate sum_j x[i][j] * m[0][j].
# m and x come from the setup cell above; they are deliberately not re-drawn
# here, so this cell does not shadow that data with a different random sample.
# Loop bounds are read from x rather than hard-coded.
n_rows, n_cols = x.shape

# Pre-sized output list: one accumulated value per row of x.
y = [0] * n_rows

start = time.time()
for i in range(n_rows):
    total = 0
    for j in range(n_cols):
        total = total + x[i][j]*m[0][j]

    y[i] = total

end = time.time()
print(end - start)

1.7129251956939697


### Using Vectorization

In [15]:
start = time.time()

# dot product: x times the transpose of m gives a single column holding one
# weighted sum per row of x. The result is kept so it can be inspected.
y_vectorized = np.dot(x, m.T)

end = time.time()
print(end - start)

# Sanity check, outside the timed region: the vectorized column must match
# the values the loop version stored in y (up to floating-point rounding).
assert np.allclose(y_vectorized.ravel(), y)

0.004037141799926758
