In [1]:
def sum_up_to(x):
    """Return 0 + 1 + ... + x, computed by explicit iteration (linear time).

    Args:
        x: Upper bound of the sum, inclusive. Expected to be an integer.

    Returns:
        The integer sum. For negative x the range is empty and 0 is returned.
    """
    total = 0  # deliberately not named `sum`, which would shadow the builtin
    for i in range(x + 1):  # + 1 so that x itself is included in the sum
        total += i
    return total

In [2]:
def sum_up_to(x):
    """Return 0 + 1 + ... + x using the closed form x * (x + 1) / 2 (constant time).

    Args:
        x: Upper bound of the sum, inclusive. Expected to be a non-negative
            integer.

    Returns:
        The sum as an int.
    """
    # x * (x + 1) is always even for integer x, so floor division is exact.
    # True division would go through a float and lose precision for large x.
    return x * (x + 1) // 2

**Big O**, or asymptotic efficiency, is a much better way of analyzing algorithm (and data structure) performance. While we will discuss the mathematical definition in class, a simplified definition is this: Big O identifies an upper bound on the runtime of a function as the size of its input grows towards infinity.

In [None]:
# Constant Time
def constant(n):
    """Count the operations of a constant-time routine.

    Args:
        n: Input value; it is copied once and otherwise ignored.

    Returns:
        Always 1, because exactly one operation is counted whatever n is.
    """
    x = n  # the single unit of work being counted
    ops = 1
    return ops

# Same operation count for a small and a large input, one result per line.
print(constant(5), constant(9001), sep="\n")

In [None]:
# Logarithmic Time
import math
def logarithmic(n):
    """Count the operations of a logarithmic-time routine.

    n is halved repeatedly until it drops below 2, so for n >= 1 the count
    equals floor(log2(n)).

    Args:
        n: Input size.

    Returns:
        The number of halving steps. This is 0 for every n < 2, including
        n <= 0, where log2 is undefined and math.log2 would raise ValueError.
    """
    ops = 0
    while n >= 2:
        n //= 2  # each step discards half of the remaining input
        ops += 1
    return ops

# Operation counts for a small and a large input, one result per line.
print(logarithmic(5), logarithmic(9001), sep="\n")

In [None]:
# Linear Time
def linear(n):
    """Count the operations of a linear-time routine.

    Args:
        n: Number of loop iterations to perform.

    Returns:
        One counted operation per iteration of range(n).
    """
    return sum(1 for _ in range(n))

# Operation counts for a small and a large input, one result per line.
print(linear(5), linear(9001), sep="\n")

In [None]:
# Quadratic Time
def quadratic(n):
    """Count the operations of a quadratic-time routine.

    Args:
        n: Iteration count used for both the outer and the inner loop.

    Returns:
        One counted operation per (outer, inner) iteration pair.
    """
    return sum(1 for _ in range(n) for _ in range(n))

# Operation counts for a small and a large input, one result per line.
print(quadratic(5), quadratic(9001), sep="\n")

In [1]:
def doStuff(inList1, inList2):
    """Count the steps of a single pass and of a nested pass over two inputs.

    Args:
        inList1: Iterable walked once on its own, then again as the outer loop.
        inList2: Iterable walked in full for every element of inList1.

    Returns:
        A tuple (c1, c2): c1 is the number of steps in the single pass over
        inList1, c2 the number of steps in the nested pass.
    """
    # Single pass over the first input.
    c1 = sum(1 for _ in inList1)

    # Nested pass: the second input is traversed once per outer element.
    c2 = sum(1 for _ in inList1 for _ in inList2)
    return c1, c2

# 3 outer elements and 10 inner elements.
print(doStuff([1, 2, 3], [*range(10)]))

(3, 30)


In [5]:
def doStuff2(n):
    """Add n to a running total a fixed ten times.

    The loop length does not depend on n; only the amount added does.

    Args:
        n: Value added on every iteration.

    Returns:
        The total after ten additions of n, starting from 0.
    """
    total = 0
    remaining = 10
    while remaining:
        total += n
        remaining -= 1
    return total

# Results for a small and a large input, one per line.
print(doStuff2(5), doStuff2(9001), sep="\n")

50
90010


In [23]:
def doStuff3(inList):
    """Count how many times the length of inList can be halved before reaching 0.

    Args:
        inList: Any object supporting len().

    Returns:
        The number of halving steps: 0 for an empty input, otherwise
        floor(log2(len(inList))) + 1.
    """
    ops = 0
    size = len(inList)
    while size > 0:
        # Floor division stays in exact integer arithmetic. int(size / 2)
        # would go through a float and miscount for lengths above 2**53.
        size //= 2
        ops += 1
    return ops

def doStuff4(inList1, inList2):
    """Total the doStuff3 step counts over one call per element of inList1.

    Args:
        inList1: Iterable whose elements drive the outer loop; the values
            themselves are not used.
        inList2: Argument passed to doStuff3 on every outer iteration.

    Returns:
        The sum of doStuff3(inList2) taken once per element of inList1.
    """
    return sum(doStuff3(inList2) for _ in inList1)

# The inner list grows tenfold each time (10, 100, 1000 elements); one result per line.
print(*(doStuff4([1, 2, 3], list(range(10 ** k))) for k in (1, 2, 3)), sep="\n")

12
21
30


**Randomness in Data Science** Whether you are simulating real-world data or just testing out an algorithm or function on a simplified dataset, creating a random dataset is a common task in data science. Here we will explore how we can use the built-in Python module `random` and (eventually) extend these functions to practical applications.


The `random` module implements pseudo-random number generators for various probability distributions. What does it mean to be pseudo-random? Let's explore below!

In [16]:
import random

# No explicit seed has been set in this cell, so this draw depends on the
# generator's current state.
print(random.random())

# Fixing the seed makes the draws that follow reproducible.
random.seed(a=1)

print(random.random())

0.8474337369372327
0.13436424411240122


In [17]:
import random
# With no argument the generator is seeded from an unpredictable source
# (OS-provided randomness, or the current time as a fallback).
random.seed()
print(*(random.random() for _ in range(3)), sep="\n")

# Re-seeding with the same value restarts the sequence, so both passes
# print the same three numbers.
for i in range(2):
    print("*****")
    random.seed(1)
    print(*(random.random() for _ in range(3)), sep="\n")


0.809817202370851
0.7882837082814187
0.3125870959324071
*****
0.13436424411240122
0.8474337369372327
0.763774618976614
*****
0.13436424411240122
0.8474337369372327
0.763774618976614


In [18]:
# Named low/high rather than min/max: rebinding the builtin names at module
# level would break every later call to min() or max() in the session.
low = 1
high = 5

# randint includes both endpoints, so this prints an integer in [low, high].
print(random.randint(low, high))

random.seed()
# Ten independent integers, each drawn from 0 to 10 inclusive.
l = [random.randint(0, 10) for _ in range(10)]
print(l)

3
[4, 6, 4, 2, 10, 6, 4, 3, 6, 0]


In [19]:
options = list("ABCD")

# A single pick from the options.
print(random.choice(options))

random.seed()
# Ten independent picks; the same option may come up more than once.
l = [random.choice(options) for _ in range(10)]
print(l)

C
['D', 'B', 'B', 'C', 'C', 'C', 'A', 'A', 'D', 'B']


In [20]:
options = list("ABCD")

# sample() always returns a list, even when only one element is requested.
print(random.sample(options, k=1))

random.seed()
# Three picks without replacement, so no option appears twice.
l = random.sample(options, k=3)
print(l)

['A']
['C', 'B', 'D']


In [22]:
l = [*range(10)]
print(l)

# shuffle() reorders the list in place, so the list itself is printed after each call.
random.shuffle(l)
print(l)

# A second shuffle starts from the already-shuffled order.
random.shuffle(l)
print(l)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[7, 3, 2, 6, 0, 5, 1, 8, 9, 4]
[5, 8, 9, 7, 3, 2, 4, 0, 6, 1]
