# Chapter 4: Functional Programming: Rudimentary Statistics and Analytics

In [None]:
# The general form of a function
# def function_name(object1, object2, ... , objectn):
#    <operations>

### Total
$\sum_{i=0}^{n-1} x_{i}$

In [4]:
# Build the data first, then accumulate a running total while printing
# each intermediate sum next to the value that produced it.
n = 0
values = list(range(10))
total = 0

print("total\t", "value")
for value in values:
    total = total + value
    print(total, "\t", value)

total	 value
0 	 0
1 	 1
3 	 2
6 	 3
10 	 4
15 	 5
21 	 6
28 	 7
36 	 8
45 	 9


This way of coding is inefficient because it must be rewritten each time it needs to be used.
Instead, the code can be stored as a function, dramatically shortening the amount of code that needs to be written.

In [7]:
def total(lst):
    """Add up every element of lst and return the result.

    The accumulator starts at 0, so an empty list returns 0.
    Elements are visited directly rather than through their indices.
    """
    running_sum = 0
    for item in lst:
        running_sum += item
    return running_sum
total(values)

45

In [8]:
total([i for i in range(-1000, 10000, 53)])

932984

In [9]:
import random
x1 = [random.randint(0,100) for i in range(10)]
total(x1)

439

### Mean
$\bar{X} = \frac{\sum_{i=0}^{n-1} x_{i}} {n}$

To calculate the mean, sum all the numbers and divide by how many numbers there are.

In [14]:
def mean(lst):
    """Return the arithmetic mean of lst: its total divided by its length.

    An empty list raises ZeroDivisionError because its length is 0.
    """
    count = len(lst)
    return total(lst) / count

# more concise
def meanc(lst):
    """Return the arithmetic mean of lst, computed in a single expression."""
    return total(lst) / len(lst)

In [15]:
mean(x1)

43.9

In [16]:
meanc(x1)

43.9

Now I will build functions for the median, mode, variance (Var), standard deviation (SD), standard error (SE), covariance (Cov), and correlation (Corr).

### Median

In [24]:
def median(lst):
    """Return the middle value of lst after sorting.

    Odd length: the single middle element is returned as-is.
    Even length: the mean of the two middle elements is returned.
    A sorted copy is used, so the caller's list is left untouched.
    """
    size = len(lst)
    ordered = sorted(lst)
    half = size // 2
    if size % 2 == 1:
        # odd length: index `half` is the exact middle
        return ordered[half]
    # even length: average the pair that straddles the middle
    return mean(ordered[half - 1 : half + 1])

In [25]:
median1 = median(x1)
median1

43.5

In [27]:
median2 = median([random.randint(0,100) for i in range(11)])
median2

66

### Mode

In [32]:
def mode(lst):
    """Return every value that occurs most often in lst.

    Args:
        lst: iterable of hashable values. It is traversed only once,
            so one-shot iterators are counted correctly.

    Returns:
        A list of the most frequent value(s), in order of first
        appearance. Ties produce several entries; an empty input
        produces an empty list.
    """
    # tally occurrences in a single pass
    count_dct = {}
    for key in lst:
        count_dct[key] = count_dct.get(key, 0) + 1
    # no data means no mode; max() would raise on an empty sequence
    if not count_dct:
        return []
    max_count = max(count_dct.values())
    # keep every key that ties for the highest count
    return [key for key, count in count_dct.items() if count == max_count]

In [33]:
lst = [1,2,3,4,5,5,5,5,5,5,5,5]
mode(lst)

[5]

In [35]:
lst = [1,1,1,1,1,2,3,4,5,5,5,5,5]
mode(lst)

[1, 5]

### Variance

We define population variance as:

$$ \sigma^2 = \frac{\sum_{i=1}^n (X_i - \bar{X})^2}{n}$$

and the sample variance as:

$$ S^2 = \frac{\sum_{i=1}^n (X_i - \bar{X})^2}{n-1}$$


In [38]:
def variance(lst, sample=True):
    """Return the variance of lst.

    Args:
        lst: list of numbers.
        sample: when True (the default) divide the sum of squared
            deviations by n - 1 (sample variance); when False divide
            by n (population variance).

    Returns:
        The sum of squared deviations from the mean divided by the
        chosen divisor.

    Raises:
        ZeroDivisionError: if lst is empty, or if it holds a single
            value and sample is True (the divisor is then 0).
    """
    mean_ = mean(lst)
    n = len(lst)
    # degrees of freedom: n - 1 for a sample, n for the whole population
    divisor = n - 1 if sample else n

    sum_sq_diff = 0
    for val in lst:
        sum_sq_diff += (val - mean_) ** 2
    return sum_sq_diff / divisor

In [40]:
variance(lst), variance(lst, sample = False)

(3.5, 3.230769230769231)

In [41]:
def SD(lst, sample = True):
    """Return the standard deviation of lst: the square root of its variance.

    The sample flag is passed straight through to variance().
    """
    return variance(lst, sample) ** 0.5

In [43]:
SD(lst), SD(lst, False)

(1.8708286933869707, 1.7974340685458343)