In [None]:
%%R
# Setup chunk for the slides: sets global options and knitr chunk defaults,
# loads the R packages used later, and points reticulate at a Python install.
options(htmltools.dir.version = FALSE)
# Defaults applied to every knitr chunk: no messages or warnings in the
# output, SVG graphics device, centered figures, and cached chunk results.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  dev = "svg",
  fig.align = "center",
  #fig.width = 11,
  #fig.height = 5
  cache = TRUE
)

# define vars
# om: the current value of the "mar" graphics parameter (plot margins)
om = par("mar")
# lowtop: the same margins with the third entry replaced by 0.1
# (presumably the top margin, per par()'s bottom/left/top/right order)
lowtop = c(om[1],om[2],0.1,om[4])
library(tidyverse)
library(knitr)
library(reticulate)
# NOTE(review): hard-coded, user-specific Windows path; this chunk will only
# find Python on a machine with this exact install location.
use_python("C:\\Users\\jbpost2\\AppData\\Local\\Programs\\Python\\Python310\\python.exe")
#use_python("C:\\python\\python.exe")
# Package-level options for dplyr printing and the reticulate REPL
options(dplyr.print_min = 5)
options(reticulate.repl.quiet = TRUE)

layout: false
class: title-slide-section-red, middle

# User Defined Functions 
Justin Post

---
layout: true

<div class="my-footer"><img src="img/logo.png" style="height: 60px;"/></div> 

---

# Function Creation Syntax

- We're ready to build our own functions!

- To create our own functions, we just need to use the keyword `def`

In [None]:
def function_name(arg1, arg2, arg3 = default_arg3):
    """
    Documentation string
    """
    Function body
    return object

---

# Write Our Own Mean Function

- Sample mean

$$\bar{y}=\frac{1}{n}\sum_{i=1}^{n}y_i$$

--

In [None]:
def find_mean(y):
    """
    Return the sample mean of the values in y
    y is expected to be a list that holds only numeric data
    """
    total = sum(y)
    count = len(y)
    return total/count

--

In [None]:
find_mean(list(range(0,11)))

---

# Add Argument with Default

- Want to add in **trimmed mean** functionality

    - Need to sort the observations
    - Remove the lowest p% and highest p%
    - Find mean on that

--

In [None]:
from math import floor, sqrt
import random
y = random.sample(list(range(1,101)), 50)
y

--

In [None]:
y.sort()
y

---

# Add Argument with Default

- Want to add in **trimmed mean** functionality

    - Need to sort the observations
    - Remove the lowest p% and highest p%
    - Find mean on that


In [None]:
p = 0.1
to_remove = floor(p*len(y))
to_remove

--

In [None]:
y[to_remove:(len(y)-to_remove)]

---

# Add in Trimmed Mean Option

In [None]:
from math import floor, sqrt
import random
def find_mean(y, method = None, p = 0):
    """
    Quick function to find the mean
    Assumes we have a list with only numeric type data

    y: list of numeric values; the list itself is never modified
    method: None (default) gives the ordinary mean; "Trim" gives a trimmed mean
    p: proportion (for example 0.1 for 10%) of the sorted values removed from
       each end when method is "Trim"; ignored otherwise

    Returns the mean of the values that are kept
    Raises ValueError if method is "Trim" and p is negative
    Raises ZeroDivisionError if no values are left to average
    """
    if method == "Trim": #we'll cover if shortly!
        if p < 0:
            # a negative p would turn into a negative slice index and quietly
            # average the wrong values
            raise ValueError("p must be non-negative")
        # sorted() builds a new list, so the caller's data keeps its order
        # (y.sort() would reorder the caller's list in place)
        ordered = sorted(y)
        to_remove = floor(p*len(ordered))
        y = ordered[to_remove:(len(ordered)-to_remove)]
    return sum(y)/len(y)

---

# Test Function

In [None]:
x = random.sample(range(1,101), 50)
x
find_mean(x)
find_mean(x, method = "Trim", p = 0.1)
x.sort()
x
find_mean(x[5:45])

---

# Positional vs Named Arguments

- A function can be called using positional or named args

In [None]:
#def find_mean(y, method = None, p = 0):
find_mean(x, None)
find_mean(method = "Trim", p = 0.1, y = x)
find_mean(x, "Trim", 0.1)

--

- Can't place positional args after a keyword

In [None]:
find_mean(y = x, "Trim")

`SyntaxError: positional argument follows keyword argument (<string>, line 1)`

---

# Defining the Type of Argument

- A function definition may look like:

def f(pos1, pos2, /, pos_or_kwd, *, kwd1, kwd2):
      -----------    ----------     ----------
        |             |                  |
        |        Positional or keyword   |
        |                                - Keyword only
         -- Positional only

In [None]:
def print_it(x, y, /):
    """
    x and y are both positional-only (they sit before the /)
    Both are joined to the message with +, so they need to be strings
    """
    message = "Must pass x and y positionally!" + x + y
    print(message)

def print_it(x, /, y):
    """
    x is positional-only; y comes after the / so it may be passed either way
    Both are joined to the message with +, so they need to be strings
    """
    message = "x must be passed positionally.  y can be positional or named" + x + y
    print(message)

def print_it(x, /, y, *, z):
    """
    x is positional-only, y may be passed either way, and z is keyword-only
    (it sits after the *)
    All three are joined to the message with +, so they need to be strings
    """
    message = "Now z must be passed as a named argument" + x + y + z
    print(message)

---

# Write Our Own Correlation Function

- Pearson's correlation:

$$r = \frac{\sum_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y})}{\sqrt{\sum_{i=1}^n(x_i-\bar{x})^2\sum_{i=1}^{n}(y_i-\bar{y})^2}}$$

where
- $(x_i, y_i)$ are numeric variables observed on the same $n$ units, $i=1,...,n$


---

# Write Our Own Correlation Function

$$r = \frac{\sum_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y})}{\sqrt{\sum_{i=1}^n(x_i-\bar{x})^2\sum_{i=1}^{n}(y_i-\bar{y})^2}}$$

Function inputs:
- $x$, $y$, lists with numeric entries only

Function body:
- Find sample means for $x$ and $y$
- Compute numerator sum and denominator sums
- Find quotient and return that value

---

# Finding Sums of Interest

- Easier once we learn arrays, but for now we'll peek at a for loop and `zip`

In [None]:
x = random.sample(range(1,101), 50)
x
xbar = find_mean(x)

In [None]:
# x holds 50 values, so y needs 50 as well for the pairs to line up;
# range(1, 51) gives 1 through 50 (the stop value is not included)
y = list(range(1, 51))
y
ybar = find_mean(y)

---

# Finding Sums of Interest

- Easier once we learn arrays, but for now we'll peek at a for loop and `zip`

In [None]:
(x[0]-xbar)**2
den_x = 0
for i, j in zip(x, [xbar]*len(x)):
    den_x += (i-j)**2
den_x

---

# Write Our Own Correlation Function


In [None]:
def find_corr(x, y):
    """
    Compute Pearson's Correlation Coefficient
    x and y are assumed to be lists with numeric values
    Data is assumed to have no missing values

    x, y: paired observations; x[i] and y[i] are measured on the same unit
    Returns the correlation as a float
    Raises ValueError if x and y have different lengths
    Raises ZeroDivisionError if x or y has no variation (or both are empty)
    """
    if len(x) != len(y):
        # zip() stops at the shorter input, which would silently drop the
        # unmatched values even though they were used to compute the means
        raise ValueError("x and y must have the same length")
    xbar = find_mean(x)
    ybar = find_mean(y)
    num = 0
    den_x = 0
    den_y = 0
    # the means are constants, so only the data values need to be zipped
    for x_i, y_i in zip(x, y):
        x_dev = x_i - xbar
        y_dev = y_i - ybar
        num += x_dev*y_dev
        den_x += x_dev**2
        den_y += y_dev**2
    return num/sqrt(den_x*den_y)
print(find_corr.__doc__)

---

# Test the Function

In [None]:
find_corr(x,y)
x.sort()
find_corr(x,y)

---

# Other Things to Note

- When executing a function, a new symbol table is used for the local variables

In [None]:
import numpy as np
y = np.array(range(1,11))

def square(z):
    """
    Square z, print z alongside the result, and return the result
    The y assigned here is a local name that exists only inside the function
    """
    y = z**2
    message = "In the function environment, z = " + str(z) + " and y = " + str(y)
    print(message)
    return y

square(y)
print(y)

In [None]:
print(z)

---

# Other Things to Note

- Can define global variables in a function

In [None]:
def square(z):
    """
    Square z, print z alongside the result, and return the result
    Because of the global statement, the assignment to y below rebinds the
    module-level y rather than creating a local name
    """
    global y
    y = z**2
    message = "In the function environment, z = " + str(z) + " and y = " + str(y)
    print(message)
    return y

square(y)
print(y) #modified globally

---

# Other Things to Note

- If nothing is returned from a function (with `return`) then it actually returns `None`

In [None]:
def square_it(a):
    """
    Return a squared when a is exactly an int or a float
    For any other type the bare return hands back None
    """
    if type(a) not in (int, float):
        return
    return a**2
    
square_it(10)
square_it(10.5)
square_it("10")
print(square_it("10"))

---

# Other Things to Note

- Default values are only evaluated once - at the time of the function **definition**

- Mutable objects can cause an issue!

In [None]:
def my_append(value, L = []):
    """
    Append value to L and return L
    Intentional pitfall for this example: the default [] is created once,
    when the def statement runs, so every call that leaves out L appends to
    that same list
    """
    L.append(value)
    return L

print(my_append("A"))
print(my_append("B"))

---

# Other Things to Note

- Default values are only evaluated once - at the time of the function **definition**

- Instead define the default value as `None`

In [None]:
def my_append(value, L = None):
    """
    Append value to L and return the list
    When L is not supplied (None), a brand new list is created on each call,
    so separate calls do not share results
    """
    target = [] if L is None else L
    target.append(value)
    return target

print(my_append("A"))
print(my_append("B"))

---

# To JupyterLab!  

- Create a function to graph the normal approximation to a binomial and return a probability

    + Option for a continuity correction

---

# Recap

- Writing functions is super cool!

In [None]:
def func_name(args):
    """
    Doc string
    """
    body
    return object

- Many ways to set up your function arguments and to call your function

- More on function writing later!
