# Writing Functions

## Theory

In [1]:
def print_greeting():
    print('Hello!')
    print('The weather is nice today.')
    print('Right?')

print_greeting()

Hello!
The weather is nice today.
Right?


In [2]:
def print_date(year, month, day):
    joined = str(year) + '/' + str(month) + '/' + str(day)
    print(joined)

print_date(1871, 3, 19)
print_date(month=3, day=19, year=1871)

1871/3/19
1871/3/19


In [3]:
result = print_date(1871, 3, 19) # because return is not defined
print('result of call is:', result)

1871/3/19
result of call is: None


In [4]:
def average(values):
    if len(values) == 0:
        return None
    return sum(values) / len(values)

a = average([1, 3, 4])
print('average of actual values:', a)
print('average of empty list:', average([]))

average of actual values: 2.6666666666666665
average of empty list: None


In [163]:
# Functions will often contain conditionals. Here is a short example that will indicate which quartile the argument is in based on hand-coded values for the quartile cut points.

def calculate_life_quartile(exp):
    if exp < 58.41:
        # This observation is in the first quartile
        return 1
    elif exp >= 58.41 and exp < 67.05:
        # This observation is in the second quartile
       return 2
    elif exp >= 67.05 and exp < 71.70:
        # This observation is in the third quartile
       return 3
    elif exp >= 71.70:
        # This observation is in the fourth quartile
       return 4
    else:
        # This observation has bad data
       return None

calculate_life_quartile(62.5)

# That function would typically be used within a for loop, but Pandas has a different, more efficient way of doing the same thing, and that is by applying a function to a dataframe or a portion of a dataframe. Here is an example, using the definition above.

data = pd.read_csv('data/gapminder_all.csv')
data['life_qrtl'] = data['lifeExp_1952'].apply(calculate_life_quartile)

# There is a lot in that second line, so let’s take it piece by piece. On the right side of the = we start with data['lifeExp'], which is the column in the dataframe called data labeled lifExp. We use the apply() to do what it says, apply the calculate_life_quartile to the value of this column for every row in the dataframe.

2

## Miscellaneous

## Exercises

In [5]:
# Read the code below and try to identify what the errors are without running it.
# def another_function
#   print("Syntax errors are annoying.")
#    print("But at least python tells us about them!")
#   print("So they are usually not too hard to fix.")
# Run the code and read the error message. Is it a SyntaxError or an IndentationError?
# Fix the error.
# Repeat steps 2 and 3 until you have fixed all the errors.

def another_function():
    print("Syntax errors are annoying.")
    print("But at least python tells us about them!")
    print("So they are usually not too hard to fix.")

another_function()

# answer: the general structure for creating functions is violated; missing colon; the second for the arguments needs to stay empty, since we are not using any input variables; indentation is incorrect in the body of the code (4 spaces); the function name is not specific, but rather general
# answer: SyntaxError - missing section for arguments; SyntaxError - missing colon; IndentationError - not 4 spaces before the body of the code

Syntax errors are annoying.
But at least python tells us about them!
So they are usually not too hard to fix.


In [6]:
# What does the following program print?
def report(pressure):
    print('pressure is', pressure)

print('calling', report, 22.5) # issue

# answer: calling <function report at 0x715780683a00> 22.5
# explaination: A function call always needs parenthesis, otherwise you get memory address of the function object

# fix:
print("calling")
report(22.5) # correct

calling <function report at 0x70819b77a560> 22.5
calling
pressure is 22.5


In [11]:
# What’s wrong in this example?
# result = print_time(11, 37, 59)

# def print_time(hour, minute, second):
#    time_string = str(hour) + ':' + str(minute) + ':' + str(second)
#    print(time_string)

# answer: return is not defined, this None will be inserted into the result variable; indentation was only 3 spaces; we are calling the variable before defyining the function itself

def print_time(hour, minute, second):
    time_string = str(hour) + ':' + str(minute) + ':' + str(second)
    print(time_string)
    #return(time_string) # added
    
result = print_time(11, 37, 59)

# After fixing the problem above, explain why running this example code gives this output:
result = print_time(11, 37, 59)
print('result of call is:', result)

# answer: the rerturn was not defined within the function and since every function returns something, nothing was returned, but something was, thus in a version of None which was inserted into result variable (automatically returned if return is not defined and somehting should be returned), although the time is printed, since print function is included within the function

11:37:59
11:37:59
result of call is: None


In [18]:
# Fill in the blanks to create a function that takes a single filename as an argument, loads the data in the file named by the argument, and returns the minimum value in that data.
# import pandas as pd

# def min_in_data(____):
#     data = ____
#     return ____

import pandas as pd

def min_in_data(file):
    data = pd.read_csv(file)
    return data.min()

min_in_data('../../Specific Research Project in Ecology/SK_insect_herbivory_2021-2025.csv') # works!

year                             2021
logger_ID                           1
ID                               103A
Taxon               Achillea moschata
treatment                     control
cover_obs1                        0.0
cover_obs2                        0.0
insect_herbivory                  0 %
dtype: object

In [27]:
# Fill in the blanks to create a function that takes a list of numbers as an argument and returns the first negative value in the list. What does your function do if the list is empty? What if the list has no negative numbers?
# def first_negative(values):
#     for v in ____:
#         if ____:
#             return ____

#list_nums = [1, -2, 0, -5, 2, -7]
#list_nums = []
list_nums = [1, 2, 3]

def first_negative(values):
    for v in values:
        if v < 0:
            return v

neg_num = first_negative(list_nums)
print(neg_num)

# answer: if the list is emtpy, nothing is returned as None, same applies to the case when there is no negative number present

None


In [28]:
# Earlier we saw this function:
# def print_date(year, month, day):
#     joined = str(year) + '/' + str(month) + '/' + str(day)
#     print(joined)

# We saw that we can call the function using named arguments, like this:
# print_date(day=1, month=2, year=2003)
# What does print_date(day=1, month=2, year=2003) print?
# When have you seen a function call like this before?
# When and why is it useful to call functions this way?

print_date(day=1, month=2, year=2003)

# answer: 2003/2/1 as a string
# answer: when defyining a variable (inserting values) or my R course :D
# answer: for quick and easy comprehension, especially by those who are not the authors of the code; improves readibility; order does not matter which is also easier to call the function correctly

2003/2/1


In [61]:
# The code below will run on a label-printer for chicken eggs. A digital scale will report a chicken egg mass (in grams) to the computer and then the computer will print a label.
import random
for i in range(10):

    # simulating the mass of a chicken egg
    # the (random) mass will be 70 +/- 20 grams
    mass = 70 + 20.0 * (2.0 * random.random() - 1.0) # in range 0-1, multipliers with 2 first, then 1 is subtracted to purely have a +/- multiplier 

    print(mass)

    # egg sizing machinery prints a label
    if mass >= 85:
        print("jumbo")
    elif mass >= 70:
        print("large")
    elif mass < 70 and mass >= 55:
        print("medium")
    else:
        print("small")

print()

# The if-block that classifies the eggs might be useful in other situations, so to avoid repeating it, we could fold it into a function, get_egg_label(). Revising the program to use the function would give us this:

# revised version
# import random
# for i in range(10):

#     # simulating the mass of a chicken egg
#     # the (random) mass will be 70 +/- 20 grams
#     mass = 70 + 20.0 * (2.0 * random.random() - 1.0)

#     print(mass, get_egg_label(mass))

# Create a function definition for get_egg_label() that will work with the revised program above. Note that the get_egg_label() function’s return value will be important. Sample output from the above program would be 71.23 large.
# A dirty egg might have a mass of more than 90 grams, and a spoiled or broken egg will probably have a mass that’s less than 50 grams. Modify your get_egg_label() function to account for these error conditions. Sample output could be 25 too light, probably spoiled.

# answer:

def get_egg_label(mass):

    if mass >= 90:
        print('mass too high: probably dirty or abnormal')
        return mass
    elif mass >= 85:
        print("jumbo")
        return mass
    elif mass >= 70:
        print("large")
        return mass
    elif mass < 70 and mass >= 55:
        print("medium")
        return mass
    elif mass >= 50:
        print("small")
        return mass
    else:
        print('mass too low: probably spoiled or abnormal')
        return mass

import random
for i in range(10):

    # simulating the mass of a chicken egg
    # the (random) mass will be 70 +/- 20 grams
    mass = 70 + 30.0 * (2.0 * random.random() - 1.0)

    print(get_egg_label(mass))

62.015609700347284
medium
78.81753005705077
large
68.3598049988704
medium
85.16227401851597
jumbo
64.93264067881661
medium
75.16671017492969
large
84.74525995246454
large
84.71990382027954
large
80.1115255318686
large
73.17990810547221
large

mass too high: probably dirty or abnormal
96.20533145301542
mass too high: probably dirty or abnormal
92.55314492371497
mass too low: probably spoiled or abnormal
48.334535160965814
medium
63.19651982913253
medium
61.807967614332085
mass too high: probably dirty or abnormal
95.16369599705826
medium
55.22896209704009
medium
59.44274830983363
large
76.05451726528037
medium
68.8548305684062


In [62]:
# Assume that the following code has been executed:
# import pandas as pd

# data_asia = pd.read_csv('data/gapminder_gdp_asia.csv', index_col=0)
# japan = data_asia.loc['Japan']

# Complete the statements below to obtain the average GDP for Japan across the years reported for the 1980s.

# answer:
year = 1983
gdp_decade = 'gdpPercap_' + str(year // 10)
avg = (japan.loc[gdp_decade + '2'] + japan.loc[gdp_decade + '7']) / 2

# Abstract the code above into a single function.

def avg_gdp_in_decade(country, continent, year):
    data_countries = pd.read_csv('data/gapminder_gdp_' + str(continent) + '.csv', delimiter = ',', index_col=0)
    place = data_countries.loc[str(country)]
    gdp_decade = 'gdpPercap_' + str(year // 10)
    avg = (place.loc[gdp_decade + '2'] + place.loc[gdp_decade + '7']) / 2
    return avg

# How would you generalize this function if you did not know beforehand which specific years occurred as columns in the data? For instance, what if we also had data from years ending in 1 and 9 for each decade? (Hint: use the columns to filter out the ones that correspond to the decade, instead of enumerating them in the code.)

# answer:
def avg_gdp_in_decade(country, continent, year):
    data_countries = pd.read_csv('data/gapminder_gdp_' + str(continent) + '.csv', index_col=0)
    place = data_countries.loc[str(country)]
    gdp_decade = 'gdpPercap_' + str(year // 10)
    total = 0.0
    num_years = 0
    for yr_header in place.index:
        if yr_header.startswith(gdp_decade):
            total = total + place.loc[yr_header]
            num_years = num_years + 1
    return total/num_years

# could be called by:
avg_gdp_in_decade('Japan','asia',1983)

# e.g. <<< 20880.023800000003

NameError: name 'japan' is not defined

In [156]:
def logistic_map (x, r):
    t_plus_one = r * x * (1 - x) # or return r * x * (1 - x)
    return t_plus_one

initial_pop = 0.5
t_final = 10
r = 1
pop = [initial_pop]

for iteration in range(t_final + 1):
        t.append(iteration)
        pop.append(logistic_map(pop[iteration], r))

print(pop)

[0.5, 0.25, 0.1875, 0.15234375, 0.1291351318359375, 0.11245924956165254, 0.09981216674968249, 0.08984969811841606, 0.08177672986644556, 0.07508929631879595, 0.06945089389714401, 0.06462746723403166]


In [162]:
def iterate (initial_pop, t_final, r):
    for iteration in range(t_final + 2):
        if iteration == 0:
            #print('t : pop')
            t_pop = {iteration : initial_pop}
        elif iteration == t_final + 1:
            return t_pop
        else: 
            t_pop.update({iteration : logistic_map(t_pop[iteration-1], r)})

dynamical_system = iterate(0.5, 10, 1)

for time, population in dynamical_system.items():
    print(time, population)
print()
# or
for time_steps in (10, 100, 1000):
    dynamical_system = iterate(0.5, time_steps, 1)
    print()
    print('==================')
    print('time steps:', time_steps)
    print()
    for time, population in dynamical_system.items():
        print(time, population)

# answer: the population is reaching 0, but never becoming a 0

0 0.5
1 0.25
2 0.1875
3 0.15234375
4 0.1291351318359375
5 0.11245924956165254
6 0.09981216674968249
7 0.08984969811841606
8 0.08177672986644556
9 0.07508929631879595
10 0.06945089389714401

time steps: 10

0 0.5
1 0.25
2 0.1875
3 0.15234375
4 0.1291351318359375
5 0.11245924956165254
6 0.09981216674968249
7 0.08984969811841606
8 0.08177672986644556
9 0.07508929631879595
10 0.06945089389714401
time steps: 100

0 0.5
1 0.25
2 0.1875
3 0.15234375
4 0.1291351318359375
5 0.11245924956165254
6 0.09981216674968249
7 0.08984969811841606
8 0.08177672986644556
9 0.07508929631879595
10 0.06945089389714401
11 0.06462746723403166
12 0.06045075771294583
13 0.05679646360487655
14 0.05357062532685648
15 0.05070081342894604
16 0.04813024094658925
17 0.04581372085301251
18 0.04371482383461476
19 0.041803838011723354
20 0.04005627713921295
21 0.03845177180095951
22 0.036973233046326444
23 0.035606213084428476
24 0.03433841067421475
25 0.03315928422658373
26 0.03205974609616436
27 0.031031918776413835
28 0

0.25