In [1]:
#Chapter 5: Writing Our Own Functions

#Defining A Function
    #determine what the inputs (number and types of function arguments)
        #and outputs (type of return value) of the function will be
#get_at_content fxn: input (DNA seq = string), output (decimal number)

#include for Python2
from __future__ import division

def get_at_content(dna):
    """Return the fraction of positions in ``dna`` that are 'A' or 'T'.

    Args:
        dna: DNA sequence as a string. Counting is case-sensitive, so only
            upper-case 'A' and 'T' are counted.

    Returns:
        The AT content as a decimal number between 0 and 1.

    Raises:
        ZeroDivisionError: if ``dna`` is empty.
    """
    total_bases = len(dna)
    # Add up the occurrences of each base of interest.
    at_bases = sum(dna.count(base) for base in ('A', 'T'))
    at_content = at_bases / total_bases
    return at_content
#def = define; followed by the name of the fxn, name of the argument variable in()
    #end first line with a colon, indent following lines
    #last line = "return" at_content that was calculated in the fxn

#defining and running/calling a fxn are two separate things 

#Execute/call the function by:
get_at_content("ATGACTGGACCA")
    #caveat: the returned AT content is discarded unless it is stored in a variable

#store the return value so it can be reused later:
at_content = get_at_content("ATGACTGGACCA")

#or use the return value directly in a print statement
    #(str() converts the number so it can be joined to the text with +):
print("AT content is " + str(get_at_content("ATGACTGGACCA")))

#Note: the argument variable (dna) does not hold any value when the
    #function is defined -- it holds whatever value is given when the 
        #fxn is called -- similar to loop variables

#Variables created as part of the fxn only exist within the fxn and
    #cannot be accessed outside


AT content is 0.5


In [3]:
#Calling and Improving our Function

def get_at_content(dna, sig_figs):
    """Return the AT content of ``dna``, rounded and case-insensitive.

    Args:
        dna: DNA sequence as a string; upper- and lower-case bases are
            both counted.
        sig_figs: number of digits after the decimal point to keep; it is
            passed straight to ``round`` as its second argument.

    Returns:
        The fraction of positions that are A or T, rounded with ``round``.

    Raises:
        ZeroDivisionError: if ``dna`` is empty.
    """
    length = len(dna)
    # Upper-case once, then count both bases on the normalized sequence.
    upper_dna = dna.upper()
    at_total = upper_dna.count('A') + upper_dna.count('T')
    return round(at_total / length, sig_figs)

#variable my_at_content stores the calculated AT content value
my_at_content = get_at_content("ATGCGCGATCGATCGAATCG", 2)
print(str(my_at_content))
#the return value can also be passed straight to print
print(get_at_content("ATGCATGCAACTGTAGC", 2))
#lower-case input works because the fxn upper-cases the sequence before counting
print(get_at_content("aactgtagctagctagcagcgta", 2))

#same sequence, rounded to 1, 2 and 3 digits after the decimal point
test_dna = "ATGCATGCAACTGTAGC"
print(get_at_content(test_dna, 1))
print(get_at_content(test_dna, 2))
print(get_at_content(test_dna, 3))

#Python round fxn takes two arguments (the number we want to round and the number of
    #digits to keep after the decimal point -- decimal places, not true sig figs)

0.45
0.53
0.52
0.5
0.53
0.529


In [4]:
#Encapsulation with Functions
    #you can change the function definition without having to make changes
        #to the code that uses the function

#Encapsulation = dividing a complex program into smaller parts that can
    #be worked on independently of one another
    
#Functions Don't Always Have to Take an Argument
    #you can define a function without arguments -- but such functions
        #usually aren't very useful

def get_a_number():
    """Return the fixed number 42; shows that a function needs no arguments."""
    fixed_number = 42
    return fixed_number

#Functions Don't Always Have to Return a Value
    #ie. writing a function that calculates at_content and prints it to the screen
    #this results in a function that is less flexible
    #write fxns so that they accomplish one job, not two

In [5]:
#Functions can be called with named arguments

#Must know the return value & type as well as the order of arguments.
#Keyword arguments: call the fxn by supplying the argument variable
    #names & their values joined by equal signs,
        #rather than giving just a list of values in ()

#equivalent to get_at_content("ATCGTGACTCG", 2), but each argument is named
get_at_content(dna="ATCGTGACTCG", sig_figs=2)

    #Advantages: clear, does not rely on order of arguments, can be mixed & matched
#Keyword arguments are useful for fxns and methods that have multiple arguments

0.45

In [6]:
#Function arguments can have defaults

#Recall: open function takes two arguments -- a file name and a mode string
    #if you call it with just the file name it uses a default value for the mode string

#version of get_at_content function where default sig_figs is 2

def get_at_content(dna, sig_figs=2):
    """Return the case-insensitive AT content of ``dna``, rounded.

    Args:
        dna: DNA sequence as a string.
        sig_figs: number of digits after the decimal point, passed to
            ``round``; defaults to 2 when the caller omits it.

    Returns:
        The fraction of positions that are A or T, rounded with ``round``.

    Raises:
        ZeroDivisionError: if ``dna`` is empty.
    """
    n_bases = len(dna)
    normalized = dna.upper()
    # Total occurrences of the two bases of interest.
    at_hits = sum(normalized.count(base) for base in ('A', 'T'))
    fraction = at_hits / n_bases
    return round(fraction, sig_figs)

#sig_figs omitted -> default of 2 is used (returns 0.45)
get_at_content("ATCGTGACTCG")
#default overridden with a positional argument (returns 0.455)
get_at_content("ATCGTGACTCG", 3)
#default overridden with a keyword argument (returns 0.4545)
    #only this last value is displayed as the cell output
get_at_content("ATCGTGACTCG", sig_figs=4)

#Fxn argument defaults allow for flexible functions that can have multiple arguments
    #useful for fxns where some of the options are used infrequently

0.4545

In [8]:
#Testing Functions

#"assert" = built-in Python statement for checking code
    #assert, followed by a call to the fxn, ==, then the expected result

#2 of the 4 bases are A or T, so the expected AT content is 0.5
assert get_at_content("ATGC") == 0.5

#if the assertion is false the program will stop and you will get an
    #AssertionError

#Benefits: provide a means to check if the fxn is working and can help
    #track potential errors in the program, allows for modification to a 
        #fxn and ensure no errors were introduced, form of documentation,
            # & can be used to test behavior of fxn for unusual inputs
    
#unusual input: unknown bases ("N") in the dna sequence
    #this assertion fails on purpose: the fxn as defined in the previous cell
        #counts the 9 Ns in the length, so it returns 2/13 = 0.15 rather than 0.5
assert get_at_content("ATGCNNNNNNNNN") == 0.5

AssertionError: 

In [9]:
#assertion will fail unless get_at_content fxn is modified to:

def get_at_content(dna, sig_figs=2):
    """Return the AT content of ``dna``, ignoring unknown bases (N).

    Args:
        dna: DNA sequence as a string; case is ignored, and both 'N' and
            'n' are treated as unknown bases and excluded.
        sig_figs: number of digits after the decimal point, passed to
            ``round``; defaults to 2.

    Returns:
        The fraction of known bases that are A or T, rounded with ``round``.

    Raises:
        ZeroDivisionError: if ``dna`` is empty or contains only N/n.
    """
    # Upper-case first so that lower-case 'n' is stripped as well; removing
    # only 'N' before upper-casing would leave 'n' counted in the length.
    dna = dna.upper().replace('N', '')
    length = len(dna)
    a_count = dna.count('A')
    t_count = dna.count('T')
    at_content = (a_count + t_count) / length
    return round(at_content, sig_figs)

#Test the assertion once more
assert get_at_content("ATGCNNNNNNNNN") == 0.5
    #should now pass: the Ns are removed before the length is measured

#Can group a collection of assertions for a particular function together
    #to test for the correct behavior on different types of input

#extremes: all AT, no AT
assert get_at_content("A") == 1
assert get_at_content("G") == 0
#ordinary mixed sequence
assert get_at_content("ATGC") == 0.5
#rounding: 1/3 with the default sig_figs (2), then with 1 and 5
assert get_at_content("AGG") == 0.33
assert get_at_content("AGG", 1) == 0.3
assert get_at_content("AGG", 5) == 0.33333