In [None]:
from random import randint

## Datatypes


Python stores every value as a specific datatype, which tells Python what actions it's allowed to perform on that value. For example, multiplying two integers together works, but multiplying two strings together doesn't really make sense. Unlike some other languages, Python is a dynamically typed language, meaning that you don't need to explicitly declare the data type; Python will infer it for you. You are also sometimes able to change the type, either explicitly or implicitly.  

Here are some datatypes that were introduced last week:  

Strings  
Integers  
Float  
Boolean  

Here are some new ones we are going to introduce today:  
Tuples  
Lists  
Dictionaries  

Different `types` of variables have different methods (read: built-in functions) associated with them. We'll see some examples of those in a little bit. 

In [None]:
#the function 'type' tells you what datatype a given variable is
def testType(x):
    print(type(x))

In [None]:
testType(2)
testType(2.0)
testType("hello")
testType(False)

In [None]:
def testTypeSame(x,y):
    print(type(x) == type(y))

In [None]:
#NOTE: 2, "2", and 2.0 are all stored as different datatypes
testTypeSame(2,3)
testTypeSame(2,'2')
testTypeSame(2,2.0)

In [None]:
# Python lets you explicitly convert a value from one type to another.
def changeVariable(x, vtype):
    """Print the type of ``x`` before and after converting it with ``vtype``.

    ``vtype`` is called as ``vtype(x)`` (for example ``str`` or ``int``).
    Any exception raised by that call propagates to the caller after the
    initial type has already been printed.
    """
    original_type = type(x)
    print('Initial type: ', original_type)
    converted = vtype(x)
    print('New type: ', type(converted))

In [None]:
changeVariable(2,str)

In [None]:
changeVariable("2",int)

In [None]:
#but sometimes Python doesn't know how to accomplish what you're telling it to do
##*will error*##
changeVariable("hello world",int)

In [None]:
# Some operations change the type for you: dividing with / gives a float
# even when both operands are integers.
def cutHalf(x):
    """Print ``x`` and ``x / 2``, each followed by its datatype."""
    print(x, " is of type: ", type(x))
    half = x / 2
    print(half, " is of type: ", type(half))

In [None]:
cutHalf(2)

## Strings

In [None]:
# A tour of some built-in methods that every string provides.
def stringMethods(x):
    """Print the results of several common string methods applied to ``x``.

    In order: the upper-cased copy, the lower-cased copy, whether ``x``
    starts with "hello", the index of the first "ello" (-1 if absent), the
    index of the first "o" at or after position 6 (-1 if absent), and ``x``
    split on single spaces.
    """
    shouted = x.upper()
    print(shouted)

    quiet = x.lower()
    print(quiet)

    # Some methods take arguments: here, the prefix to test for.
    has_greeting = x.startswith("hello")
    print(has_greeting)

    # find() gives the index where the substring first starts, or -1.
    first_hit = x.find("ello")
    print(first_hit)

    # An optional second argument tells find() where to begin looking,
    # which lets us skip an earlier match (e.g. to reach the second 'o').
    later_hit = x.find("o", 6)
    print(later_hit)

    # split() cuts the string at every separator and returns a list
    # (lists are covered later on).
    words = x.split(" ")
    print(words)

In [None]:
stringMethods("hello world")

In [None]:
# As with functions, a string method returns a NEW value; the variable only
# changes if you assign the result back to it.
def lowerCase(x):
    """Show that ``x.lower()`` leaves ``x`` alone until the result is reassigned.

    Prints the lower-cased text, then ``x.islower()`` before and after the
    lower-cased copy is stored back into ``x``.
    """
    lowered = x.lower()
    print(lowered)
    # x has not been touched yet ...
    still_original = x.islower()
    print(still_original)
    # ... but after reassignment x refers to the lower-cased copy.
    x = lowered
    print(x.islower())

In [None]:
lowerCase("Hello world")

In [None]:
# Reminder: x[start:end] takes the characters from index start up to,
# but not including, index end.
def stringSlice(x, start, end):
    """Print the slice of ``x`` running from ``start`` up to (not including) ``end``."""
    piece = x[start:end]
    print(piece)

In [None]:
stringSlice("hello world",0,2)
stringSlice("hello world",1,5)
stringSlice("hello world",0,len("hello world"))

In [None]:
# Negative indexes count backwards from the end: -1 is the last item.
def singleIndex(x, index):
    """Print the single item of ``x`` found at position ``index``."""
    item = x[index]
    print(item)

In [None]:
singleIndex("index",1)
singleIndex("index",0)
singleIndex("index",-1)

In [None]:
def StringMath(x, y, z):
    """Print ``x + y`` (concatenation) and then ``x * z`` (repetition)."""
    # + glues two strings together end to end
    joined = x + y
    print(joined)
    # * with an integer repeats the string that many times
    repeated = x * z
    print(repeated)
    

In [None]:
StringMath("hello ", "Andrew",3)

## Tuples  

A tuple is simply an ordered collection of things. A string behaves much like a tuple whose items are characters (both are immutable sequences), so you already have some experience working with them. Importantly, tuples are IMMUTABLE, meaning once created, their values CANNOT be altered.

In [None]:
def makingTuples(a, b, c):
    """Pack ``a``, ``b`` and ``c`` into a tuple, then print it and its type.

    A line containing a single space is printed last so that consecutive
    calls are visually separated.
    """
    # Parentheses around comma-separated values build a tuple
    triple = (a, b, c)
    print('Tuple: ', triple)
    print("x is of type: ", type(triple))
    print(" ")

In [None]:
makingTuples(1,2,3)
#members don't need to be of the same type
makingTuples("yes",2,False)
#nested tuples
makingTuples((1,2,3),"yes",17)

In [None]:
def extractTuple(a, b, c, d):
    """Show several ways of reading values out of the tuple ``(a, b, c, d)``.

    Prints the whole tuple, its first item, its second-to-last item, its
    first two items, every item on its own line, and finally whether the
    string "dog" is one of the items.
    """
    items = (a, b, c, d)
    print(items)

    # Indexing and slicing work exactly as they do on strings
    first = items[0]
    second_to_last = items[-2]
    leading_pair = items[0:2]
    print(first)
    print(second_to_last)
    print(leading_pair)

    # A for loop visits the members one at a time, in order
    for member in items:
        print(member)

    # `in` answers "is this value one of the members?"
    contains_dog = "dog" in items
    print(contains_dog)

In [None]:
extractTuple(1,"hello",False, "dog")

In [None]:
def ChangeTuple(a, b, c, new):
    """Demonstrate that tuples are immutable.

    Prints the middle item of ``(a, b, c)`` and then tries to overwrite it
    with ``new``. Tuples do not support item assignment, so that attempt
    raises ``TypeError`` and the final print is never reached.
    """
    frozen = (a, b, c)
    middle = frozen[1]
    print(middle)
    # Deliberate error: assigning to a tuple position is not allowed
    frozen[1] = new
    print(frozen)

In [None]:
#should error out, Tuples are immutable
ChangeTuple(4,3,1,"change")

In [None]:
def TupleTricks(a, b, c):
    """Print the length and sum of ``(a, b, c)``, then the tuple joined to itself."""
    values = (a, b, c)
    count = len(values)
    print("length of tuple is: ", count)
    total = sum(values)
    print("sum of tuple is: ", total)
    # + on two tuples builds a new, longer tuple
    doubled = values + values
    print(doubled)
    

In [None]:
TupleTricks(1,2,3)

In [None]:
## Tuple unpacking: each name on the left receives the element at the matching position of the tuple on the right
x,y,z = (1,2,3)
# z was bound to the third element, so this prints 3
print(z)

## Lists

Lists are another of Python's sequence collections. Unlike tuples, they are mutable, which makes them very powerful but also dangerous if proper care isn't taken.

In [None]:
# Square brackets [] build a list.
def MakeList(a, b, c):
    """Build a list from ``a``, ``b``, ``c`` and show a tuple being converted to a list.

    Prints the list and its type, then the type of the tuple ``(a, b, c)``
    before and after it is passed through ``list()``.
    """
    items = [a, b, c]
    print(items)
    print(type(items))

    # Other sequences (here a tuple) can be converted with list()
    as_tuple = (a, b, c)
    print(type(as_tuple))
    as_list = list(as_tuple)
    print(type(as_list))

In [None]:
MakeList(1,2,"hello")

In [None]:
# Most of what works on tuples works the same way on lists.
def ListWork(a, b, c):
    """Print the results of common sequence operations on the list ``[a, b, c]``.

    In order: its length, the list repeated three times, the list with
    ``[1, 2]`` joined on the end, its first item, its last item, its first
    two items, and the list reversed.
    """
    items = [a, b, c]

    size = len(items)
    print(size)

    # Repetition and concatenation both build new lists
    tripled = items * 3
    print(tripled)
    extended = items + [1, 2]
    print(extended)

    # Indexing: one item from the front, one from the back
    print(items[0])
    print(items[-1])

    # Slicing: a sub-list, and a step of -1 to walk the list backwards
    front_pair = items[0:2]
    print(front_pair)
    backwards = items[::-1]
    print(backwards)
    

In [None]:
ListWork(6,7,"hello")

In [None]:
def ListMethods(a, b, c):
    """Demonstrate list methods that add items to a list and reorder it.

    Starting from ``[a, b, c]``, prints the list after a single append and
    after appending 4, 5, 6 one at a time. Then, on separate example lists,
    prints the result of ``extend``, of ``+``, and of sorting ascending and
    descending.
    """
    items = [a, b, c]
    # append() adds ONE value to the end of the list
    items.append(4)
    print(items)

    # To add several values, one option is to append them one at a time ...
    for extra in [4, 5, 6]:
        items.append(extra)
    print(items)

    # ... another is extend(), which adds every element of another list ...
    numbers = [7, 8, 9]
    numbers.extend([1, 2, 3])
    print(numbers)

    # ... and a third is +, which builds a brand-new combined list
    combined = [1, 2, 3] + [4, 5, 6]
    print(combined)

    # sort() reorders the list in place: ascending by default,
    # descending with reverse=True
    numbers.sort()
    print(numbers)
    numbers.sort(reverse=True)
    print(numbers)

In [None]:
ListMethods(1,2,3)

In [None]:
# The defining feature of lists: their elements can be changed in place.
def ListMut(a, b, c):
    """Demonstrate in-place changes to the list ``[a, b, c]``.

    Prints the list and its middle item, replaces the middle item with
    "how", then overwrites every position with a random integer from 0 to
    100 (inclusive), and finally multiplies every item by 10. The list is
    printed after each change.
    """
    items = [a, b, c]
    print(items)
    print(items[1])

    # Assigning to an index replaces just that element
    items[1] = "how"
    print(items)

    # Walk over the positions and overwrite each one
    for position, _ in enumerate(items):
        items[position] = randint(0, 100)
    print(items)

    # Read each value, transform it, and store it back in the same slot
    for position, value in enumerate(items):
        items[position] = value * 10
    print(items)

In [None]:
ListMut(1,2,3)

In [None]:
# Very important: assigning a list to a second name does NOT copy it. Both
# names point to the same object in memory, so a change made through one
# name shows up through the other.
###THIS CAN CAUSE LOTS OF ISSUES###

def listMem(a, b, c):
    """Contrast aliasing a list with copying it.

    First binds a second name to the same list and shows that a change made
    through the first name is visible through the second. Then repeats the
    change on a list that was copied beforehand and shows that the copy is
    unaffected.
    """
    original = [a, b, c]
    alias = original
    print(alias)
    original[1] = "what"
    print(alias)

    # Taking a copy first keeps the two lists independent
    # (source[:] and list(source) make the same kind of copy)
    source = [a, b, c]
    snapshot = source.copy()
    source[1] = "what"
    print(snapshot)
    

In [None]:
listMem(1,2,3)

In [None]:
# Lists can be nested to form a multidimensional structure.
def multiDimList(a, b, c):
    """Build a two-row nested list from ``a``, ``b``, ``c`` and show how to read it.

    Prints the nested list, its first row, the second element of the first
    row, and then (after replacing the second row with ``[4, 5, 6]``) each
    row on its own line.
    """
    row = [a, b, c]
    # Give every row its own copy. ``[row] * 2`` would store the SAME inner
    # list twice, so changing an element in one row would silently change
    # the other row too.
    grid = [list(row) for _ in range(2)]
    print(grid)

    # Multidimensional indexing: pick the row first, then the element in it
    print(grid[0])
    print(grid[0][1])

    # Replace the second row, then loop over the rows
    grid[1] = [4, 5, 6]
    for current_row in grid:
        print(current_row)

In [None]:
multiDimList("happy",2,"world")

### List Comprehension (if time)

List comprehension allows us to generate a new list based on values of a previous list. It combines the list data structure, for loops, and if statements into a single helpful format.

Let's say we have a list of genes and want to pull out all the mitochondrial genes (with the "mt-" pattern). If you were to achieve this without list comprehension it would look like this:

In [None]:
def mtGenes(lst):
    """Print a list of the gene names in ``lst`` that contain "mt-".

    This is the explicit-loop version: start with an empty list and append
    each matching name in turn.
    """
    mito_genes = []

    for name in lst:
        # Skip anything without the mitochondrial marker
        if "mt-" not in name:
            continue
        mito_genes.append(name)
    print(mito_genes)

In [None]:
mtGenes(['mt-gene1','SOX4','HOX4','mt-gene2','Sept7'])

In [None]:
#now let's use list comprehension:
def mtGenes_lc(lst):
    """Print the upper-cased names of the genes in ``lst`` that contain "mt".

    Note that, unlike ``mtGenes``, this version matches "mt" (without the
    dash) and upper-cases each match before storing it.
    """
    # Read as: for every gene in lst, if "mt" is in it, keep gene.upper()
    mt_gene_list = [gene.upper() for gene in lst if "mt" in gene]
    print(mt_gene_list)

In [None]:
mtGenes_lc(['mt-gene1','SOX4','HOX4','mt-gene2','Sept7'])

So now we've seen how using a list comprehension can help make our code simpler and shorter. How is one constructed?

The syntax follows this pattern:

newlist = [**expression** for **item** in **iterable** if **condition**]  

Let's break this down:  
expression: gene.upper()  
item in iterable: gene in lst  
condition: if "mt" in gene  

For each item (gene) in our iterable (lst), check the condition (does it contain "mt"?). If so, run the expression (gene.upper()) and place the result in the new list (mt_gene_list).

## Dictionaries

The final collection we're going to look at is the dictionary. A dictionary is similar to a list, but instead of accessing the values by their position, we access them through keys that we choose. A dictionary stores key-value pairs: the key is what you give the dictionary to have it return the corresponding value. It might sound confusing, but it makes a lot more sense once you start using them.

In [None]:
def makeDict():
    """Build a small dictionary and demonstrate lookups, views, updates and ``in``.

    Prints two looked-up values, the keys/values/items views, the
    dictionary after one value is replaced and one pair is added, and
    finally three membership tests.
    """
    # Curly braces {} build a dictionary of key: value pairs
    lookup = {
        'key1': 1,
        'b': 2,
        10: 'ten',
    }

    # Values are fetched by key rather than by position
    print(lookup['key1'])
    # 10 is treated as a key here, not as "the 10th element": dictionaries
    # are never indexed by position (iteration follows insertion order in
    # Python 3.7+).
    print(lookup[10])

    # keys(), values() and items() expose the contents
    print('My keys are: ', lookup.keys())
    print('My values are: ', lookup.values())
    print('My pairs are: ', lookup.items())

    # Assigning to an existing key replaces its value; assigning to a new
    # key adds a pair. Different keys may hold the same value.
    lookup['key1'] = "new"
    lookup['key2'] = "new"
    print(lookup)

    # `in` looks at the keys only; search .values() to test for a value
    print("key1" in lookup)
    print("new" in lookup)
    print("new" in lookup.values())

In [None]:
makeDict()

In [None]:
def geneChrs():
    """Print which chromosome each example gene is on, using two looping styles.

    The same three lines are printed twice, separated by a line of dashes:
    first by looping over (key, value) pairs, then by looping over the keys
    and looking each value up.
    """
    gene_locations = {'g1': "chr1", "g2": "chrM", "g3": "chr7"}

    # items() yields (key, value) pairs, which can be unpacked directly
    for gene, chromosome in gene_locations.items():
        print("Gene", gene, "is on", chromosome)

    print("-----")

    # keys() yields only the keys; use each one to look up its value
    for gene in gene_locations.keys():
        chromosome = gene_locations[gene]
        print("Gene", gene, "is on", chromosome)

In [None]:
geneChrs()

## Exercises


In [None]:
#1. Given a small section of text, return how many sentences it contains.
#Only periods (.) count as sentence endings; ignore ? and !.
#For example, the input "My name is Inigo Montoya. You killed my father. Prepare to die."
#should return 3
def sentenceFinder(text: str):
    """Exercise 1 (not yet implemented): count the period-terminated sentences in ``text``."""
    pass

#2. Given a string of bases, return whether the first 3 bases encode the 'ATG' start codon
def startCodon(dna: str):
    """Exercise 2 (not yet implemented): report whether ``dna`` begins with the start codon 'ATG'."""
    pass

#3. Given a string of bases (A, G, T, and C), produce a list of the three-letter
#codons that the sequence gives you. If there are 1 or 2 letters left over at
#the end of the sequence, ignore them.
#mkCodons("AGATTAGCCATCGGACTTGATGC") ->
#  ["AGA", "TTA", "GCC", "ATC", "GGA", "CTT", "GAT"]
def mkCodons(dna: str):
    """Exercise 3 (not yet implemented): split ``dna`` into a list of complete three-letter codons."""
    pass

#4. Write a function that takes a tuple of numbers and prints their mean,
# without using the sum() function for tuples. The printed response
# should be formatted using this style:
# >>>tupleAve((1, 3, 5))
#  The average of (1, 3, 5) is 3.000000
def tupleAve(tpl: tuple):
    """Exercise 4 (not yet implemented): print the mean of the numbers in ``tpl`` without using ``sum()``."""
    pass

#5. Write a function that iterates through a list and prints whether each number is positive, negative, or zero
def listSign(lst: list):
    """Exercise 5 (not yet implemented): print the sign (positive, negative, or zero) of each number in ``lst``."""
    pass

#6. Write a function that checks to see if a list of numbers is sorted.
def isSorted(lst: list):
    """Exercise 6 (not yet implemented): check whether the numbers in ``lst`` are in sorted order."""
    pass

#7. Write a function that takes a list and two numbers and swaps the elements
#at those positions. Note that the function may or may not return something, but
#the original list should be mutated. (This is called an "in-place" change.)
#That is,
#a=[1,4,6]
#swap(a, 1, 2)
#print (a) -> [1, 6, 4]
def swap(lst: list, pos1: int, pos2: int):
    """Exercise 7 (not yet implemented): swap the elements of ``lst`` at ``pos1`` and ``pos2`` in place."""
    pass

#8. Write a function that takes a list and two numbers and returns a list with
#the elements at those positions swapped. The original list should *not* be
#mutated. (Functions that don't mutate their arguments or access any global
#state are called pure.)
#a = [1,4,6]
#b = swapConst(a,1,2)
#print (b) -> [1, 6, 4]
#print (a) -> [1, 4, 6]
def swapConst(lst: list, pos1: int, pos2: int):
    """Exercise 8 (not yet implemented): return a new list with the elements at ``pos1`` and ``pos2`` swapped, leaving ``lst`` unchanged."""
    pass

#9. Use a dictionary to count the frequency of each nucleotide in this sequence:
#AGTCTGCTCTGACAGATGCAATCAGATACGTTACGATCAGTCGTACTGACTACTGCTGACTACCTGATGC
#You can just print your dictionary out as your answer
def nucDict(dna: str):
    """Exercise 9 (not yet implemented): count how often each nucleotide occurs in ``dna`` using a dictionary."""
    pass

#10. The relationship between codons and amino acids can be well-described by
#a dictionary. If you were to construct such a dictionary, what would be the
#keys? What would be the values?