In [None]:
### Python basics

# learning material from: https://docs.python.org/3/tutorial/controlflow.html

In [1]:
# print() function
print("print this line please")

print this line please


In [2]:
# python assigns variables using =
x = 1
x

1

In [3]:
# python supports two types of numbers:

# 1 integers
my_integer = 1
print(my_integer)

# 2 floating point numbers
my_float = 7.8
print(my_float)

1
7.8


In [4]:
# python is a simple calculator that supports a range of mathematical operators

# addition

print(2 + 2)

# subtraction

print(10 - 5)

# multiplication

print(8*7)

# combinations using regular parentheses

print( (10-5) * 10 )

4
5
56
50


In [8]:
# division is a bit more complicated because of the remainders

# simple division returns floating point

print( 13/7 )

# floor division using double // discards the fractional part

print( 27//7 )

# the percentage sign (%) only returns the remainder

print( 27 % 7 )

1.8571428571428572
3
6


In [9]:
# powers in Python are calculated using ** (not ^ like in R)

print( 2**2 )

print( 2 ** 3)

4
8


In [8]:
# python also supports strings

# strings are defined by either:

# 1. single quotes ''
hello1 = 'hello1'

# 2. double quotes ""
hello2 = "hello2"

print(hello1, hello2)

# the only practical difference is that using double quotes means you can have single quotes e.g. apostrophes in the string
stringer = "excuse me, that's kate's sweater"

print(stringer)

hello1 hello2
excuse me, that's kate's sweater


In [12]:
# note that you can also use a \ to escape quotes. This means that you can include apostrophes etc. in the string

print( 'doesn\'t' )

print( "\"hello\"" )

doesn't
"hello"


In [13]:
# in python, we have a few special characters e.g. \n means new line

# but, sometimes, we don't want to the \n to be interpreted as as a special character

# to fix this, we need to tell python to use raw strings by putting an r before the string

print('C:\some\name')

print(r'C:\some\name')

C:\some
ame
C:\some\name


In [14]:
# in addition, python has a useful syntax of triple quotations

# the triple quotations have a range of uses

# 1. you don't have to escape quotations in a string

x = """ "hello" """
print(x)

 "hello" 


In [15]:
# 2. you can write multi-line comments

"""this is a very long comment that looks weird
when you put it all in one line"""

x = 5
print(x)

5


In [16]:
# 3. you can write multiline strings

x = """

UPDATE X if:

     1. TRUE
     
     2. FALSE
     
     3. NA

"""

print(x)



UPDATE X if:

     1. TRUE
     
     2. FALSE
     
     3. NA




In [22]:
# you can also remove the end-of-lines and tell Python to continue on the next line using the backslash

# this allows you to either remove the end of line characters or to make a new string for each line without a space

# remove end of line character (don't really understand this...)

x = """\

UPDATE X if:

     1. TRUE
     
     2. FALSE
     
     3. NA \

"""

print(x)

# tell the string to continue separately on each new line

y = """\

TRUE \
FALSE \
NA \

"""

print(y)


UPDATE X if:

     1. TRUE
     
     2. FALSE
     
     3. NA 


TRUE FALSE NA 



In [23]:
# python is also different from R in that you can perform operators on strings
hello = "hello"
world = "world"
helloworld = hello + " " + world

print(helloworld)

# you can even use multiplication

x = 3 * "a" + "b"

print(x)

hello world
aaab


In [24]:
# string literals are things are actually in strings can be concatenated just by being next to each other

"Py" "thon"

'Python'

In [26]:
# but this does not work with a string object and a literatl
prefix = "Py"

prefix "thon"

SyntaxError: invalid syntax (Temp/ipykernel_13528/489461988.py, line 4)

In [28]:
# in such a case, you need to use the plus
prefix = "Py"
prefix + "thon"

'Python'

In [1]:
# but, you cannot mix strings and numbers with operators
a = 1
b = 2
c = "hello"
print(a + b + c)

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [30]:
# strings can be indexed unlike in R which is quite useful (note that they start at zero, not one)

word = "geriatric"

print( word[5] )

print( word[1] )

print( word[0] )

t
e
g


In [32]:
# indexing in python with negative numbers works by counting backwards

print(word[-3] )

r


In [33]:
# slicing i.e. generating substrings 

print( word[0:2] ) # from 0 until 2 but excluding 2

ge


In [35]:
# the defaults for slicing are quite useful

# omitted first position defaults to zero

print(word[:4]) # here, four is excluded

# omitted last position defaults to the length of the string

print(word[2:]) # two is included

geri
riatric


In [None]:
# the slicing behaviour of Python essentially means that indices point between numbers i.e.:

# zero points left of starting

# string length points right of ending

In [36]:
# python strings are immutable i.e. you cannot change individual elements

# for example, if we wanted to change a string called Python to Jython, we cannot do this:

word = "Python"
word[0] = "J"

TypeError: 'str' object does not support item assignment

In [38]:
# you would have to create a new string
word2 = "J" + word[1:]
print(word2)

Jython


In [41]:
# python has a built in function called len that outputs the length of a string
len(word2)

'Pyth'

In [10]:
# python differs from R in that we can assign multiple objects simultaneously

a, b = 2, 3

print(a, b)

2 3


In [42]:
### lists

# list are compound data types that group together different objects

# they are essentially comma-separated items created using square brackets
squares = [1, 4, 9, 16, 25]

# they can be indexed in the same way as string
print( squares[0] )
print( squares[0:3] )

1
[1, 4, 9]


In [43]:
# lists can also easily be concatenated using + signs

# this is a big difference from R where this operation would be vectorised

squares + [36]

[1, 4, 9, 16, 25, 36]

In [50]:
# unlike strings, lists are mutable so you can change individual elements
cubes = [1, 8, 27, 65]
print(cubes)

# no! 4**3 is not 65
cubes[3] = 4**3

print(cubes)

[1, 8, 27, 65]
[1, 8, 27, 64]


In [51]:
# you can also add things to lists using the append method
cubes.append(125)

print(cubes)

[1, 8, 27, 64, 125]


In [54]:
# replacements can also be done on slices
cubes[2:4]

cubes[2:4] = [57, 28]

print(cubes)

[1, 8, 57, 28, 125]


In [55]:
# you can also remove elements in the same way
cubes[2:4] = []

print(cubes)

[1, 8, 125]


In [56]:
# like with strings, we use the built-in len() function to get the length of a list
len(cubes)

3

In [57]:
# you can also create nested lists i.e. lists within lists
x = [5, 6, 7]
n = ["a", "b"]

[x, n]

[[5, 6, 7], ['a', 'b']]

In [None]:
### basics of programming i.e. iteration and writing loops

In [59]:
# write a while loop to generate the fibonacci series
a, b = 0, 1
while a < 10:
    print(a)
    a, b = b, a+b

0
1
1
2
3
5
8


In [60]:
# see how this works with more replicates
a, b = 0, 1
while a < 1000:
    print(a, end = ",")
    a, b = b, a+b

0,1,1,2,3,5,8,13,21,34,55,89,144,233,377,610,987,

In [67]:
# if statements are very similar to R

# note that the indented if, elif etc. must be aligned for the code to run

x = int(input("Please enter an integer: "))
print(x)

if x < 0:
    x = 0
    print("negative changed to zero")
elif x == 0:
    print("zero")
elif x == 1:
    print("single")
else:
    print("more")

Please enter an integer: 5
5
more


In [68]:
# for loops are also quite similar to R

# for loops just iterate over everything in a sequence that is called

# Measure some strings:
words = ['cat', 'window', 'defenestrate']
for w in words:
    print(w, len(w))

cat 3
window 6
defenestrate 12


In [95]:
# loops can be tricky because we are iterating and modifying a set of objects

# there are two strategies to deal with this

# example, we create a dictionary {}
# this has different elements which are key: value pairs pairs (i.e. key = Hans, value = active)
# we can call different items from this dictionary using users.items()
users = {'Hans': 'active', 'Éléonore': 'inactive', '景太郎': 'active'}
print(users.items())
print(users['Hans'])

dict_items([('Hans', 'active'), ('Éléonore', 'inactive'), ('景太郎', 'active')])
active


In [93]:
# 1. Iterate over a copy

for user, status in users.copy().items():
    if status == 'inactive':
        del users[user]

print(users)

{'Hans': 'active', '景太郎': 'active'}


In [96]:
# it's quite confusing what is happening with the for loop above
# https://kanoki.org/2019/12/04/how-to-iterate-through-a-python-dictionary/

# I think it means that, for each key, value pair in d.items
# therefore, the first thing is the name we assign to the key (i.e. user above)
# the second thing is the name we assign to the value (i.e. status above)
"""for key,value in d.items():
         print(key,":",value)"""

# this allows us to loop over each key-value pair

'for key,value in d.items():\n         print(key,":",value)'

In [97]:
# 2. make a new output file
active_users = {}
for user, status in users.items():
    if status == 'active':
        active_users[user] = status

print(active_users)

{'Hans': 'active', '景太郎': 'active'}


In [105]:
# the range() function for iterating over a range of numbers

# this generates a range of integers from 0 but not including 10
x = range(10)
print( list(x) )

# we can also specify the starting and end points for range
y = range(5, 10)
print( list(y) )

# we can also specify the starting and end points along with a step
z = range(5, 10, 2)
print( list(z) )

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[5, 6, 7, 8, 9]
[5, 7, 9]


In [106]:
# for iteration, therefore, we can do something like this:
for i in range(10):
    print(i)

0
1
2
3
4
5
6
7
8
9


In [None]:
# note that a range object can behave as a list i.e. for loop above but it only behaves as a list when you iterate over it

In [None]:
# loops can have "else" clauses:

# 1. in for loops, it engages when the loop runs out of things to iterate

# 2. in while loops, it engages when the statement is no longer true

In [113]:
# let's see an example:

for n in range(2, 10):
    for x in range(2, n):
        if n % x == 0:
            print(n, 'equals', x, '*', n//x)
            break
    else:
        print(n, 'is a prime number')

2 is a prime number
3 is a prime number
4 equals 2 * 2
5 is a prime number
6 equals 2 * 3
7 is a prime number
8 equals 2 * 4
9 equals 3 * 3


In [123]:
# the continue statement tells the loop to move on the next iteration

for num in range(2, 10):
    if num % 2 == 0:
        print("Found an even number", num)
        continue
    print("Found an odd number", num)


Found an even number 2
Found an odd number 3
Found an even number 4
Found an odd number 5
Found an even number 6
Found an odd number 7
Found an even number 8
Found an odd number 9


In [None]:
# the pass function literally does nothing but it tells the program to ignore a statement

# I'm not quite sure how this works at the moment

# while True:
    # pass

In [3]:
# match statements

# this allows multiple matching conditions to output different things

# it's similar to using if and elif statements

# these don't seem to be working for me so I will move on in the interest of time (match is only available in later version of Python)

# therefore this code cannot work for me

# point is an (x, y) tuple
def http_error(status):
    match status:
        case 400:
            return "Bad request"
        case 404:
            return "Not found"
        case 418:
            return "I'm a teapot"
        case _:
            return "Something's wrong with the internet"

SyntaxError: invalid syntax (Temp/ipykernel_20412/2445205170.py, line 9)

In [6]:
# check the python version

from platform import python_version
print(python_version())

3.9.7


In [1]:
# writing functions

# we initialise writing a function using the def command

# this is followed by a function name with parentheses and then arguments (similar to R)

def fib(n):
    """Print a Fibonacci series up to n."""
    a, b = 0, 1
    while a < n:
        print(a, end = ' ')
        a, b = b, a+b
    print()
    
fib(2000)    

0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 1597 


In [2]:
# the function above does not actually return numbers but a just a print out of the numbers

# often we would like to actually have some numbers i.e. like a vector returned

# to return numbers, we need to use the return command

def fib2(n):  # return Fibonacci series up to n
    """Return a list containing the Fibonacci series up to n."""
    result = []
    a, b = 0, 1
    while a < n:
        result.append(a)    # see below
        a, b = b, a+b
    return result

fib2(1000)

[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987]