# Lecture 8 - Strings 



In [1]:
# Write a function combine that takes in 2 strings
# and concatenates them with a space in between

...

combine("Good", combine("morning", "CSE20"))

'Good morning CSE20'

In [1]:
s = "A long string"

len(s)

13

# String operators

Python provides some surprising ways to manipulate strings

In [1]:
# You can concatenate strings together
s = "Lets" + "add" + "together" + "strings"

print(s) # Note it just puts them one after the other 
# (i.e. it doesn't add any whitespace between them)

Letsaddtogetherstrings


In [2]:
s = "Hello" * 10 # The multiplication operator allows you to make a 
# repeated sequence of the string

print(s) 

HelloHelloHelloHelloHelloHelloHelloHelloHelloHello


In [3]:
# Note this doesn't work

s = "You can't" - "subtract strings" # What would this even do?

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [4]:
# Nor does this

s = "You can't" / "divide strings either"

TypeError: unsupported operand type(s) for /: 'str' and 'str'

# Length function

The length of a string is given by the len() function

In [1]:
s = "A long string"

len(s)

13

In [6]:
s = "" # The empty string case

len(s)

0

# Selecting Characters from a String

Zero-based indices (like lists), running from 0 to len(s)-1

In [1]:
s = "My string"

s[0] # Let's select the first character

'M'

In [2]:
s[1] # The second character

'y'

In [3]:
s[8] # Index 8 is len(s)-1, the last valid index, so this selects 
# the last character of the string

'g'

In [4]:
s[9] # Trying to address a character beyond the length of the string creates 
# an error

IndexError: string index out of range

In [5]:
# A character is just another string in Python
s = "My string"

print(s)
print(type(s))

print(s[0])
print(type(s[0]))


My string
<class 'str'>
M
<class 'str'>


In [11]:
len(s[0]) # It is a string with length 1

1

# Slices

You will often find you want to work with substrings: sub-portions of a string. Python is really nice for this.

In [9]:
# Beyond indexing single characters, you can slice strings to create substrings

s = "A long string"

s[0:6] # The 'prefix' substring of the first 6 characters

'A long'

A slice s[x:y] is zero-based, and from x (inclusive) to y (exclusive), just like range(x,y)

In [1]:
# Zero length case

s = "A long string"

s[6:6] # The interval from 6 (inclusive) to 6 (exclusive) is empty


''

In [14]:
# Negative length strings?

s = "A long string"

s[6:0] # If the second index occurs before the first index it won't
# throw an error, just make a zero length (empty) string

''

In [2]:
# Python also gives you useful shorthand where you omit the start or end of a slice

s[:6] # This is the same as s[0:6], and is called a prefix of s

'A long'

In [3]:
s[6:] # This is the same as s[6:13] or s[6:len(s)], and is called a suffix of s

' string'

In [7]:
s[:] 
# This is just s[0:len(s)], ie the whole string

'A long string'

In [18]:
s[::2] # This is every second character! (step of 2)

'Aln tig'

# Challenge 1

In [19]:
s = "A long string"

# Write down an expression that concatenates two slices of s to get "long ring"


'long ring'

# Negative slicing coordinates

In [20]:
# Negative coordinates let you slice from the other end of the string
# (it's surprising how often this proves to be useful)

s = "A long string"

s[-1] # This is the last character of s

'g'

In [21]:
s[-2] # The second to last

'n'

In [22]:
s[-100] # This throws an error, because it implies a character before 
# the start of the string

IndexError: string index out of range

In [23]:
# You can also slice using negative coordinates:

s[:-1] # Get the n-1 prefix 

'A long strin'

In [24]:
s[-2:-1] # Get the penultimate character

'n'

# Challenge 2

In [22]:
s = "A long string"

# Give a slice of s that reverses s ! (hint: try a negative step)


'gnirts gnol A'

# Immutability

Strings are immutable - that is you can't edit a string, you can only make new strings by copying them.

In [26]:
x = "Strings can't be changed"

# This doesn't work

x[0] = 's'


TypeError: 'str' object does not support item assignment

In [27]:
x = "Strings can't be changed"

# This doesn't work

#x[0] = 's'

# To make the first letter lower case you could instead build a new string:

x = 's' + x[1:]

print(x)

strings can't be changed


Immutable data has some very nice properties.

Immutable data is easy to share across different parts of a program, because we are guaranteed that one bit of the code can't change the data and cause unexpected behaviour in another part of the program that was not expecting these changes. 

Ints, floats, booleans and strings are all immutable in Python.

# String comparison


In [28]:
# We saw this already, but Python compares strings lexicographically

x = "Aardvarks"
y = "Apples"

x < y # This is true, because Aardvarks is before (less than) Apples in the dictionary

True

In [29]:
x == "aardvarks" # This is false because string comparison is case sensitive

False

In [30]:
x.lower() == "aardvarks" # .lower() returns a new lower-case copy of x (x itself is unchanged)

True

# In operator

We can easily search within a string to find if it contains a given substring:

In [2]:
s = "once upon a time there lived a wicked teacher"

"wicked" in s

True

In [4]:
# You can also use 'not in'

"wicked" not in s    # Same as not("wicked" in s)


False

# For loops on strings

In [6]:
# You can easily iterate through the characters in a string
# using a for loop:

s = "toys"
for i in range(0, len(s)):  # i runs over the valid indices 0 .. len(s)-1
  print(s[i])               # index into s itself to get the i-th character

t
o
y
s


In [7]:
# Or better:

for i in s:
  print(i)          # i is bound to each character in the string

t
o
y
s


# Challenge 3

In [9]:
s = "toys"

# Print all possible non-empty slices of s
# Use two nested loops for the start and end of the slice
# There are 10 such slices



t
to
toy
toys
o
oy
oys
y
ys
s


# Examples of functions processing strings

In [10]:
# We can use loops to do neat processing to strings

def remove_vowels(s):
  """Return a copy of s with every vowel (upper or lower case) removed."""
  result = ""
  for ch in s:                # Visit each character of s in order
    if ch in "aeiouAEIOU":
      continue                # Skip vowels
    result += ch              # Strings are immutable: this builds a new string
  return result

remove_vowels("compsci")

'cmpsc'

In [37]:
# Search for the first instance of a character in a string

def find_character(s, ch):
  """
  Return the position (zero-based index) of the first
  occurrence of the character ch in the string s.
  If ch does not occur in s, return -1.
  """
  for position, current in enumerate(s):  # Pairs each character with its index
    if current == ch:
      return position                     # Stop at the first match
  return -1

find_character("once upon a time", 'u')

5

# Convenience functions

There are several useful functions on strings that Python provides, here's a non-exhaustive look:



**find**

In [11]:
# Find generalizes the find_character method above to search for substrings

s = "once upon a time there lived"

s.find("time") # Find first instance of "time" in s
  
  

12

**split**

In [12]:
# Split is useful for splitting strings into words 

s = "once upon a time there lived"

s.split()

#Q: how does split work with other whitespace characters?

['once', 'upon', 'a', 'time', 'there', 'lived']

In [40]:
# You can use it to split on specific characters, 
# consider comma separated data (csv data):

s = "0.5,0.9,17,20"

s.split(",") # ',' is used as the split character

['0.5', '0.9', '17', '20']

In [41]:
# You can also do this with tabs (e.g. tsv data)

s = "0.5\t0.9\t17\t20"

s.split("\t") # a tab is used as the split character

['0.5', '0.9', '17', '20']

**join**

In [7]:
# Join lets you concatenate a sequence of strings

l = ['once', 'upon', 'a', 'time', 'there', 'lived']

" ".join(l) # the string " " is used as the joining sequence

'once upon a time there lived'

In [8]:
# This therefore works too

",".join(l)

'once,upon,a,time,there,lived'

In [9]:
"".join(l)

'onceuponatimetherelived'

**case changing functions**

In [44]:
s = "once upon a time there lived"

s.upper() # When you feel like shouting

'ONCE UPON A TIME THERE LIVED'

In [45]:
s = "SHOUTING" 

s.lower() # The opposite

'shouting'

**lots more...**

Python strings provide lots of good functions, see:

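https://docs.python.org/3/library/stdtypes.html#string-methods (the methods of str) and https://docs.python.org/3/library/string.html (the string module)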

In [3]:
dir('')

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
 'title',
 'translate',
 'upper',
 'zfill']

In [13]:
"foo".swapcase

<function str.swapcase>

In [10]:
help("foo".swapcase)

Help on built-in function swapcase:

swapcase(...) method of builtins.str instance
    S.swapcase() -> str
    
    Return a copy of S with uppercase characters converted to lowercase
    and vice versa.



# Challenge 4

In [2]:
# Complete the function:

def how_many_occurrences(s, s2):
  """
  Returns the number of times s2 occurs as a substring of s,
  counting overlapping occurrences separately
  (e.g. "ss" occurs 3 times in "mississsippi").

  Exercise: the body is left for you to complete.
  """

how_many_occurrences("mississsippi", "ss") # Correct answer should be 3 

# PS: yes, I know mississsippi (sic) is spelled wrong, 
# just to make you think about overlapping occurrences

3

# Homework

* ZyBook Reading 8
* ZyBook Assignment 4
* Read chapter 8: http://openbookproject.net/thinkcs/python/english3e/strings.html
