# CHAPTER 2 Python Refresher

## Data Structures and Types


In [1]:
# zen of python
import this


The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


In [2]:
new_string = "This is a String"  # storing a string

id(new_string)  # shows the object identifier (address)

type(new_string)  # shows the object type

new_string  # shows the object value


'This is a String'

In [3]:
# representing integers and operations on them
num = 123

type(num)

num + 1000  # addition

num * 2  # multiplication

# `//` is floor division and always yields the integer quotient (61 here);
# plain `/` only truncates for Python 2 ints and turns into true division
# under `from __future__ import division` or on Python 3
num // 2  # integer division


61

In [4]:
# decimal
1 + 1

# binary
bin(2)

0b1 + 0b1

bin(0b1 + 0b1)

# octal
oct(8)

# `0o` is the octal prefix understood by Python 2.6+ and Python 3 alike;
# the legacy bare leading-zero form (07) is a SyntaxError on Python 3
oct(0o7 + 0o1)

0o10


# hexadecimal
hex(16)

0x10

hex(0x16 + 0x5)


# floating points
1.5 + 2.6

1e2 + 1.5e3 + 0.5

2.5e4

2.5e-2


# complex
cnum = 5 + 7j

type(cnum)

cnum.real

cnum.imag

cnum + (1 - 0.5j)


# strings
s1 = 'this is a string'
s2 = 'this is "another" string'
s3 = 'this is the \'third\' string'
s4 = """this is a
multiline
string"""

print s1, s2, s3, s4

print s3 + '\n' + s4


' '.join([s1, s2])

s1[::-1]  # reverses the string


# lists
l1 = ['eggs', 'flour', 'butter']
l2 = list([1, 'drink', 10, 'sandwiches', 0.45e-2])
l3 = [1, 2, 3, ['a', 'b', 'c'], ['Hello', 'Python']]

print l1, l2, l3

# indexing lists
l1
l1[0]
l1[1]
l1[0] +' '+ l1[1]

# slicing lists
l2[1:3]

numbers = range(10)
numbers
numbers[2:5]
numbers[:]
numbers[::2]

# concatenating and mutating lists
numbers * 2
numbers + l2

# handling nested lists
l3
l3[3]
l3[4]
l3.append(' '.join(l3[4]))  # append operation
l3
l3.pop(3)  # pop operation
l3



# sets
l1 = [1,1,2,3,5,5,7,9,1]

set(l1)  # makes the list as a set
s1 = set(l1)

# membership testing
1 in s1  
100 in s1

# initialize a second set
s2 = {5, 7, 11}

# testing various set operations
s1 - s2  # set difference
s1 | s2  # set union
s1 & s2  # set intersection 
s1 ^ s2  # elements which do not appear in both sets



this is a string this is "another" string this is the 'third' string this is a
multiline
string
this is the 'third' string
this is a
multiline
string
['eggs', 'flour', 'butter'] [1, 'drink', 10, 'sandwiches', 0.0045] [1, 2, 3, ['a', 'b', 'c'], ['Hello', 'Python']]


{1, 2, 3, 9, 11}

In [5]:
# dictionaries
d1 = {'eggs': 2, 'milk': 3, 'spam': 10, 'ham': 15}
d1

# retrieving items based on key
d1.get('eggs')
d1['eggs']

# get is better than direct indexing since it does not throw errors
d1.get('orange') 
d1['orange']



KeyError: 'orange'

In [6]:
# setting items with a specific key
d1['orange'] = 25
d1

# viewing keys and values
d1.keys()
d1.values()

# create a new dictionary using dict function
d2 = dict({'orange': 5, 'melon': 17, 'milk': 10})
d2

# update dictionary d1 based on new key-values in d2
d1.update(d2)
d1

# complex and nested dictionary
d3 = {'k1': 5, 'k2': [1,2,3,4,5], 'k3': {'a': 1, 'b': 2, 'c': [1,2,3]}}
d3
d3.get('k3')
d3.get('k3').get('c')


# tuples

# creating a tuple with a single element 
single_tuple = (1,)
single_tuple

# original address of the tuple
id(single_tuple)

# modifying contents of the tuple but its location changes (new tuple is created)
single_tuple = single_tuple + (2, 3, 4, 5)
single_tuple
id(single_tuple) # different address indicating new tuple with same name

# tuples are immutable hence assignment is not supported like lists
single_tuple[3] = 100



TypeError: 'tuple' object does not support item assignment

In [7]:
# accessing and unpacking tuples
tup = (['this', 'is', 'list', '1'], ['this', 'is', 'list', '2'])
tup[0]
l1, l2 = tup
print l1, l2


['this', 'is', 'list', '1'] ['this', 'is', 'list', '2']


In [8]:
# files
import os

# the `with` statement closes the file automatically, even if a write fails
with open('text_file.txt', 'w') as f:   # open in write mode
    f.write("This is some text\n")  # write some text
    f.write("Hello world!")

# lists files in current directory
os.listdir(os.getcwd())

# read through `with` as well so the read handle is not left open
with open('text_file.txt', 'r') as f:  # opens file in read mode
    data = f.readlines()  # reads in all lines from file

# prints the text data; with a single argument the parenthesised form
# produces the same output on Python 2 and Python 3
print(data)


['This is some text\n', 'Hello world!']


## Controlling Code Flow

In [9]:
# if, if-elif, if-elif-else
var = 'spam'
if var == 'spam':
    print 'Spam'

var = 'ham'
if var == 'spam':
    print 'Spam'
elif var == 'ham':
    print 'Ham'


var = 'foo'
if var == 'spam':
    print 'Spam'
elif var == 'ham':
    print 'Ham'
else: 
    print 'Neither Spam or Ham'



Spam
Ham
Neither Spam or Ham


In [10]:
# Looping constructs

# illustrating for loops
numbers = range(0,5)
for number in numbers:
    print(number)

# accumulate into `total` rather than `sum`: rebinding `sum` would shadow the
# builtin sum() for every later cell run in the same kernel
total = 0
for number in numbers:
    total += number

print(total)


# role of the trailing else and break constructs
# the else branch runs because the loop finishes without hitting a break
for number in numbers:
    print(number)
else:
    print('loop exited normally')


# here the break fires once number reaches 3, so the else branch is skipped
for number in numbers:
    if number < 3:
        print(number)
    else:
        break
else:
    print('loop exited normally')


0
1
2
3
4
10
0
1
2
3
4
loop exited normally
0
1
2


In [11]:
# illustrating while loops
number = 5
while number > 0:
    print number
    number -= 1  # important! else loop will keep running

# role of continue construct
number = 10
while number > 0:
    if number % 2 != 0:
        number -=1 # decrement but do not print odd numbers
        continue  # go back to beginning of loop for next iteration
    print number  # print even numbers and decrement count
    number -= 1  

# role of the pass construct
number = 10
while number > 0:
    if number % 2 != 0:
        pass # don't print odds
    else:
        print number
    number -= 1




5
4
3
2
1
10
8
6
4
2
10
8
6
4
2


In [12]:
# exceptions
shopping_list = ['eggs', 'ham', 'bacon']
# trying to access a non-existent item in the list
try:
    print shopping_list[3]
except IndexError as e:
    print 'Exception: '+str(e)+' has occured'
else:
    print 'No exceptions occured'
finally:
    print 'I will always execute no matter what!'
    
# smooth code execution without any errors
try:
    print shopping_list[2]
except IndexError as e:
    print 'Exception: '+str(e)+' has occured'
else:
    print 'No exceptions occured'
finally:
    print 'I will always execute no matter what!'
 


Exception: list index out of range has occured
I will always execute no matter what!
bacon
No exceptions occured
I will always execute no matter what!


## Functional Programming

In [13]:
# function with single argument
def square(number):
    """Return ``number`` multiplied by itself."""
    squared = number * number
    return squared

square(5)


25

In [14]:
# built-in function from the numpy library
import numpy as np
np.square(5)

# a more complex function with variable number of arguments
def squares(*args):
    """Return a list with the square of every positional argument, in call order."""
    return [value * value for value in args]

squares(1,2,3,4,5)



[1, 4, 9, 16, 25]

In [15]:
# assign specific keyword based arguments dynamically
def person_details(**kwargs):
    for key, value in kwargs.items():
        print key, '->', value

person_details(name='James Bond', alias='007', job='Secret Service Agent')


alias -> 007
job -> Secret Service Agent
name -> James Bond


In [16]:
# using recursion to square numbers
def recursive_squares(numbers):
    """Recursively build the list of squares of the items in ``numbers``.

    An empty (falsy) input is the base case and yields ``[]``; otherwise the
    first item is squared and placed in front of the result computed for the
    remaining slice.
    """
    if not numbers:
        return []
    head, rest = numbers[0], numbers[1:]
    return [head * head] + recursive_squares(rest)

recursive_squares([1, 2, 3, 4, 5])


[1, 4, 9, 16, 25]

In [17]:
# reduce is a builtin only on Python 2; functools.reduce is the same function
# and is importable on Python 2.6+ as well as Python 3
from functools import reduce

# simple lambda function to square a number
lambda_square = lambda n: n*n
lambda_square(5)

# map function to square numbers using lambda
map(lambda_square, [1, 2, 3, 4, 5])

# lambda function to find even numbers used for filtering
lambda_evens = lambda n: n%2 == 0
filter(lambda_evens, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# lambda function that adds two numbers, used by reduce to sum a list
lambda_sum = lambda x, y: x + y
reduce(lambda_sum, [1, 2, 3, 4, 5])

# lambda function to make a sentence from word tokens with reduce function
lambda_sentence_maker = lambda word1, word2: ' '.join([word1, word2])
reduce(lambda_sentence_maker, ['I', 'am', 'making', 'a', 'sentence', 'from', 'words!'])



'I am making a sentence from words!'

In [18]:
# iterators

# typical for loop
numbers = range(6)
for number in numbers:
    print(number)

# illustrating how iterators work behind the scenes
iterator_obj = iter(numbers)
while True:
    try:
        # the builtin next() works on Python 2.6+ and Python 3, unlike the
        # Python 2-only iterator.next() method
        print(next(iterator_obj))
    except StopIteration:
        # raised by next() once the iterator has no items left
        print('Reached end of sequence')
        break


0
1
2
3
4
5
0
1
2
3
4
5
Reached end of sequence


In [19]:
# calling next now would throw the StopIteration exception as expected
# (builtin next() is used instead of the Python 2-only .next() method)
next(iterator_obj)


StopIteration: 

In [20]:
# comprehensions
numbers = range(6)
numbers


[0, 1, 2, 3, 4, 5]

In [21]:
# simple list comprehension to compute squares
[num*num for num in numbers]


[0, 1, 4, 9, 16, 25]

In [22]:
# list comprehension to check if number is divisible by 2
[num%2 for num in numbers]


[0, 1, 0, 1, 0, 1]

In [23]:
# set comprehension returns distinct values of the above operation
set(num%2 for num in numbers)

# dictionary comprehension where key:value is number: square(number)
{num: num*num for num in numbers}

# a more complex comprehension showcasing above operations in a single comprehension
[{'number': num, 
  'square': num*num, 
  'type': 'even' if num%2 == 0 else 'odd'} for num in numbers]

# nested list comprehension - flattening a list of lists
list_of_lists = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
list_of_lists
[item for each_list in list_of_lists for item in each_list]



[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

In [24]:
# generators
numbers = [1, 2, 3, 4, 5]

def generate_squares(numbers):
    """Lazily yield the square of each item in ``numbers``, one per iteration."""
    for value in numbers:
        squared = value * value
        yield squared

gen_obj = generate_squares(numbers)
gen_obj
for item in gen_obj:
    print item


1
4
9
16
25


In [25]:
csv_string = 'The,fox,jumps,over,the,dog'
# making a sentence using list comprehension
list_cmp_obj = [item for item in csv_string.split(',')]
list_cmp_obj
' '.join(list_cmp_obj)

# making a sentence using generator expression
gen_obj = (item for item in csv_string.split(','))
gen_obj
' '.join(gen_obj)


'The fox jumps over the dog'

## Classes

In [26]:
# Classes 

# class definition
class Animal(object):
    """Base class describing an animal by its name plus a list of attributes.

    ``species`` is a class-level label; ``__str__`` reads it through the
    instance, so a subclass that overrides it is reported with its own value.
    """
    species = 'Animal'

    def __init__(self, name):
        """Store ``name`` and start with an empty attribute list."""
        self.name = name
        self.attributes = []

    def add_attributes(self, attributes):
        """Add one attribute, or a list of attributes, to ``self.attributes``.

        A list (or list subclass) is merged element by element; any other
        value, including a string or a tuple, is appended as a single item.
        """
        # isinstance (instead of an exact type() comparison) also recognises
        # list subclasses, and a plain if/else replaces the conditional
        # expression that was being evaluated only for its side effects
        if isinstance(attributes, list):
            self.attributes.extend(attributes)
        else:
            self.attributes.append(attributes)

    def __str__(self):
        """Return '<name> is of type <species> and has attributes:<list>'."""
        return (self.name + " is of type " + self.species
                + " and has attributes:" + str(self.attributes))

# instantiating the class
a1 = Animal('Rover')
# invoking instance method
a1.add_attributes(['runs', 'eats', 'dog'])
# user defined string representation of the Animal class
str(a1)


"Rover is of type Animal and has attributes:['runs', 'eats', 'dog']"

In [27]:
# deriving class Dog from base class Animal
class Dog(Animal):    
    """Animal subclass whose ``species`` label is 'Dog'."""
    species = 'Dog'

    def __init__(self, *args):
        # pass every positional argument unchanged to the parent initializer
        super(Dog, self).__init__(*args) 

# deriving class Fox from base class Animal
class Fox(Animal):    
    """Animal subclass whose ``species`` label is 'Fox'."""
    species = 'Fox'

    def __init__(self, *args):
        # pass every positional argument unchanged to the parent initializer
        super(Fox, self).__init__(*args)


In [28]:
# creating instance of class Dog
d1 = Dog('Rover')
d1.add_attributes(['lazy', 'beige', 'sleeps', 'eats'])
str(d1)


"Rover is of type Dog and has attributes:['lazy', 'beige', 'sleeps', 'eats']"

In [29]:
# creating instance of class Fox
f1 = Fox('Silver')
f1.add_attributes(['quick', 'brown', 'jumps', 'runs'])
str(f1)


"Silver is of type Fox and has attributes:['quick', 'brown', 'jumps', 'runs']"

## Working with Text

In [30]:
# String types

# simple string
simple_string = 'hello' + " I'm a simple string"
print simple_string

# multi-line string, note the \n (newline) escape character automatically created
multi_line_string = """Hello I'm
a multi-line
string!"""
multi_line_string
print multi_line_string

# Normal string with escape sequences leading to a wrong file path!
escaped_string = "C:\the_folder\new_dir\file.txt"
print escaped_string  # will cause errors if we try to open a file here

# raw string keeping the backslashes in its normal form
raw_string = r'C:\the_folder\new_dir\file.txt'
print raw_string

# unicode string literals
string_with_unicode = u'H\u00e8llo!'
print string_with_unicode




hello I'm a simple string
Hello I'm
a multi-line
string!
C:	he_folder
ew_dirile.txt
C:\the_folder\new_dir\file.txt
Hèllo!


In [31]:
# String operations

# Different ways of String concatenation
'Hello' + ' and welcome ' + 'to Python!'
'Hello' ' and welcome ' 'to Python!'

# concatenation of variables and literals
s1 = 'Python!'
'Hello ' + s1


'Hello Python!'

In [32]:
# we cannot concatenate a variable and a literal using this method
'Hello ' s1


SyntaxError: invalid syntax (<ipython-input-32-01ae724e2612>, line 2)

In [33]:
# some more ways of concatenating strings
s2 = '--Python--'
s2 * 5
s1 + s2
(s1 + s2)*3


'Python!--Python--Python!--Python--Python!--Python--'

In [34]:
# concatenating several strings together in parentheses
s3 = ('This '
      'is another way '
      'to concatenate '
      'several strings!')
s3


'This is another way to concatenate several strings!'

In [35]:
# checking for substrings in a string
'way' in s3
'python' in s3


False

In [36]:
# computing total length of the string
len(s3)



51

In [37]:
# String indexing and slicing

# creating a string
s = 'PYTHON'

# depicting string indexes
for index, character in enumerate(s):
    print 'Character', character+':', 'has index:', index



Character P: has index: 0
Character Y: has index: 1
Character T: has index: 2
Character H: has index: 3
Character O: has index: 4
Character N: has index: 5


In [38]:
# string indexing
s[0], s[1], s[2], s[3], s[4], s[5]


('P', 'Y', 'T', 'H', 'O', 'N')

In [39]:
s[-1], s[-2], s[-3], s[-4], s[-5], s[-6]


('N', 'O', 'H', 'T', 'Y', 'P')

In [40]:
# string slicing
s[:] 


'PYTHON'

In [41]:
s[1:4]


'YTH'

In [42]:
s[:3]


'PYT'

In [43]:
s[3:]


'HON'

In [44]:
s[-3:]


'HON'

In [45]:
s[:3] + s[3:]


'PYTHON'

In [46]:
s[:3] + s[-3:]


'PYTHON'

In [47]:
# string slicing with offsets
s[::1]  # no offset


'PYTHON'

In [48]:
s[::2]  # print every 2nd character in string


'PTO'

In [49]:
# strings are immutable hence assignment throws error
s[0] = 'X'


TypeError: 'str' object does not support item assignment

In [50]:

# creates a new string
'X' + s[1:]


'XYTHON'

In [51]:
# String methods

# case conversions
s = 'python is great'
s.capitalize()
s.upper()


'PYTHON IS GREAT'

In [52]:
# string replace
s.replace('python', 'analytics')


'analytics is great'

In [53]:
# string splitting and joining
s = 'I,am,a,comma,separated,string'
s.split(',')
' '.join(s.split(','))



'I am a comma separated string'

In [54]:
# stripping whitespace characters
s = '   I am surrounded by spaces    '
s
s.strip()


'I am surrounded by spaces'

In [55]:
# converting to title case
s = 'this is in lower case'
s.title()


'This Is In Lower Case'

In [56]:
# String formatting

# simple string formatting expressions
'Hello %s' %('Python!')


'Hello Python!'

In [57]:
'Hello %s' %('World!')


'Hello World!'

In [58]:
# formatting expressions with different data types
'We have %d %s containing %.2f gallons of %s' %(2, 'bottles', 2.5, 'milk')


'We have 2 bottles containing 2.50 gallons of milk'

In [59]:
'We have %d %s containing %.2f gallons of %s' %(5, 'jugs', 10.867, 'juice')


'We have 5 jugs containing 10.87 gallons of juice'

In [60]:
# formatting using the format method
'Hello {} {}, it is a great {} to meet you'.format('Mr.', 'Jones', 'pleasure')


'Hello Mr. Jones, it is a great pleasure to meet you'

In [61]:
'Hello {} {}, it is a great {} to meet you'.format('Sir', 'Arthur', 'honor')


'Hello Sir Arthur, it is a great honor to meet you'

In [62]:
# alternative ways of using format
'I have a {food_item} and a {drink_item} with me'.format(drink_item='soda', food_item='sandwich')


'I have a sandwich and a soda with me'

In [63]:
'The {animal} has the following attributes: {attributes}'.format(animal='dog', attributes=['lazy', 'loyal'])


"The dog has the following attributes: ['lazy', 'loyal']"

In [64]:
# Using regular expressions

# importing the re module
import re

# dealing with unicode matching using regexes
s = u'H\u00e8llo'
s


u'H\xe8llo'

In [65]:
print s


Hèllo


In [66]:
# does not return the special unicode character even if it is alphanumeric
re.findall(r'\w+', s)


[u'H', u'llo']

In [67]:
# need to explicitly specify the unicode flag to detect it using regex
re.findall(r'\w+', s, re.UNICODE)


[u'H\xe8llo']

In [68]:
# setting up a pattern we want to use as a regex
# also creating two sample strings
pattern = 'python'
s1 = 'Python is an excellent language'
s2 = 'I love the Python language. I also use Python to build applications at work!'

# match only returns a match if regex match is found at the beginning of the string
re.match(pattern, s1)


In [69]:
# pattern is in lower case hence ignore case flag helps
# in matching same pattern with different cases
re.match(pattern, s1, flags=re.IGNORECASE)


<_sre.SRE_Match at 0x7f35400ea370>

In [70]:
# printing matched string and its indices in the original string
m = re.match(pattern, s1, flags=re.IGNORECASE)
print 'Found match {} ranging from index {} - {} in the string "{}"'.format(m.group(0), m.start(), m.end(), s1)


Found match Python ranging from index 0 - 6 in the string "Python is an excellent language"


In [71]:
# match does not work when pattern is not there in the beginning of string s2
re.match(pattern, s2, re.IGNORECASE)



In [72]:
# illustrating find and search methods using the re module
re.search(pattern, s2, re.IGNORECASE)
re.findall(pattern, s2, re.IGNORECASE)


['Python', 'Python']

In [73]:
match_objs = re.finditer(pattern, s2, re.IGNORECASE)
print "String:", s2
for m in match_objs:
    print 'Found match "{}" ranging from index {} - {}'.format(m.group(0), m.start(), m.end())    


String: I love the Python language. I also use Python to build applications at work!
Found match "Python" ranging from index 11 - 17
Found match "Python" ranging from index 39 - 45


In [74]:
# illustrating pattern substitution using sub and subn methods
re.sub(pattern, 'Java', s2, flags=re.IGNORECASE)


'I love the Java language. I also use Java to build applications at work!'

In [75]:
re.subn(pattern, 'Java', s2, flags=re.IGNORECASE)


('I love the Java language. I also use Java to build applications at work!', 2)