# Chapter 11 - Data Collections

## Chapter Summary

- A **list** object is a mutable sequence of arbitrary objects. Items can be accessed by indexing and slicing. The items of a list can be changed by assignment.

- Python lists are similar to arrays in other programming languages. Python lists are more flexible because their size can vary and they are heterogeneous. Python lists also support a number of useful methods.

- One particularly important data processing operation is **sorting**. Python lists have a `sort` method that can be customized by supplying a suitable key function. This allows programs to sort lists of arbitrary objects.

- Classes can use lists to maintain collections stored as instance variables. Oftentimes using a list is more flexible than using separate instance variables.

- An entire program can be viewed as a collection of data and a set of operations&mdash;an object.

- A Python **dictionary** implements an arbitrary mapping from keys into values. It is very useful for representing non-sequential collections.

## Discussion

In [1]:
# 1. list operations

# Two sample lists: one of ints, one of single-character strings.
s1 = [2,1,4,3]
s2 = ['c', 'a','b']

# + concatenates the two lists into a new list.
print(s1+s2)
# * repeats a list; the two repeated lists are then concatenated.
print(3 * s1 + 2 * s2)
# Indexing yields a single item (the item at position 1).
print(s1[1])
# Slicing yields a new list holding the items at positions 1 and 2.
print(s1[1:3])
# s2[-1] is the string 'b', not a list, so list + str raises TypeError;
# the handler prints a fixed message instead of letting the cell fail.
try:
    print(s1 + s2[-1])
except TypeError:
    print('can only concatenate list (not "str") to list')

[2, 1, 4, 3, 'c', 'a', 'b']
[2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 'c', 'a', 'b', 'c', 'a', 'b']
1
[1, 4]
can only concatenate list (not "str") to list


In [2]:
# 1. list methods

# s1 and s2 are re-created before every statement so that each method is
# demonstrated on the same starting values.

# remove(2) deletes the first item equal to 2 (a value, not an index).
s1 = [2,1,4,3]
s2 = ['c', 'a', 'b']
s1.remove(2)
print(s1)

# sort() orders the list in place.
s1 = [2,1,4,3]
s2 = ['c', 'a', 'b']
s1.sort()
print(s1)

# s2.index('b') is 2; wrapping it in [...] appends a nested one-item list.
s1 = [2,1,4,3]
s2 = ['c', 'a', 'b']
s1.append([s2.index('b')])
print(s1)

# s1.pop(2) returns 4, and s2 has no position 4, so s2.pop(4) raises
# IndexError; the handler prints a fixed message instead.
s1 = [2,1,4,3]
s2 = ['c', 'a', 'b']
try:
    s2.pop(s1.pop(2))
    print(s2)
except IndexError:
    print ('pop index out of range')
    
# s1[0] is 2, so 'd' is inserted before position 2 of s2.
s1 = [2,1,4,3]
s2 = ['c', 'a', 'b']
s2.insert(s1[0], 'd')
print(s2)

[1, 4, 3]
[1, 2, 3, 4]
[2, 1, 4, 3, [2]]
pop index out of range
['c', 'a', 'd', 'b']


## Programming Exercises

In [3]:
# 1. statistics

from math import sqrt

def getNumbers():
    """Read numbers interactively until the user enters a blank line.

    Returns:
        A list with every entry converted to float, in entry order.

    Raises:
        ValueError: if a non-blank entry cannot be converted by float().
    """
    prompt = 'Enter a number (<Enter> to quit) >> '
    values = []
    while True:
        entry = input(prompt)
        if not entry:
            # an empty string is the sentinel that ends data entry
            break
        values.append(float(entry))
    return values

def mean(nums):
    """Return the arithmetic mean of the values in nums.

    Raises:
        ZeroDivisionError: if nums is empty.
    """
    total = 0.0
    for value in nums:
        total = total + value
    count = len(nums)
    return total / count

def stdDev(nums, xbar):
    """Return the sample standard deviation of nums around the mean xbar.

    The sum of squared deviations is divided by len(nums) - 1 before the
    square root is taken.

    Raises:
        ZeroDivisionError: if nums holds exactly one value.
    """
    squared_total = 0.0
    for value in nums:
        squared_total += (value - xbar) ** 2
    variance = squared_total / (len(nums) - 1)
    return sqrt(variance)

def median(nums):
    """Return the median of a non-empty collection of numbers.

    For an odd number of values the middle value of the sorted data is
    returned unchanged; for an even number the two middle values are
    averaged (always a float because of the division by 2.0).

    The data is sorted into a private copy, so the caller's list keeps
    its original order.

    Raises:
        ValueError: if nums is empty.
    """
    ordered = sorted(nums)
    size = len(ordered)
    if size == 0:
        raise ValueError('median requires at least one value')
    midPos = size // 2
    if size % 2 == 0:
        # even count: average the two values that straddle the middle
        return (ordered[midPos] + ordered[midPos - 1]) / 2.0
    return ordered[midPos]

In [4]:
# 5. Implement list methods

def my_count(myList, x):
    """Return how many items of myList compare equal to x."""
    return sum(1 for item in myList if item == x)

def my_isin(myList, x):
    """Return True if some item of myList compares equal to x, else False.

    The scan stops at the first match.
    """
    return any(item == x for item in myList)

def my_index(myList, x):
    """Return the position of the first item of myList equal to x.

    Mirrors list.index: a missing value is reported with ValueError
    rather than by returning a position that does not hold x.

    Raises:
        ValueError: if no item of myList compares equal to x
            (including when myList is empty).
    """
    for ind, value in enumerate(myList):
        if value == x:
            return ind
    raise ValueError('{!r} is not in list'.format(x))

def my_reverse(myList):
    """Reverse myList in place and return the same list object.

    Items are swapped pairwise from the two ends toward the middle; the
    middle item of an odd-length list is left where it is.
    """
    left = 0
    right = len(myList) - 1
    while left < right:
        myList[left], myList[right] = myList[right], myList[left]
        left += 1
        right -= 1
    return myList

def my_sort(myList):
    """Sort myList in place with selection sort and return the same list.

    For each position in turn, the smallest remaining item (the first one
    found when there are ties) is swapped into that position.
    """
    size = len(myList)
    for start in range(size - 1):
        # index of the smallest item in myList[start:]
        smallest = min(range(start, size), key=lambda pos: myList[pos])
        myList[start], myList[smallest] = myList[smallest], myList[start]
    return myList

# Side-by-side checks: each built-in list operation is printed first,
# followed by the hand-written equivalent on the same data.

# count of the value 10
myList = [34, 22, 111, 10, 4444, 10]
print(myList.count(10))
print(my_count(myList, 10))

# membership test for 4444
print()
print(4444 in myList)
print(my_isin(myList, 4444))

# position of the value 22
print()
print(myList.index(22))
print(my_index(myList, 22))

# in-place reversal; the list is rebuilt before the second call because
# the first call has already reversed it
print()
myList = [34, 22, 111, 10, 4444, 10, 33]
myList.reverse()
print(myList)
myList = [34, 22, 111, 10, 4444, 10, 33]
my_reverse(myList)
print(myList)

# in-place sort; again rebuilt so both calls start from unsorted data
print()
myList = [34, 22, 111, 10, 4444, 10, 33]
myList.sort()
print(myList)
myList = [34, 22, 111, 10, 4444, 10, 33]
my_sort(myList)
print(myList)

2
2

True
True

1
1

[33, 10, 4444, 10, 111, 22, 34]
[33, 10, 4444, 10, 111, 22, 34]

[10, 10, 22, 33, 34, 111, 4444]
[10, 10, 22, 33, 34, 111, 4444]


In [5]:
# 6. shuffle list

from random import randrange

def shuffle(my_list):
    """Return a new list holding the items of my_list in random order.

    Items are drawn one at a time, at a random position, from a private
    copy of the input, so the caller's list is left untouched.

    Args:
        my_list: the items to shuffle; may be empty.

    Returns:
        A new list with the same items in a random order.
    """
    pool = list(my_list)  # work on a copy so the argument is not emptied
    result = []
    while pool:
        result.append(pool.pop(randrange(len(pool))))
    return result

# Demo: shuffle a small mixed-type list; the printed order varies per run.
print(shuffle(['c', 234, [1,2], 333]))

['c', [1, 2], 234, 333]


In [6]:
# 7. inner product

def inner_prod(list1, list2):
    """Return the inner (dot) product of two equal-length lists.

    If the lengths differ, a message is printed and None is returned.
    """
    if len(list1) != len(list2):
        print('Two lists are not of the same length!')
        return
    total = 0
    for left, right in zip(list1, list2):
        total += left * right
    return total

# Demo: 2*6 + 3*22 + 4*1
print(inner_prod([2,3,4],[6,22,1]))

82


In [7]:
# 8. remove duplicate

def rm_dup(my_list):
    """Return a new list with duplicates of my_list removed.

    The first occurrence of each value is kept and the original order is
    preserved. Items are compared with ==, so unhashable items work too.
    """
    unique = []
    for item in my_list:
        if item in unique:
            continue
        unique.append(item)
    return unique

# Demo: repeated values are collapsed to their first occurrence.
print(rm_dup([2,2,2,2,34,11,22,11,34,5555,123,123]))

[2, 34, 11, 22, 5555, 123]


In [8]:
# 10. sieve prime

def sieve_prime(n):
    """Return the primes less than or equal to n in ascending order.

    Uses the Sieve of Eratosthenes with one boolean flag per number
    instead of repeatedly searching and removing items from a list.

    Args:
        n: inclusive upper bound (an int).

    Returns:
        A list of primes; empty when n is below 2.
    """
    if n < 2:
        return []
    is_candidate = [True] * (n + 1)
    is_candidate[0] = is_candidate[1] = False
    prime_list = []
    for num in range(2, n + 1):
        if is_candidate[num]:
            prime_list.append(num)
            # smaller multiples of num were already crossed out by
            # smaller primes, so start at num * num
            for multiple in range(num * num, n + 1, num):
                is_candidate[multiple] = False
    return prime_list

# Demo: a bare expression, so the result is shown only when an
# interactive session echoes the value of the final expression.
sieve_prime(20)

[2, 3, 5, 7, 11, 13, 17, 19]

In [9]:
# 11. word censor

import os

def create_file(file_name, string):
    """Write string to file_name, replacing any existing content."""
    with open(file_name, 'w') as handle:
        handle.write(string)

def delete_file(file_name):
    """Delete file_name and print whether the removal succeeded.

    A missing file is reported with a printed message instead of an
    exception.
    """
    try:
        os.remove(file_name)
    except FileNotFoundError:
        print('No such file or directory:', file_name)
    else:
        print("File '{}' removed!".format(file_name))

def censor(input_file, target_file):
    """Copy input_file to target_file with four-letter words masked.

    Every whitespace-separated word of exactly four characters becomes
    '****'. Words on a line are re-joined with single spaces and lines
    are joined with newlines (no trailing newline is written).
    """
    def mask(word):
        return '****' if len(word) == 4 else word

    with open(input_file, 'r') as source:
        censored_lines = [' '.join(map(mask, line.split())) for line in source]

    with open(target_file, 'w') as sink:
        sink.write('\n'.join(censored_lines))

# Demo: write a two-line sample, censor it into 'tmp_censor', then delete
# both files. The censored text itself is never displayed here.
create_file('tmp', 'I have a dream that one day\nall the children will be free')
censor('tmp', 'tmp_censor')
delete_file('tmp')
delete_file('tmp_censor')

File 'tmp' removed!
File 'tmp_censor' removed!


In [10]:
# 12. word censor with arbitrary set of words

import os

def create_file(file_name, string):
    """Write string to file_name, replacing any existing content."""
    with open(file_name, 'w') as handle:
        handle.write(string)

def delete_file(file_name):
    """Delete file_name and print whether the removal succeeded.

    A missing file is reported with a printed message instead of an
    exception.
    """
    try:
        os.remove(file_name)
    except FileNotFoundError:
        print('No such file or directory:', file_name)
    else:
        print("File '{}' removed!".format(file_name))

def censor(input_file, target_file, censor_file):
    """Copy input_file to target_file with listed words masked.

    censor_file supplies the whitespace-separated words to hide. Each
    matching word (exact, case-sensitive match on a whitespace-separated
    token) is replaced by asterisks of the same length. Words on a line
    are re-joined with single spaces and lines are joined with newlines
    (no trailing newline is written).
    """
    with open(censor_file) as word_source:
        banned = word_source.read().split()

    def mask(word):
        return '*' * len(word) if word in banned else word

    with open(input_file, 'r') as source:
        censored_lines = [' '.join(map(mask, line.split())) for line in source]

    with open(target_file, 'w') as sink:
        sink.write('\n'.join(censored_lines))

# Demo: write a sample text and a word list ('a', 'dream', 'children'),
# censor the text into 'tmp_censor', then delete all three files.
# The censored text itself is never displayed here.
create_file('tmp', 'I have a dream that one day\nall the children will be free')
create_file('censor', 'a dream\nchildren')
censor('tmp', 'tmp_censor', 'censor')
delete_file('tmp')
delete_file('censor')
delete_file('tmp_censor')

File 'tmp' removed!
File 'censor' removed!
File 'tmp_censor' removed!


In [11]:
# 13. & 14. cards

from random import randrange

def draw_one_card():
    """Return one random card as a (rank, suit) tuple.

    rank is an int from 1 to 13 and suit is one of 'Diamonds', 'Clubs',
    'Hearts' or 'Spades'. The rank is drawn before the suit.
    """
    suits = ('Diamonds', 'Clubs', 'Hearts', 'Spades')
    rank = randrange(1, 14)
    suit = suits[randrange(len(suits))]
    return rank, suit

def draw_n_cards(n):
    """Return a list of n independently drawn cards.

    Each card comes from draw_one_card(), so the same card can appear
    more than once.
    """
    return [draw_one_card() for _ in range(n)]

def return_second(t):
    """Return the item at index 1 of t (used as a sort key for cards)."""
    return t[1]

def sort_cards(n):
    """Draw n random cards and return them ordered by suit, then rank.

    The cards are first put in natural tuple order (rank first); the
    second, stable sort by suit then keeps ranks ascending within each
    suit.
    """
    by_rank = sorted(draw_n_cards(n))
    by_rank.sort(key=return_second)
    return by_rank

# Demo: a bare expression, so the result is shown only when an
# interactive session echoes the value of the final expression.
sort_cards(5)

[(1, 'Diamonds'),
 (12, 'Diamonds'),
 (4, 'Spades'),
 (5, 'Spades'),
 (13, 'Spades')]

In [12]:
# 15. deck class

from itertools import product

class deck:
    """A standard 52-card deck stored as a list of (suit, rank) tuples.

    Suits are 'Diamonds', 'Clubs', 'Hearts' and 'Spades'; ranks are the
    ints 1 through 13. The front of the list (index 0) is the top of
    the deck.
    """

    def __init__(self):
        """Build the full deck, grouped by suit with ranks ascending."""
        suit_list = ['Diamonds', 'Clubs', 'Hearts', 'Spades']
        # cards still in the deck, top card first
        self.cards = list(product(suit_list, range(1, 14)))

    def shuffle(self):
        """Put the cards currently in the deck into a random order.

        Every remaining card is kept; only the order changes.
        """
        # imported here so the class does not rely on another cell
        # having imported randrange first
        from random import randrange

        shuffled_cards = []
        # draw until the deck is empty so that no card is left behind
        while self.cards:
            position = randrange(len(self.cards))
            shuffled_cards.append(self.cards.pop(position))
        self.cards = shuffled_cards

    def dealCard(self):
        """Remove the top card from the deck, print it and return it.

        Raises:
            IndexError: if the deck is empty.
        """
        card = self.cards.pop(0)
        print(card)
        return card

    def cardsLeft(self):
        """Return the number of cards still in the deck."""
        return len(self.cards)
    
# Demo: build a deck, shuffle it, and deal ten cards (dealCard prints
# each card as it is dealt).
my_deck = deck()
my_deck.shuffle()
for i in range(10):
    my_deck.dealCard()

('Hearts', 12)
('Hearts', 13)
('Clubs', 2)
('Hearts', 2)
('Spades', 1)
('Spades', 2)
('Diamonds', 7)
('Diamonds', 12)
('Spades', 12)
('Clubs', 11)


In [13]:
# 18. Random Walk

from random import random

def random_walk(start_pos):
    """Return start_pos moved one unit in a random direction.

    The step is +1 when random() exceeds 0.5 and -1 otherwise.
    """
    step = 1 if random() > 0.5 else -1
    return start_pos + step

def cross(length):
    """Walk randomly from position 0 until leaving [-length/2, length/2].

    Returns:
        1 if the walk exits past +length/2, -1 if it exits past -length/2.
    """
    half = length / 2
    position = 0
    while -half <= position <= half:
        position = random_walk(position)
        # record the exit side as soon as the walk steps out of range
        if position > half:
            side = 1
        elif position < -half:
            side = -1
    return side

def random_cross(n, length):
    """Run n random walks and total the exits on each side.

    Returns:
        A tuple (right_cross, left_cross): the sum of the positive
        results of cross(length) and the sum of the negative results
        (so left_cross is zero or negative).
    """
    outcomes = [cross(length) for _ in range(n)]
    right_cross = 0
    left_cross = 0
    for outcome in outcomes:
        if outcome > 0:
            right_cross += outcome
        elif outcome < 0:
            left_cross += outcome
    return right_cross, left_cross
    
# Demo: 100 walks on a span of length 10; a bare expression, so the
# result is shown only when an interactive session echoes it.
random_cross(100, 10)

(46, -54)