# Tuples

A tuple is like a strict list. 

In [1]:
t = (1, 10, 15)
type(1)

int

In [2]:
t[0]

1

In [3]:
for item in t:
    print(item * item)

1
100
225


In [4]:
t.append(30) #this doesn't work

AttributeError: 'tuple' object has no attribute 'append'

In [5]:
carefree_list = [5, 10, 15, 20, 25]

In [6]:
carefree_list[1] = "Mr. Fluffypants"

In [7]:
t[1] = "Mr. Fluffypants"

TypeError: 'tuple' object does not support item assignment

In [None]:
#an immutable datatype
#Benefit 1: can't be changed
#Benefit 2: energy efficient

In [10]:
foo = (1,2,3,4,5,6,7)

In [11]:
import sys

sys.getsizeof(foo)

104

In [13]:
hello = [1,3,4,5,6,7]
sys.getsizeof(hello)

112

# Back to Regular Expressions
## Grouping with multiple matches in the same string

In [19]:
import re

In [20]:
test = "one 1 two 2 three 3 four 4 five 5"

In [21]:
re.findall(r"\w+ \d+", test)

['one 1', 'two 2', 'three 3', 'four 4', 'five 5']

In [22]:
for item in re.findall(r"\w+ \d+", test):
    x = item.split(" ")
    print(x[0])
    print(x[1])

one
1
two
2
three
3
four
4
five
5


In [28]:
test = "one 1 two 2 three 3 four 4 five 5"   
re.findall(r"(\w+) (\d)", test)

[('one', '1'), ('two', '2'), ('three', '3'), ('four', '4'), ('five', '5')]

In [27]:
all_subjects = open('enronsubjects.txt').read()

In [35]:
for item in re.findall(r"(\d{3})-(\d{3})-(\d{4})", all_subjects):
    print(item[0])
    print(item[1])

713
713
713
713
713
713
713
713
713
713
713
281
713
713
713
713
713
713
281
713
713
713
614
713
303
281
800
800
888


In [33]:
[item[0] for item in re.findall(r"(\d{3})-(\d{3})-(\d{4})", all_subjects)]

['713',
 '713',
 '713',
 '713',
 '713',
 '713',
 '713',
 '713',
 '713',
 '713',
 '713',
 '281',
 '713',
 '713',
 '713',
 '713',
 '713',
 '713',
 '281',
 '713',
 '713',
 '713',
 '614',
 '713',
 '303',
 '281',
 '800',
 '800',
 '888']

### monetary amounts in the subject lines
matchi somethin g like $10

In [38]:
re.findall(r"\$(\d+) ?([kKbNmM])", all_subjects)

[('10', 'M'),
 ('10', 'M'),
 ('10', 'M'),
 ('10', 'M'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('25', 'm'),
 ('40', 'M'),
 ('870', 'K'),
 ('870', 'K'),
 ('21', 'b'),
 ('6', 'm'),
 ('14', 'b'),
 ('14', 'b'),
 ('350', 'M'),
 ('500', 'k'),
 ('500', 'k'),
 ('500', 'k'),
 ('500', 'k'),
 ('500', 'k'),
 ('500', 'k'),
 ('500', 'k'),
 ('500', 'k'),
 ('500', 'k'),
 ('550', 'M'),
 ('455', 'M'),
 ('5', 'm'),
 ('5', 'm'),
 ('5', 'm'),
 ('7', 'M'),
 ('7', 'M'),
 ('7', 'M'),
 ('7', 'M'),
 ('7', 'M'),
 ('7', 'M'),
 ('100', 'm'),
 ('7', 'M'),
 ('7', 'M'),
 ('7', 'M'),
 ('8', 'M'),
 ('8', 'M'),
 ('500', 'm'),
 ('500', 'm'),
 ('500', 'm'),
 ('80', 'm'),
 ('80', 'm'),
 ('80', 'm'),
 ('80', 'm'),
 ('80', 'm'),
 ('80', 'm'),
 ('80', 'm'),
 ('50', 'M'),
 ('25', 'M'),
 ('25', 'M'),
 ('25', 'M'),
 ('45', 'M'),
 ('45', 'M'),
 ('100', 'k'),
 ('130', 'M'),
 ('130', 'M'),


In [39]:
for item in re.findall(r"\$(\d+) ?([kKbNmM])", all_subjects):
    print(item[0])

10
10
10
10
25
25
25
25
25
25
25
25
25
25
25
25
25
25
40
870
870
21
6
14
14
350
500
500
500
500
500
500
500
500
500
550
455
5
5
5
7
7
7
7
7
7
100
7
7
7
8
8
500
500
500
80
80
80
80
80
80
80
50
25
25
25
45
45
100
130
130
130
1


In [45]:
vals = []
for item in re.findall(r"\$(\d+) ?([kKbBmM])", all_subjects):
    multiplier = item[1].lower()
    number_val = int(item[0])
    if multiplier == 'k':
        number_val *= 1000
    if multiplier == 'm':
        number_val *= 1000000
    if multiplier == 'b':
        number_val *= 1000000000
    vals.append(number_val)
sum(vals)
        
        

1349657340000

### substitution

In [48]:
message = "this is a test, this is only a test"

In [49]:
message.replace("this", "that").replace("test", "walrus")

'that is a walrus, that is only a walrus'

In [50]:
re.findall(r"(\d{3})-(\d{3})-(\d{4})", all_subjects)

[('713', '853', '4743'),
 ('713', '222', '7667'),
 ('713', '222', '7667'),
 ('713', '222', '7667'),
 ('713', '222', '7667'),
 ('713', '222', '7667'),
 ('713', '222', '7667'),
 ('713', '222', '7667'),
 ('713', '222', '7667'),
 ('713', '222', '7667'),
 ('713', '222', '7667'),
 ('281', '296', '0573'),
 ('713', '851', '2499'),
 ('713', '345', '7896'),
 ('713', '345', '7896'),
 ('713', '345', '7896'),
 ('713', '345', '7896'),
 ('713', '345', '7896'),
 ('281', '367', '8953'),
 ('713', '528', '0759'),
 ('713', '850', '9002'),
 ('713', '703', '8294'),
 ('614', '888', '9588'),
 ('713', '767', '8686'),
 ('303', '571', '6135'),
 ('281', '537', '9334'),
 ('800', '937', '6563'),
 ('800', '937', '6563'),
 ('888', '296', '1938')]

In [51]:
message = "this is a test; this is only a test"
re.sub(r"[Tt]his", "that", message)

'that is a test; that is only a test'

In [52]:
re.sub(r"\b\w+\b", "WALRUS", message)

'WALRUS WALRUS WALRUS WALRUS; WALRUS WALRUS WALRUS WALRUS WALRUS'

In [None]:
re.sub(r"\b\w+\b", "WALRUS", message)

In [57]:
anon = re.sub(r"(\d{3})-(\d{3})-(\d{4})", "555-555-5555", all_subjects)

In [61]:
re.findall(r"(\d{3})-(\d{3})-(\d{4})", anon)

[('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555'),
 ('555', '555', '5555')]

In [64]:
anon2 = re.sub(r"(\d{3})-(\d{3})-(\d{4})", r"\1-\2-XXXX", all_subjects)

In [65]:
re.findall(r".{,20}\d{3}-\d{3-X{4}.{,20}}", anon2)

[]