# String
- s.lower(), s.upper() -- returns the lowercase or uppercase version of the string
- s.strip() -- returns a string with whitespace removed from the start and end
- s.isalpha()/s.isdigit()/s.isspace()... -- tests if all the string chars are in the various character classes
- s.startswith('other'), s.endswith('other') -- tests if the string starts or ends with the given other string
- s.find('other') -- searches for the given other string (not a regular expression) within s, and returns the first index where it begins or -1 if not found
- s.replace('old', 'new') -- returns a string where all occurrences of 'old' have been replaced by 'new'
- s.split('delim') -- returns a list of substrings separated by the given delimiter. The delimiter is not a regular expression, it's just text. 'aaa,bbb,ccc'.split(',') -> ['aaa', 'bbb', 'ccc']. As a convenient special case s.split() (with no arguments) splits on all whitespace chars.
- s.join(list) -- **opposite of split()**, joins the elements in the given list together using the string as the delimiter. e.g. '---'.join(['aaa', 'bbb', 'ccc']) -> aaa---bbb---ccc

The '+' operator can concatenate two strings.

```
  text = (
    "%d little pigs come out, "
    "or I'll %s, and I'll %s, "
    "and I'll blow your %s down."
    % (3, 'huff', 'puff', 'house'))
```

# List
- list.append(elem) -- adds a single element to the end of the list. **Common error: does not return the new list** <--
- list.insert(index, elem) -- inserts the element at the given index, shifting elements to the **right**. **does not return** <--
- list.extend(list2) adds the elements in list2 to the end of the list. **Using + or += on a list is similar to using extend().** **does not return** <--
- list.index(elem) -- searches for the given element from the start of the list and returns its index. Throws a ValueError if the element does not appear **(use "in" to check without a ValueError).**
- list.remove(elem) -- searches for the first instance of the given element and removes it **(throws ValueError if not present)** **does not return** <--
- list.sort() -- sorts the list in place (**does not return it**). (The **sorted()** function shown later is **preferred**.) <-- 
- list.reverse() -- **reverses the list in place (does not return it)** <--
- list.pop(index) -- removes and returns the element at the given index. Returns the rightmost element if index is omitted (roughly the opposite of append()).

Assignment with an = on lists does not make a copy. Instead, assignment makes the two variables point to the one list in memory.
The '+' works to append two lists, so [1, 2] + [3, 4] yields [1, 2, 3, 4] (this is just like + with strings).

```
 list = [1, 2, 3]
  print list.append(4)   ## NO, does not work, append() returns **None**
  ## Correct pattern:
  list.append(4)
  print list  ## [1, 2, 3, 4]
```

```
  list = ['a', 'b', 'c', 'd']
  print list[1:-1]   ## ['b', 'c']
  list[0:2] = 'z'    ## replace ['a', 'b'] with ['z']
  print list         ## ['z', 'c', 'd']
```

In [75]:
list = ['larry', 'curly', 'moe']
#append
list.append('harry')

In [76]:
print(list)

['larry', 'curly', 'moe', 'harry']


In [77]:
#insert
list.insert(2, 'carry')  # return None 

In [78]:
print(list)

['larry', 'curly', 'carry', 'moe', 'harry']


In [50]:
list.extend(['testy'])  #list + ['testy']

In [51]:
print(list)

['larry', 'curly', 'carry', 'moe', 'harry', 'testy']


In [52]:
list.index('testy')

5

In [53]:
#list.index('laly')  # ValueError: 'laly' is not in list

In [54]:
'laly' in list  # better

False

In [55]:
list.remove('larry')  # return None  better to chck if in the list and then remove will do value Error

In [57]:
list.pop(3)  

'harry'

In [58]:
print(list)

['curly', 'carry', 'moe', 'testy']


In [59]:
list.pop()  # default without index is last, if index does not exist give error better to do IN check first

'testy'

In [60]:
print(list)

['curly', 'carry', 'moe']


In [61]:
list.reverse()

In [62]:
list

['moe', 'carry', 'curly']

In [63]:
list.sort()

In [64]:
list  # ascending

['carry', 'curly', 'moe']

In [72]:
tup = [(1, 7), (1, 3), (3, 4, 5), (2, 2)]

In [73]:
#sorted(my_map.items(), key=lambda kv: kv[1])
tup.sort(key=lambda x: x[-1])

In [74]:
tup

[(2, 2), (1, 3), (3, 4, 5), (1, 7)]

In [80]:
print(list.pop())

testy


In [81]:
list

['larry', 'curly', 'carry', 'moe', 'harry']

In [168]:
vowels = ['a', 'e', 'i', 'o', 'i', 'u']
vowels.count('i')

2

In [169]:
old_list = [1, 2, 3]
new_list = old_list

# add element to list
new_list.append('a')

print('New List:', new_list )
print('Old List:', old_list )

New List: [1, 2, 3, 'a']
Old List: [1, 2, 3, 'a']


In [170]:
old_list = [1, 2, 3]
new_list = old_list

# add element to list
new_list[0] = 5

print('New List:', new_list )
print('Old List:', old_list )

New List: [5, 2, 3]
Old List: [5, 2, 3]


### Copy

In [171]:
old_list = [1, 2, 3]
new_list = old_list.copy()

# add element to list
new_list.append('a')

print('New List:', new_list )
print('Old List:', old_list )

New List: [1, 2, 3, 'a']
Old List: [1, 2, 3]


In [84]:
def remove_adjacent(nums):
    list_new = []
    for i, num in enumerate(nums):
        if(i > 0 and num == nums[i-1]):
            continue
        list_new.append(num)   
    return list_new

In [85]:
nums = [1, 2, 2, 3]
print(remove_adjacent(nums))

[1, 2, 3]


In [114]:
def linear_merge(list1, list2):
    merged_list = []
#    if(list2[0] >= list1[-1]): 
#        merged_list = list1.extend(list2)
#        return merged_list
#    if(list1[0] >= list2[-1]):
#        merged_list = list2.extend(list1)
#        return merged_list
    counter = 0
    total_length = len(list1) +len(list2)
    while(len(list1)>0 and len(list2)):
        
        #pop1 = list1.pop(0)
        #pop2 = list2.pop(0)
        #print(pop1, pop2)
        if(list1[0] >= list2[0]):
            merged_list.extend([list2.pop(0)])
            print('1',merged_list)
        else:
            merged_list.extend([list1.pop(0)])
            print('2',merged_list)
    merged_list.extend(list1)  # don't know which one finished first
    merged_list.extend(list2)
    return merged_list

In [115]:
linear_merge(['aa', 'xx', 'zz'], ['bb', 'cc'])

2 ['aa']
1 ['aa', 'bb']
1 ['aa', 'bb', 'cc']


['aa', 'bb', 'cc', 'xx', 'zz']

In [116]:
linear_merge(['aa', 'xx'], ['bb', 'cc', 'zz'])

2 ['aa']
1 ['aa', 'bb']
1 ['aa', 'bb', 'cc']
2 ['aa', 'bb', 'cc', 'xx']


['aa', 'bb', 'cc', 'xx', 'zz']

In [117]:
linear_merge(['aa', 'aa'], ['aa', 'bb', 'bb'])

1 ['aa']
2 ['aa', 'aa']
2 ['aa', 'aa', 'aa']


['aa', 'aa', 'aa', 'bb', 'bb']

In [119]:
list2 = [10,3]
list1 = []
list2.extend(list1)

In [120]:
list2

[10, 3]

# Sorting
- The sorted() function can be customized through optional arguments. The sorted() optional argument reverse=True
- Custom Sorting With key=, For more complex custom sorting, sorted() takes an optional "key=" specifying a "key" function that transforms each element before comparison. The key function takes in 1 value and returns 1 value, and the returned "proxy" value is used for the comparisons within the sort.

```
strs = ['ccc', 'aaaa', 'd', 'bb']
print sorted(strs, key=len)  ## ['d', 'bb', 'ccc', 'aaaa']

## "key" argument specifying str.lower function to use for sorting
print sorted(strs, key=str.lower)  ## ['aa', 'BB', 'CC', 'zz']

## Say we have a list of strings we want to sort by the last letter of the string.
strs = ['xc', 'zb', 'yd' ,'wa']

## Write a little function that takes a string, and returns its last letter.
## This will be the key function (takes in 1 value, returns 1 value).
def MyFn(s):
return s[-1]

## Now pass key=MyFn to sorted() to sort by the last letter:
print sorted(strs, key=MyFn)  ## ['wa', 'zb', 'xc', 'yd']

# altenrative sort()
alist.sort()            ## correct
alist = blist.sort()    ## NO incorrect, sort() returns None
```

In [139]:
def function(s):
    return s[-1].upper() + s[0].lower()
strs = ['xck', 'zbo', 'ydf' ,'wa0']

sorted(strs, key = function)

['wa0', 'ydf', 'xck', 'zbo']

# Tuple

Tuples are like lists, except they are immutable and do not change size (tuples are not strictly immutable since one of the contained elements could be mutable)
```
tuple = (1, 2, 'hi')
print len(tuple)  ## 3
print tuple[2]    ## hi
tuple[2] = 'bye'  ## NO, tuples cannot be changed
tuple = (1, 2, 'bye')  ## this works
tuple = ('hi',)   ## size-1 tuple
```


```
(x, y, z) = (42, 13, "hike")
print z  ## hike
(err_string, err_code) = Foo()  ## Foo() returns a length-2 tuple
```

In [180]:
# Concatenation
# Output: (1, 2, 3, 4, 5, 6)
print((1, 2, 3) + (4, 5, 6))

# Repeat
# Output: ('Repeat', 'Repeat', 'Repeat')
print(("Repeat",) * 3)

(1, 2, 3, 4, 5, 6)
('Repeat', 'Repeat', 'Repeat')


# List comprehension

```
nums = [1, 2, 3, 4]
squares = [ n * n for n in nums ]   ## [1, 4, 9, 16]

strs = ['hello', 'and', 'goodbye']

shouting = [ s.upper() + '!!!' for s in strs ]
## ['HELLO!!!', 'AND!!!', 'GOODBYE!!!']

## Select values <= 2
nums = [2, 8, 1, 6]
small = [ n for n in nums if n <= 2 ]  ## [2, 1]

## Select fruits containing 'a', change to upper case
fruits = ['apple', 'cherry', 'banana', 'lemon']
afruits = [ s.upper() for s in fruits if 'a' in s ]
## ['APPLE', 'BANANA']
```

In [143]:
squares = [x**2 for x in range(9) if x % 2 == 0]  # no else, if comes at the end
squares = [x**2 if x % 2 == 0 else x + 3 for x in range(9)]  # if have else, both come at the beginning

# Dict Hash Table

- Python's efficient key/value hash table structure is called a "dict". The contents of a dict can be written as a series of key:value pairs within braces { }, e.g. dict = {key1:value1, key2:value2, ... }. The "empty dict" is just an empty pair of curly braces {}.

- Strategy note: from a performance point of view, the dictionary is one of your greatest tools, and you should use it where you can as an easy way to organize data. For example, you might read a log file where each line begins with an IP address, and store the data into a dict using the IP address as the key, and the list of lines where it appears as the value. Once you've read in the whole file, you can look up any IP address and instantly see its list of lines.

```
dict = {'a': 'alpha', 'o': 'omega', 'g': 'gamma'}
'a' in dict         ## True
if 'z' in dict: print dict['z']     ## Avoid KeyError
print dict.get('z')  ## None (instead of KeyError)
print dict.keys()  ## ['a', 'o', 'g']
print dict.values()  ## ['alpha', 'omega', 'gamma']

## Common case -- loop over the keys in sorted order,
## accessing each key/value
for key in sorted(dict.keys()):
print key, dict[key]

print dict.items()  ##  [('a', 'alpha'), ('o', 'omega'), ('g', 'gamma')]
```

In [184]:
dict = {'a': '3alpha', 'o': '1omega', 'g': '0gamma'}

In [165]:
def func(x):
    return x[1]
sorted(dict.items(), key = func)

[('g', '0gamma'), ('o', '1omega'), ('a', '3alpha')]

In [137]:
sorted(dict.items(), key = lambda key: key[1])

[('g', '0gamma'), ('o', '1omega'), ('a', '3alpha')]

In [183]:
sorted(dict.items(), key = lambda key: key[0])

[('a', '3alpha'), ('g', '0gamma'), ('o', '1omega')]

In [185]:
new = sorted(dict.items(), key = lambda key: key[0])

In [186]:
dict

{'a': '3alpha', 'g': '0gamma', 'o': '1omega'}

In [187]:
new

[('a', '3alpha'), ('g', '0gamma'), ('o', '1omega')]

In [188]:
sorted(dict.items(), key = lambda key: key[1])

[('g', '0gamma'), ('o', '1omega'), ('a', '3alpha')]

In [194]:
list2.reverse()

In [193]:
list2

[10, 3]

In [195]:
list2

[3, 10]

In [197]:
list2.count(3)

1

In [198]:
text = "striingsd"
text.count("s")

2

In [189]:
dict

{'a': '3alpha', 'g': '0gamma', 'o': '1omega'}

In [152]:
import numpy as np
def func(s):
    return str(np.random.random()) + s
sorted(dict.keys(), key = func)

['o', 'a', 'g']

In [151]:
sorted(dict.values(), reverse=True)

['3alpha', '1omega', '0gamma']

In [178]:
dict.pop('a')  # key popping

'3alpha'

In [179]:
dict

{'g': '0gamma', 'o': '1omega'}

In [160]:
dict.popitem()  #The popitem() returns and removes an arbitrary element 

('g', '0gamma')

In [161]:
dict

{'a': '3alpha', 'o': '1omega'}

In [162]:
dict.popitem()

('o', '1omega')

# Generator


In [142]:
sq_iterator = (x**2 for x in range(10)) 
# iterator comprehension [x**2] list comprehen, {x**2} set comprehe, {x:x**2} dict compre, (x**2) generator
sq_iterator

<generator object <genexpr> at 0x000001C72F3FC620>

In [144]:
def my_range(x):
    i = 0
    while i < x:
        yield i
        i += 1
        
for i in my_range(10):
    print(i)

0
1
2
3
4
5
6
7
8
9


In [153]:
lessons = ["Why Python Programming", "Data Types and Operators", "Control Flow", "Functions", "Scripting"]

def my_enumerate(list_, max_count):
    i = 0
    while (i < max_count):
        yield i, list_[i]
        i += 1
        

for i, lesson in my_enumerate(lessons, 4):
    print("Lesson {}: {}".format(i, lesson))

Lesson 0: Why Python Programming
Lesson 1: Data Types and Operators
Lesson 2: Control Flow
Lesson 3: Functions


# Files

- The open() function opens and returns a file handle that can be used to read or write a file in the usual way. The code f = open('name', 'r') opens the file into the variable f, ready for reading operations, and use f.close() when finished. Instead of 'r', use 'w' for writing, and 'a' for append. The special mode 'rU' is the "Universal" option for text files where it's smart about converting different line-endings so they always come through as a simple '\n'. The standard for-loop works for text files, iterating through the lines of the file (this works only for text files, not binary files). The for-loop technique is a simple and efficient way to look at all the lines in a text file:

```
# Echo the contents of a file
f = open('foo.txt', 'rU')
for line in f:   ## iterates over the lines of the file
print line,    ## trailing , so print does not add an end-of-line char
               ## since 'line' already includes the end-of line.
f.close()
```

- **f.readlines()** method reads the whole file into memory and returns its contents as a list of its lines
- **f.readline()** read one line at a time

- **f.read()** method reads the whole file into a single string, which can be a handy way to deal with the text all at once, such as with regular expressions 

- For writing, f.write(string) method is the easiest way to write data to an open output file

In [None]:
f = open('foo.txt', 'rU')
for line in f:   ## iterates over the lines of the file
print line,    ## trailing , so print does not add an end-of-line char
               ## since 'line' already includes the end-of line.
f.close()

In [None]:
- for line in f.readlines():
- for line in f:
- while flag = True: f.readline()
- f.read()

In [None]:
file = open("census.csv")
line = file.readline()
while line:
    line = file.readline()
    print(line)
    print(line.split(','))
    counter += 1
    if(counter>3):
        break    

In [None]:
counter = 1
with open("census.csv") as file:
    line = file.readline()
    print(line)
    while line:
        line = file.readline()
        print(line)
        print(line.split(','))
        counter += 1
        if(counter>3):
            break

# Del

In [208]:
list = ['a', 'b', 'c', 'd']
dict = {'a':1, 'b':2, 'c':3}

In [209]:
del list[0]  #  list.remove('x')  list.pop(index)

In [210]:
list

['b', 'c', 'd']

In [211]:
list.remove('b')

In [212]:
list

['c', 'd']

In [213]:
list.pop(-1)

'd'

In [214]:
list

['c']

In [202]:
del dict['a'] # dict.pop('x) dict.popitem() random

In [203]:
dict

{'b': 2, 'c': 3}

In [None]:
dict.popitem()  # random remove

In [206]:
dict.pop('b')

2

In [207]:
dict

{'c': 3}

# Regular Expressions

- a, X, 9, < -- ordinary characters just match themselves exactly. 
- . (a period) -- matches any single character except newline '\n'
- \w -- (lowercase w) matches a "word" character: a letter or digit or underbar [a-zA-Z0-9_]. Note that although "word" is the mnemonic for this, it only matches a single word char, not a whole word. \W (upper case W) matches any non-word character.
- \b -- boundary between word and non-word
- \s -- (lowercase s) matches a single whitespace character -- space, newline, return, tab, form [ \n\r\t\f]. \S (upper case S) matches any non-whitespace character.
- \t, \n, \r -- tab, newline, return
- \d -- decimal digit [0-9] (some older regex utilities do not support but \d, but they all support \w and \s)
- ^ = start, $ = end -- match the start or end of the string
- \ -- inhibit the "specialness" of a character. So, for example, use \. to match a period or \\ to match a slash. If you are unsure if a character has special meaning, such as '@', you can put a slash in front of it, \@, to make sure it is treated just as a character.

The 'r' at the start of the pattern string designates a python "raw" string which passes through backslashes without change which is very handy for regular expressions

In [215]:
import re

In [262]:
text1 = "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do:  once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, `and what is the use of a book, thought Alice `without pictures or conversation?"
text2 = 'an example word:cat!!'

In [218]:
text.find('Alice')

0

In [223]:
re.findall('alice', text.lower())

['alice', 'alice']

In [224]:
re.search('alice', text.lower())

<_sre.SRE_Match object; span=(0, 5), match='alice'>

In [234]:
#pattern = re.compile(r'[^a-zA-z]')
pattern = re.compile(r'alice')

In [235]:
pattern

re.compile(r'alice', re.UNICODE)

In [236]:
re.findall(pattern, text.lower())

['alice', 'alice']

In [238]:
re.findall(r'alice w', text.lower())

['alice w']

In [240]:
match = re.search(r'alice \w', text.lower())

In [242]:
if match:
    print(match.group())

alice w


In [248]:
re.findall(r'\w\w\w', text.lower())[0:10] # take 3letter, and check left overs loop to find all

['ali', 'was', 'beg', 'inn', 'ing', 'get', 'ver', 'tir', 'sit', 'tin']

In [250]:
re.findall(r'\w\w\w\s', text.lower())[0:10]

['ice ',
 'was ',
 'ing ',
 'get ',
 'ery ',
 'red ',
 'ing ',
 'her ',
 'ter ',
 'the ']

In [252]:
re.findall(r'\W\W', text.lower())  # none letter

[', ', ': ', ', ', ', ', ', ', ' `']

In [255]:
re.findall(r'.\W\W', text.lower())

['k, ', 'o: ', 'g, ', 't, ', 'k, ', 'e `']

In [256]:
re.findall(r'.\s\s', text.lower()) #single character + 2 space

[':  ']

In [260]:
re.findall(r'\W\s\s', text.lower()) # none word +2 space

[':  ']

In [259]:
re.findall(r'\S\S\S\S\S', text.lower())  #none space charater

['alice',
 'begin',
 'tired',
 'sitti',
 'siste',
 'bank,',
 'havin',
 'nothi',
 'twice',
 'peepe',
 'siste',
 'readi',
 'pictu',
 'conve',
 'rsati',
 'book,',
 'thoug',
 'alice',
 '`with',
 'pictu',
 'conve',
 'rsati']

In [291]:
re.findall(r'\w+\b', text.lower())[0:10]

['alice',
 'was',
 'beginning',
 'to',
 'get',
 'very',
 'tired',
 'of',
 'sitting',
 'by']

In [261]:
re.findall(r'\d', text.lower())

[]

In [263]:
re.findall(r'\d\d\d', 'p123g')

['123']

     + => 1 or more occurrences of the pattern to its left, e.g. 'i+' = one or more i's
     * => 0 or more occurrences of the pattern to its left
-     ? => match 0 or 1 occurrences of the pattern to its left
- first the search finds the leftmost match for the pattern, and second it tries to use up as much of the string as possible -- i.e. + and * go as far as possible (the + and * are said to be "greedy").
- Square brackets can be used to indicate a set of chars, so [abc] matches 'a' or 'b' or 'c'. The codes \w, \s etc. work inside square brackets too with the one exception that dot (.) just means a literal dot. For the emails problem, the square brackets are an easy way to add '.' and '-' to the set of chars which can appear around the @ with the pattern r'[\w.-]+@[\w.-]+' to get the whole email address:

In [271]:
re.findall(r'pi+', 'piiig')  

['piii']

In [265]:
re.findall(r'i+', 'piigiiii')

['ii', 'iiii']

In [266]:
re.findall(r'i*', 'piigiiii')

['', 'ii', '', 'iiii', '']

In [268]:
re.findall(r'\d\s+\d\s+\d', 'xx1 2   3xx')  # decimal , zero or more space decimal zero or more space decimal

['1 2   3']

In [278]:
re.findall(r'\d\s+\d..', 'xx12  3xx')   

['2  3xx']

In [279]:
re.findall(r'^b\w+', 'foobar')  

[]

In [280]:
re.findall(r'b\w+', 'foobar')  

['bar']

In [285]:
re.findall(r'\w+r$', 'foobar')

['foobar']

In [294]:
re.findall(r'^a\w+', text.lower())

['alice']

In [295]:
str = 'purple alice-b@google.com monkey dishwasher'
re.findall(r'\w+@\w+', str)

['b@google']

In [302]:
re.findall(r'[\-\w]+@\w+\.+\w+', str)

['alice-b@google.com']

In [313]:
str2 = 'purple alice.-b@google.com monkey dishwasher'
re.findall(r'[\.\-\w]+@\w+\.\w+', str2)

['alice.-b@google.com']

In [315]:
strings = re.findall(r'some pattern', f.read())

In [7]:
import re
text_html = '<td>1</td><td>Michael</td><td>Jessica</td>'
match = re.findall(r'\<td\>\w+\<\/td\>', text_html)

In [8]:
match

['<td>1</td>', '<td>Michael</td>', '<td>Jessica</td>']

In [336]:
x = '<td>1</td>'

x.replace('<td>','')

'1</td>'

In [344]:
x = '<td>1</td>'
replace_func(x)

'1'

In [5]:
def replace_func(x):
    return x.replace('<td>','').replace('</td>','') #replace('<td>','').replace('</td>','')

In [6]:
list(map(lambda x: replace_func(x), match))

['1', 'Michael', 'Jessica']

In [346]:
match2

<map at 0x1c72f3c30f0>

In [None]:
url_patt = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
email_patt = '[a-zA-Z0-9+_\-\.]+@[0-9a-zA-Z][.-0-9a-zA-Z]*.[a-zA-Z]+'