# Error Handling 

## Introduction

In [3]:
def parse(raw_info):
    """Convert a comma-separated company record into a dictionary.

    The expected shape is ``"Google,1998,yes"``: name, founding year and
    an AI-usage flag. No validation is performed beyond what the
    conversions themselves enforce.

    Returns:
        A dict with keys ``'name'`` (str), ``'year'`` (int) and
        ``'uses_ai'`` (bool, True only when the third field is exactly
        ``"yes"``).

    Raises:
        IndexError: if a required field is missing after splitting on ','.
        ValueError: if the second field cannot be converted by ``int``.
    """
    fields = raw_info.split(',')
    # The entries are evaluated top to bottom, so a bad year is reported
    # before a missing flag field.
    return {
        'name': fields[0],
        'year': int(fields[1]),
        'uses_ai': fields[2] == "yes",
    }

In [4]:
def parse_records(raw_records):
    """Parse every raw record with ``parse`` and collect the results.

    Any exception raised by ``parse`` propagates to the caller, so a
    single malformed record aborts the whole batch.

    Returns:
        A list with one parsed record per input item, in input order.
    """
    parsed = []
    for raw in raw_records:
        parsed.append(parse(raw))
    return parsed

In [5]:
parse("Goolge,1998,yes")

{'name': 'Goolge', 'year': 1998, 'uses_ai': True}

In [6]:
# IndexError if we don't use commas as the separator
# parse("Goolge 1998 yes")
# ValueError if the year field is not a valid integer
# parse("Goolge,I998,yes")
# Worst case: we get some output, but not the one we want
# parse("Goolge,1998,YES")

In [7]:
# how to raise error 
raise ValueError("ValueError: invalid literal for int() with base 10: 'I998'")

ValueError: ValueError: invalid literal for int() with base 10: 'I998'

## Introduction to the exception 

In [8]:
# value error example 
try:
    year = int('i1994')
except ValueError:
    print("Conversion failed")
else:
    print("year: ", year)

Conversion failed


In [9]:
# IndexError example: each failure mode gets its own except clause.
try:
    # Bind the result so the success branch reports what was actually parsed.
    info = parse("Goolge 1998 yes")
except ValueError:
    print("Conversion failed")
except IndexError:
    print("Bad separator was use")
else:
    # Runs only when parse() raised nothing.
    print("year: ", info['year'])

Bad separator was use


In [11]:
# IndexError example: one except clause can handle several exception types.
try:
    # Bind the result so the success branch reports what was actually parsed.
    info = parse("Goolge 1998 yes")
except (ValueError, IndexError):
    print("parsing failed")
else:
    # Runs only when parse() raised nothing.
    print("year: ", info['year'])
# Execution continues here whether or not parsing succeeded.
print("Keep going")

parsing failed
Keep going


## Exception object 

In [15]:
def parse(raw_info):
    """Convert a ``"name,year,flag"`` record into a dictionary, validating it.

    Returns:
        A dict with keys ``'name'`` (str), ``'year'`` (int) and
        ``'uses_ai'`` (bool).

    Raises:
        ValueError: if the record does not split into exactly three
            fields, if the year is rejected by ``int``, or if the flag is
            neither ``"yes"`` nor ``"no"``.
    """
    fields = raw_info.split(',')
    if len(fields) != 3:
        raise ValueError("raw_info must contain exactly two commas ")

    name, year_text, flag = fields
    # The year is converted before the flag is checked, so a bad year is
    # reported first when both fields are wrong.
    year = int(year_text)
    if flag not in ("yes", "no"):
        raise ValueError("The flag mus be either 'yes' or 'no'")

    return {'name': name, 'year': year, 'uses_ai': flag == "yes"}

In [17]:
parse("GOOLE,1934,yes")

{'name': 'GOOLE', 'year': 1934, 'uses_ai': True}

In [18]:
parse("GOOLE,I934,yes")

ValueError: invalid literal for int() with base 10: 'I934'

In [20]:
parse("GOOLE,1934,YES")

ValueError: The flag mus be either 'yes' or 'no'

In [29]:
# How to tell two different ValueErrors apart: catch the low-level one and
# re-raise it with a message that names the offending field.
def parse(raw_info):
    """Convert a ``"name,year,flag"`` record into a dictionary, validating it.

    Returns:
        A dict with keys ``'name'`` (str), ``'year'`` (int) and
        ``'uses_ai'`` (bool).

    Raises:
        ValueError: if the record does not split into exactly three
            fields, if the year is rejected by ``int`` (the message
            embeds the original reason), or if the flag is neither
            ``"yes"`` nor ``"no"``.
    """
    splitted = raw_info.split(',')
    if len(splitted) != 3:
        raise ValueError("raw_info must contain exactly two commas ")
    info = {}
    info['name'] = splitted[0]
    try:
        info['year'] = int(splitted[1])
    except ValueError as err:
        # `from None` hides int()'s own traceback; the reason is kept in
        # the message instead. Use `from err` to keep the original
        # exception chained as the cause.
        raise ValueError(f'The year was spelled incorreclty. Reason:{err}') from None
    info['uses_ai'] = splitted[2] == "yes"
    if not info['uses_ai'] and splitted[2] != "no":
        raise ValueError("The flag mus be either 'yes' or 'no'")
    return info

In [30]:
parse("GOOLE,i934,yes")

ValueError: The year was spelled incorreclty. Reason:invalid literal for int() with base 10: 'i934'

In [45]:
# Exception objects are ordinary values: build ten of them up front,
# a ValueError for every even number and an IndexError for every odd one,
# each carrying its number as the message.
errors = [
    ValueError(str(x)) if x % 2 == 0 else IndexError(str(x))
    for x in range(10)
]
s = 0
for x in errors:
    try:
        raise x
    except ValueError as err:
        # str(err) gives back the message, i.e. the even number
        s += int(str(err))
    except IndexError as err:
        # odd numbers are subtracted
        s -= int(str(err))

# (0 + 2 + 4 + 6 + 8) - (1 + 3 + 5 + 7 + 9) = -5
print(s)

-5


## The "except Exception" pattern

In [56]:
def parse_records(records):
    """Parse as many records as possible and count the ones that fail.

    Each item is passed to ``parse``. Any ``Exception`` it raises is
    counted as a failure and the item is skipped, so one broken record
    (wrong format, wrong type, ...) does not abort the batch.

    Returns:
        A tuple ``(parsed, failures)``: the successfully parsed records
        in input order, and the number of items that raised.
    """
    parsed, failures = [], 0
    for raw_info in records:
        try:
            record = parse(raw_info)
        except Exception:
            # Deliberately broad: the failure type does not matter here,
            # only that the record could not be parsed.
            failures += 1
        else:
            parsed.append(record)
    return parsed, failures

In [58]:
# AttributeError: 'NoneType' object has no attribute 'split'
parse(None)

AttributeError: 'NoneType' object has no attribute 'split'

In [57]:
# only one result, because the other two records are broken
# so it's good that the function returns some statistics along with the result
parse_records([
    "Google,1998,yes",
    "facebook,2004,YES",
    None
])

([{'name': 'Google', 'year': 1998, 'uses_ai': True}], 2)

# map, enumerate, zip, generator expressions

## map 

In [83]:
def count_unique_words(queries):
    """Return how many distinct whitespace-separated words occur in *queries*.

    *queries* may be any iterable of strings, e.g.
    ``['install numpy', 'install pandas']`` gives 3.
    """
    distinct = {word for query in queries for word in query.split()}
    return len(distinct)

In [84]:
count_unique_words(['install numpy', "install pandas"])

3

In [94]:
queries = ['Install Numpy??',
          'Install Panads',
           ''
          ]

In [95]:
def preprocess(query):
    """Lower-case *query* and drop every character that is not a letter or a space."""
    lowered = query.lower()
    return ''.join(ch for ch in lowered if ch == ' ' or ch.isalpha())

In [97]:
count_unique_words(map(preprocess, queries))

3

In [113]:
# Quiz: what does this print?  Answer: "0,1,16|" followed by an empty line.
# `numbers` is a lazy map object, so the first print consumes it completely;
# by the second print it is exhausted and yields nothing.
numbers = map(lambda x: x**2, range(3))
print(*list(map(lambda x: x**2, numbers)), sep=',', end='|')
print(*list(map(lambda x: x**2, numbers)), sep=',')

0,1,16|


In [116]:
def find_empty_queries(queries):
    """Return the positions of all falsy items (e.g. ``''`` or ``None``) in *queries*."""
    empty_positions = []
    for position, query in enumerate(queries):
        if not query:
            empty_positions.append(position)
    return empty_positions

In [130]:
# map 
count = 0 
for i, first in enumerate(map(preprocess, queries)):
    if first == queries[i]:
        count += 1
count

1

In [131]:
# zip 
count = 0
for first, second in zip(map(preprocess, queries),queries):
    #print(first, second)
    if first == second:
          count += 1
count

1

In [126]:
# normal expression: [x for x in range(19) if x % 2 == 0]
# lazy entity
even_numbers = (x for x in range(19) if x % 2 == 0)

In [128]:
new_queries = (preprocess(x) for x in queries if x)
list(new_queries)

['install numpy', 'install panads']

## iterator 

In [134]:
# sometimes the data is too large to fit in memory
# in that case you need a library that returns iterators instead of loading the whole table

In [135]:
result = map(abs, range(-3, 4))
print(next(result))
print(list(result))

3
[2, 1, 0, 1, 2, 3]


In [None]:
very_big_table = [
    {'count': x}
    for x in range(100)
]

def iterate_over_records():
    return iter(very_big_table)
    
it = iterate_over_records()
count = 0
for x in it:
    count += x['count']
    

## Iterator

In [174]:
# The list must be bound to `a`: the loop iterates over `a` and reuses
# `x` as its loop variable.
a = [1, 2]
for x in a:
    print(x)

In [177]:
# this is the same loop, written out by hand with the iterator protocol
it = iter(a)
while True:
    try:
        x = next(it)
    except StopIteration:
        # next() signals exhaustion with StopIteration; catching only that
        # keeps any unrelated error visible instead of silently ending the loop
        break
    print(x)

In [182]:
# How do you get only the first element from result without emptying all result?
result = map(lambda x: x**2, range(10))
next(result)

0

In [157]:
#What does this code print?
result = map(abs, range(-3, 4))
print(next(result))
print(list(result))

3
[2, 1, 0, 1, 2, 3]


In [181]:
# What is the best way to calculate the sum of "count" fields for all table entries?
very_big_table = [
    {'count': x}
    for x in range(10000)
]

def iterate_over_records():
    return iter(very_big_table)
    
it = iterate_over_records()
count = 0

#ans 
for x in it:
    count += x['count']
count


49995000

## Functional Programming in Python

In [151]:
import itertools
list(zip('ABCD', 'xy'))

[('A', 'x'), ('B', 'y')]

In [147]:
import itertools
list(itertools.zip_longest('ABCD', 'xy'))

[('A', 'x'), ('B', 'y'), ('C', None), ('D', None)]

In [155]:
x = [1, 2, 3]
y = [4, 5, 6]
# zip pairs the items up: (1, 4), (2, 5), (3, 6)
zipped = zip(x, y)
list(zipped)

# zip(*pairs) is the inverse operation: unpacking the pairs as separate
# arguments regroups them into the original columns (as tuples, hence list()).
x2, y2 = zip(*zip(x, y))
x == list(x2) and y == list(y2)

True

In [188]:
s = [1,2,3,4,5,6,7,8,9]
n = 3
# 1. iter() is an iterator over a sequence
# 2. [x] * n produces a list containing n quantity of x
# 3. *arg unpacks a sequence into arguments for a function call.
# Therefore you're passing the same iterator 3 times to zip()
# and it pulls an item from the iterator each time.
list(zip(*[iter(s)]*n))

[(1, 2, 3), (4, 5, 6), (7, 8, 9)]

In [187]:
x = iter([1,2,3,4,5,6,7,8,9])
list(zip(x, x, x))

[(1, 2, 3), (4, 5, 6), (7, 8, 9)]

In [189]:
chunk_size = 3
L = [1,2,3,4,5,6,7,8,9]

# iterate over L in steps of 3
for start in range(0,len(L),chunk_size): # xrange() in 2.x; range() in 3.x
    end = start + chunk_size
    print (L[start:end]) # three-item chunks

[1, 2, 3]
[4, 5, 6]
[7, 8, 9]


In [None]:
# When an iterator yields (= returns) an item, you can think of that item as "consumed".
# So the next time the iterator is advanced, it yields the next "unconsumed" item.

# File

## introduction 

In [196]:
# Windows 
'C:\\Users\\aliciaqi\\Desktop'

'C:\\Users\\aliciaqi\\Desktop'

In [197]:
# Linux 
'/home/aliciaqi/Desktop'

'/home/aliciaqi/Desktop'

In [198]:
# macOS
'/Users/aliciaqi/Desktop'

'/Users/aliciaqi/Desktop'

In [200]:
# get current work directory 
import os
os.getcwd()

'/Users/aliciaqi/Desktop/Week 3'

In [203]:
# get all file in current work directory 
# synonym: os.listdir('.')
os.listdir()

['Summary .ipynb', '.ipynb_checkpoints']

In [206]:
# get file name in other working directory 
os.listdir('/Users/aliciaqi/Desktop/trepp-ds')

['.DS_Store', 'CLO-NLP', 'README.md', '.gitignore', '.github', '.git']

In [207]:
os.listdir('.')

['Summary .ipynb', '.ipynb_checkpoints']

In [208]:
# get abs path 
os.path.abspath('Summary .ipynb')

'/Users/aliciaqi/Desktop/Week 3/Summary .ipynb'

## Reading from files, writing to files

In [212]:
import os 
os.getcwd()

'/Users/aliciaqi/Desktop/Week 3'

In [216]:
os.listdir()

['Summary .ipynb', '.ipynb_checkpoints', 'company.txt']

In [234]:
# f is an iterator
f = open("company.txt")
# a second read() returns an empty string; reopen the file (or call f.seek(0)) to get the content again
f.read()
f.close()

In [224]:
f = open("company.txt") # equal to f = open("company.txt", 'r')
f.close()

In [227]:
f = open("company.txt") # equal to f = open("company.txt", 'r')
print(f.readlines())
f.close()

['name, founded\n', 'Google, 1998\n', 'Yonder, 1997']


In [231]:
f = open("company.txt") # equal to f = open("company.txt", 'r')
# read line by line 
f.readline()
f.readline()
f.readline()
f.close()

In [233]:
# files are iterable: looping over a file yields its lines
f = open("company.txt")
for x in f:
    print(x)
f.close()

name, founded

Google, 1998

Yonder, 1997


In [236]:
# f is an iterator   
f = open("company.txt")
list(f)
f.close()

In [237]:
# remove the white space 
f = open("company.txt")
for x in f:
    print(x.strip())
f.close()

name, founded
Google, 1998
Yonder, 1997


In [270]:
# collect the words from company.txt (they are written to a file in the next cells)
words = set()
f = open("company.txt")
for x in f: 
    words.update(x.strip().split(','))
f.close()

In [271]:
words

{'1997', '1998', 'Company', 'Facebook', 'Founded', 'Google'}

In [272]:
# open in write mode
fw = open("words.txt", 'w')
for x in words:
    fw.write(x)
    fw.write('\n')
fw.close()

In [273]:
f = open("words.txt")
print(f.read())

f.close()

Founded
1997
1998
Company
Google
Facebook



In [278]:
# this way, you don't have to call close() yourself
# the file is closed automatically when the block exits
# works for every mode
with open("words.txt") as f:
    print(f.readlines())

['Founded\n', '1997\n', '1998\n', 'Company\n', 'Google\n', 'Facebook\n']


## JSON

In [None]:
import json

In [279]:
request = {
    'type': 'like',
    "video_id" : 10,
    "user_id" : 20
}

In [284]:
# json.loads expects JSON text, so the number is passed as the string '1'
s = '1'
x = json.loads(s)
type(x), x

(int, 1)

In [287]:
# in order to load string, we have to use double quote 
json.loads('"abc"')

'abc'

In [290]:
# boolean
json.loads('true')

True

In [293]:
json.loads('[1, true, "abc",[1, true,"abc"]]')

[1, True, 'abc', [1, True, 'abc']]

In [295]:
x = json.loads('{"a" : 1}')

In [298]:
x

{'a': 1}

In [302]:
# keys have to be string in json. 
# double quote 
s = """
{
 "a" : 1,
 "b" : true,
 "c" : [2.0, "abc"],
 "d": {}
}
"""

In [303]:
json.loads(s)

{'a': 1, 'b': True, 'c': [2.0, 'abc'], 'd': {}}

In [None]:
request = {
    'type': 'like',
    "video_id" : 10,
    "user_id" : 20
}

In [304]:
s = """
{
    "type" : "like",
    "video_id" : 10, 
    "user_id" : 20
}

"""

In [306]:
request == json.loads(s)

True

In [316]:
# dump a Python object to a JSON string
json.dumps(request)

'{"type": "like", "video_id": 10, "user_id": 20}'

In [317]:
#json.load
#json.dump

In [319]:
with open("request.json" , 'w') as f:
    json.dump(request, f)

In [320]:
with open("request.json") as f:
    print(f.read())

{"type": "like", "video_id": 10, "user_id": 20}


In [322]:
with open("request.json") as f:
    x = json.load(f)

In [323]:
x

{'type': 'like', 'video_id': 10, 'user_id': 20}