### Reading a CSV file

Data manipulation without the pandas library.

In [None]:
import csv

%precision 2

In [None]:
# newline='' leaves newline handling to the csv module, as the csv docs ask for
# any file object handed to a reader. DictReader turns each row into a dict
# keyed by the header row, with every value kept as a string.
with open('../data/mpg.csv', newline='') as csvfile:
    data = list(csv.DictReader(csvfile))

In [None]:
data[0]

<br>
Here's a more complex example where we group the cars by class and find the average highway (`hwy`) mpg for each group.

In [None]:
# unique values for class type
vehicleClass = {row['class'] for row in data}
vehicleClass

In [None]:
# Accumulate [hwy total, row count] per class in a single pass over the rows
# rather than rescanning the whole dataset once for every class.
hwy_stats = {name: [0, 0] for name in vehicleClass}
for d in data:
    stats = hwy_stats[d['class']]
    stats[0] += float(d['hwy'])  # important: d['hwy'] is a string, so convert first
    stats[1] += 1

# Build (class, average hwy mpg) pairs in vehicleClass's iteration order.
MpgByClass = []
for cls in vehicleClass:
    total, count = hwy_stats[cls]
    MpgByClass.append((cls, total / count))

In [None]:
print(MpgByClass)

In [None]:
# sorting MpgByClass by mpg
MpgByClass.sort(key = lambda x: x[1], reverse = True)
MpgByClass

---
### Python Dates and Times

In [None]:
import time
import datetime as dt

<br>
`time.time()` returns the current time in seconds since the Epoch (January 1st, 1970).

In [None]:
timestamp = time.time() 
timestamp

In [None]:
dtnow = dt.datetime.fromtimestamp(timestamp)
dtnow

In [None]:
dtnow.year, dtnow.month, dtnow.day, dtnow.hour, dtnow.minute, dtnow.second

In [None]:
# Easier way: ask for the current local date and time directly. The datetime
# module was imported under the alias `dt`, so it must be reached through it.
dtnow_ease = dt.datetime.now()
dtnow_ease

In [None]:
dt.timedelta(days=100)

In [None]:
dtnow + dt.timedelta(days=100)

---
### Advanced Python Objects

In [None]:
class Person:
    """A person with a name and a location; every instance shares one department."""

    # Class attribute: looked up on the class unless an instance overrides it.
    department = "School of Data Science"

    def set_name(self, new_name):
        """Store *new_name* on this instance as ``name``."""
        self.name = new_name

    def set_location(self, new_location):
        """Store *new_location* on this instance as ``location``."""
        self.location = new_location

In [None]:
person = Person()
person.set_name("Jason Suk")
person.set_location("Hong Kong")

In [None]:
print("A person named {} is in {} lives in {}.".format(person.name, person.department, person.location))

The class examples below are adapted from [w3schools.com](https://www.w3schools.com/python/python_classes.asp)

In [None]:
class Person:  # capital letter to start as a convention
    """A person with a name and an age who can introduce themselves."""

    def __init__(self, new_name, new_age):
        """Store the given name and age on the new instance."""
        self.name, self.age = new_name, new_age

    def myfunc(self):
        """Print a greeting that includes this person's name."""
        greeting = "Hello my name is " + self.name
        print(greeting)

#### The `__init__()` Function

To understand classes, we first have to understand the special `__init__()` method.

All classes have a method called `__init__()`, which is always executed when the class is being instantiated, i.e. when a new object is created from it.

In [None]:
p1 = Person("Jason", 12)
print(p1)
print(p1.name)
print(p1.age)

#### Object Methods
Objects can also contain methods. Methods in objects are functions that belong to the object.

In [None]:
p2 = Person("John", 36)
p2.myfunc()

#### The self Parameter
The `self` parameter is a reference to the current instance of the class, and is used to access the variables that belong to that instance.

It does not have to be named `self`; you can call it whatever you like, but it has to be the first parameter of any method in the class:

In [None]:
class Person:
    """A person whose ``myfunc`` names its instance parameter ``abc`` instead of ``self``."""

    def __init__(self, new_name, new_age):
        """Record the name and age passed at construction time."""
        self.age = new_age
        self.name = new_name

    def myfunc(abc):
        """Print a greeting; ``abc`` plays the role usually given to ``self``."""
        text = "Hello my name is " + abc.name
        print(text)

In [None]:
p3 = Person("Julia", 12)
p3.myfunc()

#### Modify / Delete Object Properties
You can modify or delete properties on objects like this:

In [None]:
p1.age = 30

In [None]:
p1.age

In [None]:
del p1.age

In [None]:
p1.age

In [None]:
del p1 # deleting objects

---
### Advanced Python map()

map(function, iterable, ...)

Return an iterator that applies _function_ to every item of _iterable_ yielding the results.

In [None]:
store1 = [10.00, 11.00, 12.00, 13.00]
store2 = [6.00, 7.00, 8.12, 16.12]

In [None]:
cheapest = map(min, store1, store2)
print(cheapest)

In [None]:
for value in cheapest:
    print(value)

Here is a list of faculty teaching this MOOC. Can you write a function and apply it using map() to get a list of all faculty titles and last names (e.g. ['Dr. Brooks', 'Dr. Collins-Thompson', …]) ?

Problem

In [None]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    return #Your answer here

list(map(#Your answer here))

My solution

In [None]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    """Return the title and last name of *person*, e.g. 'Dr. Brooks'.

    Args:
        person: Full name beginning with a title, such as
            'Dr. Christopher Brooks'.

    Returns:
        The first and last whitespace-separated words joined by one space.

    Raises:
        IndexError: If *person* contains no words.
    """
    words = person.split()
    # The separating space is required: without it the result is 'Dr.Brooks'.
    return words[0] + ' ' + words[-1]

list(map(split_title_and_name, people))


---
### Advanced Python Lambda and List Comprehensions

Problem: Convert the function into a lambda

In [None]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    """Return the first and last words of *person* separated by one space."""
    words = person.split()
    return '{} {}'.format(words[0], words[-1])

#option 1
for person in people:
    print(split_title_and_name(person) == (lambda person:???))

#option 2
#list(map(split_title_and_name, people)) == list(map(???))


My solution

In [None]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    """Build 'Title Lastname' from a full name such as 'Dr. Christopher Brooks'."""
    pieces = person.split()
    title, last_name = pieces[0], pieces[-1]
    return ' '.join((title, last_name))

In [None]:
for person in people:
    print((lambda x: x.split()[0] + ' ' + x.split()[-1])(person))

In [None]:
list(map(lambda x: x.split()[0] + ' ' + x.split()[-1], people))

In [None]:
#option 1
for person in people:
    print(split_title_and_name(person) == (lambda x: x.split()[0] + ' ' + x.split()[-1])(person))

#option 2
list(map(split_title_and_name, people)) == list(map(lambda x:x.split()[0] + ' ' + x.split()[-1], people))

#### List comprehension

Try converting a function into a list comprehension.

In [None]:
def times_tables():
    """Return the 0-9 multiplication table flattened row by row (100 products)."""
    products = []
    factors = range(10)
    # Explicit nested loops on purpose: this is the long form that the list
    # comprehension in the next cell is checked against.
    for left in factors:
        for right in factors:
            products.append(left * right)
    return products

In [None]:
times_tables() == [i * j for i in range(10) for j in range(10)]

Here’s a harder question which brings a few things together.

Many organizations have user ids which are constrained in some way. Imagine you work at an internet service provider and the user ids are all two letters followed by two numbers (e.g. aa49). Your task at such an organization might be to hold a record on the billing activity for each possible user.

Write an initialization line as a single list comprehension which creates a list of all possible user ids. Assume the letters are all lower case.

In [None]:
lowercase = 'abcdefghijklmnopqrstuvwxyz'
digits = '0123456789'

In [None]:
answer = [txt1 + txt2 + num1 + num2 for txt1 in lowercase for txt2 in lowercase for num1 in digits for num2 in digits]
answer[1:10]

In [None]:
# without list comprehension
ids = []

# Letters vary slowest and the last digit fastest, so the ids come out in the
# same order as the comprehension that builds `answer`.
for txt1 in lowercase:
    for txt2 in lowercase:
        for num1 in digits:
            for num2 in digits:
                ids.append(txt1 + txt2 + num1 + num2)

print(ids[1:10])
print(answer == ids)

---
### Numerical Python Library (NumPy)

#### Basic characteristics

In [1]:
import numpy as np

In [2]:
list_num = [1, 2, 3, 4, 5]
list_mix = [1, 'str', True, []]

In [3]:
np.array(list_num), np.array(list_mix)

(array([1, 2, 3, 4, 5]), array([1, 'str', True, list([])], dtype=object))

In [4]:
np.arange(1, 50, 5) # interval

array([ 1,  6, 11, 16, 21, 26, 31, 36, 41, 46])

In [5]:
np.array(list_num * 2)

array([1, 2, 3, 4, 5, 1, 2, 3, 4, 5])

In [6]:
np.repeat(list_num, 2)

array([1, 1, 2, 2, 3, 3, 4, 4, 5, 5])

In [7]:
np.array(list_num * 2).reshape(2, 5)

array([[1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5]])

In [8]:
linsp = np.linspace(1, 100, 10) # num of values
linsp

array([  1.,  12.,  23.,  34.,  45.,  56.,  67.,  78.,  89., 100.])

In [9]:
linsp.resize(2, 5)
linsp

array([[  1.,  12.,  23.,  34.,  45.],
       [ 56.,  67.,  78.,  89., 100.]])

In [10]:
np.ones([3, 3])

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [11]:
np.zeros([3, 3])

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [12]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [13]:
np.diag([2, 1, 6])

array([[2, 0, 0],
       [0, 1, 0],
       [0, 0, 6]])

In [14]:
p = np.ones([3, 3], dtype = int)
p

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [15]:
np.vstack([p, 2*p])

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [2, 2, 2],
       [2, 2, 2],
       [2, 2, 2]])

In [16]:
np.hstack([p, 2*p])

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

---
#### Operations

In [17]:
import numpy as np

In [18]:
x = np.array([3, 7, 1])
y = np.array([4, 8, 6])

In [19]:
x + y

array([ 7, 15,  7])

In [20]:
x * y

array([12, 56,  6])

In [21]:
x.dot(y) # dot product

74

In [22]:
# NOTE: ^ is Python's bitwise XOR, not exponentiation, so y^2 XORs each element
# with 2 (4^2 == 6, 8^2 == 10, 6^2 == 4); use y**2 for element-wise squares.
z = np.array([y, y^2])
z

array([[ 4,  8,  6],
       [ 6, 10,  4]])

In [23]:
z.transpose()

array([[ 4,  6],
       [ 8, 10],
       [ 6,  4]])

In [24]:
z.shape, z.T.shape

((2, 3), (3, 2))

In [25]:
a = np.array([-6, -2, 1, 2, 5, 6, 10, 11])

In [26]:
a.sum(), a.min(), a.max(), a.mean(), a.std()

(27, -6, 11, 3.375, 5.429951657243368)

In [27]:
# ndarray offers sum/min/max/mean/std as methods but has no median method, so
# this call raises AttributeError; the function form np.median(a) computes it.
a.median()

AttributeError: 'numpy.ndarray' object has no attribute 'median'

#### Slicing, indexing

In [28]:
# indexing 
a.argmax(), a.argmin()

(7, 0)

In [29]:
a[1], a[1:3], a[-3:-1]

(-2, array([-2,  1]), array([ 6, 10]))

In [30]:
A = a.reshape(2,4)
A

array([[-6, -2,  1,  2],
       [ 5,  6, 10, 11]])

In [31]:
A = np.vstack([A, A*2])
A

array([[ -6,  -2,   1,   2],
       [  5,   6,  10,  11],
       [-12,  -4,   2,   4],
       [ 10,  12,  20,  22]])

In [34]:
A[0,0], A[-1,-1], A[0, 1:4]

(-6, 22, array([-2,  1,  2]))

In [35]:
A[A > 0]

array([ 1,  2,  5,  6, 10, 11,  2,  4, 10, 12, 20, 22])

In [37]:
B = A[0:2:,]
B

array([[-6, -2,  1,  2],
       [ 5,  6, 10, 11]])

In [38]:
B[0, 0] = 1
B

array([[ 1, -2,  1,  2],
       [ 5,  6, 10, 11]])

In [39]:
A # change in matrix B affects A

array([[  1,  -2,   1,   2],
       [  5,   6,  10,  11],
       [-12,  -4,   2,   4],
       [ 10,  12,  20,  22]])

In [40]:
old = np.array([[1, 1, 1],
                [1, 1, 1]])

new = old
new[0, :2] = 0

print(old)

[[0 0 1]
 [1 1 1]]


In [41]:
old = np.array([[1, 1, 1],
                [1, 1, 1]])

new = old.copy() # use .copy()
new[:, 0] = 0

print(old)

[[1 1 1]
 [1 1 1]]


#### Iterating over arrays

In [42]:
import numpy as np

In [43]:
test = np.random.randint(0, 10, (4,3))
test

array([[2, 4, 2],
       [2, 4, 1],
       [9, 3, 8],
       [0, 7, 9]])

In [44]:
for row in test:
    print(row)

[2 4 2]
[2 4 1]
[9 3 8]
[0 7 9]


In [45]:
for i in range(len(test)):
    print(test[i])

[2 4 2]
[2 4 1]
[9 3 8]
[0 7 9]


In [49]:
for i, row in enumerate(test):
    print("Row {} : {}".format(i, row))

Row 0 : [2 4 2]
Row 1 : [2 4 1]
Row 2 : [9 3 8]
Row 3 : [0 7 9]


In [54]:
test2 = test**2
test2

array([[ 4, 16,  4],
       [ 4, 16,  1],
       [81,  9, 64],
       [ 0, 49, 81]])

In [55]:
# Pair each row of `test` with the matching row of `test2` and show their
# element-wise product; the operator printed is '*' because i*j is computed.
for i, j in zip(test, test2):
    print(i, ' * ', j, ' = ', i*j)

[2 4 2]  +  [ 4 16  4]  =  [ 8 64  8]
[2 4 1]  +  [ 4 16  1]  =  [ 8 64  1]
[9 3 8]  +  [81  9 64]  =  [729  27 512]
[0 7 9]  +  [ 0 49 81]  =  [  0 343 729]
