# Reading and Writing CSV files

<br>
Let's import our datafile mpg.csv, which contains fuel economy data for 234 cars.

* mpg : miles per gallon
* class : car classification
* cty : city mpg
* cyl : # of cylinders
* displ : engine displacement in liters
* drv : f = front-wheel drive, r = rear-wheel drive, 4 = four-wheel drive
* fl : fuel (e = ethanol E85, d = diesel, r = regular, p = premium, c = CNG)
* hwy : highway mpg
* manufacturer : automobile manufacturer
* model : model of car
* trans : type of transmission
* year : model year

In [1]:
import csv

%precision 3

# A forward slash is accepted as a path separator on every platform and avoids
# the invalid '\m' escape sequence that a backslash would create in this literal.
# newline='' follows the csv module's recommendation for files passed to a reader.
with open('data/mpg.csv', newline='') as file:
    # DictReader uses the header row as keys; every value is read as a string.
    mpg = list(csv.DictReader(file))

mpg[:2]

[OrderedDict([('', '1'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'auto(l5)'),
              ('drv', 'f'),
              ('cty', '18'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '2'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'manual(m5)'),
              ('drv', 'f'),
              ('cty', '21'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')])]

In [2]:
mpg[0].keys() #column names

odict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])

In [3]:
sum(float(data['cty']) for data in mpg) / len(mpg) #average cty fuel economy across all cars

16.859

In [4]:
# Find the average hwy mpg for each class of vehicle in our dataset.
# One pass over the rows accumulates [sum, count] per class, instead of
# rescanning the whole dataset once for every distinct class.
from collections import defaultdict

hwy_totals = defaultdict(lambda: [0.0, 0])
for d in mpg:
    totals = hwy_totals[d['class']]
    totals[0] += float(d['hwy'])  # CSV values are strings, so convert first
    totals[1] += 1

hmympg_by_class = [
    (vclass, total / count)
    for vclass, (total, count) in hwy_totals.items()
]

# Order the classes from lowest to highest average highway mpg.
hmympg_by_class.sort(key=lambda x: x[1])
hmympg_by_class

[('pickup', 16.879),
 ('suv', 18.129),
 ('minivan', 22.364),
 ('2seater', 24.800),
 ('midsize', 27.293),
 ('subcompact', 28.143),
 ('compact', 28.298)]

# Dates and Times

In [5]:
import datetime as dt
import time as tm

In [6]:
tm.time()

1587732141.919

In [7]:
dtnow = dt.datetime.fromtimestamp(tm.time())
dtnow

datetime.datetime(2020, 4, 24, 14, 42, 22, 654557)

In [8]:
dtnow.year, dtnow.month, dtnow.day, dtnow.hour, dtnow.minute, dtnow.second

(2020, 4, 24, 14, 42, 22)

In [9]:
delta = dt.timedelta(hours = 1)
delta

datetime.timedelta(0, 3600)

In [10]:
summertime = dtnow + delta
summertime

datetime.datetime(2020, 4, 24, 15, 42, 22, 654557)

In [11]:
month_delta = dt.timedelta(days = 30)
month_delta

datetime.timedelta(30)

In [12]:
#30 days ago
dtnow - month_delta

datetime.datetime(2020, 3, 25, 14, 42, 22, 654557)

In [13]:
summertime > dtnow

True

# Objects and map()

In [14]:
class Person:
    """A person with a name, a location, and a department shared by all instances."""

    # Class attribute: every instance sees this same value unless it is shadowed.
    department = 'Computational Linguistics'

    def __init__(self, name=None, location=None):
        """Create a person; name and location default to None until they are set."""
        # Defining the attributes up front means reading them before a setter
        # is called yields None instead of raising AttributeError.
        self.name = name
        self.location = location

    def set_name(self, a_name):
        """Store a_name as this person's name."""
        self.name = a_name

    def set_location(self, a_location):
        """Store a_location as this person's location."""
        self.location = a_location

In [15]:
jerry = Person()
jerry.set_name('Jerry Chang')
jerry.set_location('Tuebingen')
print('{} lives in {} and studied in the Department of {}.'.format(jerry.name, jerry.location, jerry.department))

Jerry Chang lives in Tuebingen and studied in the Department of Computational Linguistics.


In [16]:
store1 = [10.00, 11.00, 12.34, 2.34]
store2 = [9.00, 11.10, 13.34, 2.01]

best = map(max, store1, store2)
best #a map object

<map at 0x1937f493da0>

In [17]:
list(best)

[10.000, 11.100, 13.340, 2.340]

# Lambda and List Comprehensions

In [18]:
# Keep only the first whitespace-separated word (the title) and the last one
# (the surname), joined by a single space.
title_and_name = lambda x: ' '.join((x.split()[0], x.split()[-1]))

In [19]:
title_and_name('Mr. Jerry Chih-Chun Chang')

'Mr. Chang'

In [20]:
(lambda x: x.split()[0] + ' ' + x.split()[-1])('Mr. Jerry Chih-Chun Chang')

'Mr. Chang'

In [21]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']
list(map(title_and_name, people))

['Dr. Brooks', 'Dr. Collins-Thompson', 'Dr. Vydiswaran', 'Dr. Romero']

In [22]:
# Even numbers are kept as they are; every odd number is replaced by 'odd'.
my_list = ['odd' if number % 2 else number for number in range(10)]
my_list

[0, 'odd', 2, 'odd', 4, 'odd', 6, 'odd', 8, 'odd']

In [23]:
# Flattened 3x3 multiplication table: the first `for` is the outer loop,
# so the results are grouped by the left-hand factor.
another_list = [
    left * right
    for left in range(1, 4)
    for right in range(1, 4)
]
another_list

[1, 2, 3, 2, 4, 6, 3, 6, 9]

# Numerical Python (NumPy)

In [24]:
import numpy as np

In [25]:
x = np.array([1,2,3])
x

array([1, 2, 3])

In [26]:
m = np.array([[1,2,3],[4,5,6]])
m

array([[1, 2, 3],
       [4, 5, 6]])

In [27]:
m.shape

(2, 3)

In [28]:
#returns evenly spaced values within a given interval
n = np.arange(0, 16, 2)
n

array([ 0,  2,  4,  6,  8, 10, 12, 14])

In [29]:
n = n.reshape(2,4)
n

array([[ 0,  2,  4,  6],
       [ 8, 10, 12, 14]])

In [30]:
#returns evenly spaced numbers over a specified interval
o = np.linspace(0, 1, 5)
o

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [31]:
#reshape in-place
o.resize(5, 1)
o

array([[0.  ],
       [0.25],
       [0.5 ],
       [0.75],
       [1.  ]])

In [32]:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [33]:
np.zeros((2,2))

array([[0., 0.],
       [0., 0.]])

In [34]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [35]:
y = np.array([4,5,6])
np.diag(y)

array([[4, 0, 0],
       [0, 5, 0],
       [0, 0, 6]])

In [36]:
np.repeat(y,2)

array([4, 4, 5, 5, 6, 6])

In [37]:
ones = np.ones([2,2], int)
ones

array([[1, 1],
       [1, 1]])

In [38]:
#stack arrays in sequence vertically (row wise)
np.vstack([ones,2*ones, 3*ones])

array([[1, 1],
       [1, 1],
       [2, 2],
       [2, 2],
       [3, 3],
       [3, 3]])

In [39]:
#stack arrays in sequence horizontally (column wise)
np.hstack([ones,2*ones, 3*ones])

array([[1, 1, 2, 2, 3, 3],
       [1, 1, 2, 2, 3, 3]])

In [40]:
print(x+y)
print(x-y)
print(x*y)
print(x/y)

[5 7 9]
[-3 -3 -3]
[ 4 10 18]
[0.25 0.4  0.5 ]


In [41]:
print(x**2) #elementwise power

[1 4 9]


**Dot Product:**  

$ \begin{bmatrix}x_1 & x_2 & x_3\end{bmatrix}
\cdot
\begin{bmatrix}y_1 \\ y_2 \\ y_3\end{bmatrix}
= x_1 y_1 + x_2 y_2 + x_3 y_3$

In [42]:
x.dot(y)

32

In [43]:
z = np.array([y, y**2])
z

array([[ 4,  5,  6],
       [16, 25, 36]])

In [44]:
z.T

array([[ 4, 16],
       [ 5, 25],
       [ 6, 36]])

In [45]:
z.T.shape

(3, 2)

In [46]:
z.dtype

dtype('int32')

In [47]:
#cast to a specific type.
z = z.astype('f')
z.dtype

dtype('float32')

In [48]:
a = np.array([1,2,3,4,5])

In [49]:
a.sum(), a.max(), a.min(), a.mean(), a.std()

(15, 5, 1, 3.0, 1.4142135623730951)

In [50]:
a.argmax(), a.argmin()

(4, 0)

In [51]:
r = np.arange(36)
r.resize(6,6)
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [52]:
# a slice of the last row, and only every other element
r[-1, ::2]

array([30, 32, 34])

In [53]:
r[r > 30] = 30
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [54]:
s = r[:3,:3]
s

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [12, 13, 14]])

In [55]:
s[:] = 0
s

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [56]:
r

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [57]:
r_copy = r.copy()

In [58]:
r_copy[:] = 5

In [59]:
r

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [60]:
r_copy

array([[5, 5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5, 5]])

In [61]:
test = np.random.randint(0, 10, (3,3))
test

array([[5, 3, 5],
       [3, 9, 6],
       [7, 5, 4]])

In [62]:
for row in test:
    print(row)

[5 3 5]
[3 9 6]
[7 5 4]


In [63]:
for i in range(len(test)):
    print(test[i])

[5 3 5]
[3 9 6]
[7 5 4]


In [64]:
for i, row in enumerate(test):
    print('row', i, 'is', row)

row 0 is [5 3 5]
row 1 is [3 9 6]
row 2 is [7 5 4]


In [65]:
test2 = test**2
test2

array([[25,  9, 25],
       [ 9, 81, 36],
       [49, 25, 16]], dtype=int32)

In [66]:
for i, j in zip(test, test2):
    print(i,'+',j,'=',i+j)

[5 3 5] + [25  9 25] = [30 12 30]
[3 9 6] + [ 9 81 36] = [12 90 42]
[7 5 4] + [49 25 16] = [56 30 20]
