# Notes in Chapter 4
1. PyTorch modules dealing with image data require tensors to be laid out as C × H × W :
channels, height, and width, respectively.
2. Sometimes images have an alpha channel, in other words, they have 4 channel.
3. Neural networks usually work with floating-point tensors as their input.
4. Neural networks exhibit the best training performance when the input data ranges roughly from 0 to 1, or from -1 to 1 (this is an effect of how their building blocks are defined).
5. In working with images, it is good practice to compute the mean and standard deviation on all the training data in advance and then subtract and divide by these fixed, precomputed quantities.
6. Calling `view` on a tensor returns a new tensor that changes the number of dimensions and the striding information, without changing the storage.

In [1]:
import imageio

# img_arr is numpy arr
img_arr = imageio.imread('data/ch4/bobby.jpg')
# (height, width, channel), (H, W, C)
img_arr.shape

(180, 254, 3)

In [2]:
import torch

# img is torch tensor
img = torch.from_numpy(img_arr)
# (channel, height, width), (C, H ,W)
out = img.permute(2, 0, 1)
out.shape

torch.Size([3, 180, 254])

In [3]:
batch_size = 3
batch = torch.zeros(batch_size, 3, 256, 256, dtype=torch.uint8)

In [4]:
import os

data_dir = "data/ch4/image-cats"
filenames = [name for name in os.listdir(data_dir) if os.path.splitext(name)[-1] == '.png']
for i, filename in enumerate(filenames):
    # numpy array of img
    img_t = imageio.imread(os.path.join(data_dir, filename))
    # torch tensor, but (H, W, C)
    img_t = torch.from_numpy(img_t)
    # permute to (C, H, W)
    img_t = img_t.permute(2, 0, 1)
    # sometime images have an alpha channel, we just need RGB channels.
    img_t = img_t[:3]
    batch[i] = img_t

In [5]:
# normalize tensor to 0~1
batch  = batch.float()
# Apporoach One: simply divide by 255.0
batch /= 255.0

In [6]:
batch[:, 0].shape

torch.Size([3, 256, 256])

In [7]:
# Apporoach Two:
# compute the mean and standard deviation of the input data and scale it
# so that the output has zero mean and unit standard deviation across each channel:
n_channels = batch.shape[1]
for c in range(n_channels):
    # mean and std is a scalar
    mean = torch.mean(batch[:, c])
    std = torch.std(batch[:, c])
    batch[:, c] = (batch[:, c] - mean) / std

In [8]:
import imageio

dir_path = 'data/ch4/volumetric-dicom/2-LUNG 3.0  B70f-04083'
vol_arr = imageio.volread(dir_path, 'DICOM')
vol_arr.shape

Reading DICOM (examining files): 1/99 files (1.0%99/99 files (100.0%)
  Found 1 correct series.
Reading DICOM (loading data): 56/99  (56.699/99  (100.0%)


(99, 512, 512)

In [9]:
import torch
vol = torch.from_numpy(vol_arr).float()
vol = torch.unsqueeze(vol, 0)
vol.shape

torch.Size([1, 99, 512, 512])

## 4.3 Representing tabular data

In [13]:
import numpy as np

wine_path = 'data/ch4/tabular-wine/winequality-white.csv'
wineq_np = np.loadtxt(wine_path, dtype=np.float32, delimiter=';', skiprows=1)
wineq_np.shape

(4898, 12)

In [14]:
import csv
# check all the columns have been read
col_list = next(csv.reader(open(wine_path), delimiter=';'))
col_list

['fixed acidity',
 'volatile acidity',
 'citric acid',
 'residual sugar',
 'chlorides',
 'free sulfur dioxide',
 'total sulfur dioxide',
 'density',
 'pH',
 'sulphates',
 'alcohol',
 'quality']

In [16]:
wineq = torch.from_numpy(wineq_np)
wineq.shape, wineq.dtype

(torch.Size([4898, 12]), torch.float32)

In [17]:
data = wineq[:, :-1]
data.shape

torch.Size([4898, 11])

In [22]:
target = wineq[:, -1].long()
target.shape

torch.Size([4898])

In [23]:
target_onehot = torch.zeros(target.shape[0], 10)
target_onehot.scatter_(1, target.unsqueeze(1), 1.0)
target_onehot

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [27]:
data_mean = torch.mean(data, dim=0)
data_mean

tensor([6.8548e+00, 2.7824e-01, 3.3419e-01, 6.3914e+00, 4.5772e-02, 3.5308e+01,
        1.3836e+02, 9.9403e-01, 3.1883e+00, 4.8985e-01, 1.0514e+01])

In [28]:
data_var = torch.var(data, dim=0)
data_var

tensor([7.1211e-01, 1.0160e-02, 1.4646e-02, 2.5726e+01, 4.7733e-04, 2.8924e+02,
        1.8061e+03, 8.9455e-06, 2.2801e-02, 1.3025e-02, 1.5144e+00])

In [29]:
data_norm = (data - data_mean) / torch.sqrt(data_var)
data_norm

tensor([[ 1.7208e-01, -8.1761e-02,  2.1326e-01,  ..., -1.2468e+00,
         -3.4915e-01, -1.3930e+00],
        [-6.5743e-01,  2.1587e-01,  4.7996e-02,  ...,  7.3995e-01,
          1.3422e-03, -8.2419e-01],
        [ 1.4756e+00,  1.7450e-02,  5.4378e-01,  ...,  4.7505e-01,
         -4.3677e-01, -3.3663e-01],
        ...,
        [-4.2043e-01, -3.7940e-01, -1.1915e+00,  ..., -1.3130e+00,
         -2.6153e-01, -9.0545e-01],
        [-1.6054e+00,  1.1666e-01, -2.8253e-01,  ...,  1.0049e+00,
         -9.6251e-01,  1.8574e+00],
        [-1.0129e+00, -6.7703e-01,  3.7852e-01,  ...,  4.7505e-01,
         -1.4882e+00,  1.0448e+00]])

In [31]:
bad_indexes = target <= 3
bad_indexes.shape, bad_indexes.dtype, bad_indexes.sum()

(torch.Size([4898]), torch.bool, tensor(20))

In [36]:
bad_target = data[bad_indexes]
bad_target.shape

torch.Size([20, 11])

In [37]:
# get information about wine grouped into bad, middle, good categories
bad_data = data[target <= 3]
mid_data = data[(target > 3) & (target < 7)]
good_data = data[(target >= 7)]

bad_mean = torch.mean(bad_data, dim=0)
mid_mean = torch.mean(mid_data, dim=0)
good_mean = torch.mean(good_data, dim=0)

for i, args in enumerate(zip(col_list, bad_mean, mid_mean, good_mean)):
    print('{:2} {:20} {:6.2f} {:6.2f} {:6.2f}'.format(i, *args))

 0 fixed acidity          7.60   6.89   6.73
 1 volatile acidity       0.33   0.28   0.27
 2 citric acid            0.34   0.34   0.33
 3 residual sugar         6.39   6.71   5.26
 4 chlorides              0.05   0.05   0.04
 5 free sulfur dioxide   53.33  35.42  34.55
 6 total sulfur dioxide 170.60 141.83 125.25
 7 density                0.99   0.99   0.99
 8 pH                     3.19   3.18   3.22
 9 sulphates              0.47   0.49   0.50
10 alcohol               10.34  10.26  11.42


The bad wines seem to have higher sulfur dioxide, so we use a threshold on total sulfur dioxide to discriminating good wines from bad wines.

In [39]:
total_sulfur_threshold = 141.83
total_sulfur_data = data[:, 6]
predicted_indexes = total_sulfur_data < total_sulfur_threshold
predicted_indexes.shape, predicted_indexes.sum()

(torch.Size([4898]), tensor(2727))

This means that we predict there are 2727 good wines in 4898 wines just by total sulfur threshold.

In [41]:
actual_indexes = target > 5
actual_indexes.shape, actual_indexes.sum()

(torch.Size([4898]), tensor(3258))

Actually, there are 3258 good wines in 4898 wines.

In [43]:
n_matches = torch.sum(predicted_indexes & actual_indexes)
n_predicted = torch.sum(predicted_indexes)
n_actual = torch.sum(actual_indexes)
n_matches, n_matches / n_predicted, n_matches / n_actual

(tensor(2018), tensor(0.7400), tensor(0.6194))

We got 2018 wines right. Since we predicted 2,700 wines, this gives us a 74% chance that if we predict a wine to be high quality, it actually is.  Unfortunately, there are 3,200 good wines, and we only identified 61% of them.

## 4.4 Working with time series

In [45]:
bikes_path = 'data/ch4/bike-sharing-dataset/hour-fixed.csv'
bikes_np = np.loadtxt(bikes_path, dtype=np.float, delimiter=',', 
    skiprows=1, converters={1: lambda x: float(x[8:10])})
bikes = torch.from_numpy(bikes_np)
bikes.shape

torch.Size([17520, 17])

In [51]:
bikes_col_list = next(csv.reader(open(bikes_path), delimiter=','))
bikes_col_list, len(bikes_col_list)

(['instant',
  'dteday',
  'season',
  'yr',
  'mnth',
  'hr',
  'holiday',
  'weekday',
  'workingday',
  'weathersit',
  'temp',
  'atemp',
  'hum',
  'windspeed',
  'casual',
  'registered',
  'cnt'],
 17)

In [52]:
daily_bikes = bikes.view(-1, 24, bikes.shape[1])
daily_bikes.shape

torch.Size([730, 24, 17])

In [53]:
daily_bikes = daily_bikes.transpose(1, 2)
# (N, C, L)
daily_bikes.shape

torch.Size([730, 17, 24])

In [61]:
# for simplify, we focus on one day of 24 hours
first_day = bikes[:24].long()
weather_onehot = torch.zeros(first_day.shape[0], 4)
weather_onehot.scatter_(dim=1, index=first_day[:, 9].unsqueeze(-1)-1, value=1.0)
weather_onehot

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [63]:
torch.cat((bikes[:24], weather_onehot), dim=1)[:1]

tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  6.0000,
          0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,  3.0000, 13.0000,
         16.0000,  1.0000,  0.0000,  0.0000,  0.0000]], dtype=torch.float64)

In [64]:
daily_weather_onehot = torch.zeros(daily_bikes.shape[0], 4, daily_bikes.shape[2])
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [68]:
daily_weather_onehot.scatter_(dim=1, index=daily_bikes[:, 9, :].unsqueeze(1).long()-1, value=1.0)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [71]:
daily_bikes = torch.cat((daily_bikes, daily_weather_onehot), dim=1)
daily_bikes.shape

torch.Size([730, 25, 24])

## 4.5 Representing text

In [72]:
with open('data/ch4/jane-austen/1342-0.txt', encoding='utf8') as f:
    text = f.read()

In [76]:
lines = text.split('\n')
line = lines[200]
line

'“Impossible, Mr. Bennet, impossible, when I am not acquainted with him'

In [77]:
letter_t = torch.zeros(len(line), 128)
letter_t.shape

torch.Size([70, 128])

In [79]:
# onehot encode letter
for i, letter in enumerate(line.lower().strip()):
    letter_index = ord(letter) if ord(letter) < 128 else 0
    letter_t[i][letter_index] = 1

In [81]:
def clean_words(input_str):
    punctuation = '.,;:"!?“”_-'
    word_list = input_str.lower().replace('\n', ' ').split()
    word_list = [word.strip(punctuation) for word in word_list]
    return word_list

words_in_line = clean_words(line)
line, words_in_line

('“Impossible, Mr. Bennet, impossible, when I am not acquainted with him',
 ['impossible',
  'mr',
  'bennet',
  'impossible',
  'when',
  'i',
  'am',
  'not',
  'acquainted',
  'with',
  'him'])

In [82]:
# build word:index dictionary
word_list = sorted(set(clean_words(text)))
word2index_dict = {word: i for (i, word) in enumerate(word_list)}

len(word2index_dict), word2index_dict['impossible']

(7261, 3394)

In [84]:
word_t = torch.zeros(len(words_in_line), len(word2index_dict))
for i, word in enumerate(words_in_line):
    word_index = word2index_dict[word]
    word_t[i][word_index] = 1
    print('{:2} {:4} {}'.format(i, word_index, word))

word_t.shape

 0 3394 impossible
 1 4305 mr
 2  813 bennet
 3 3394 impossible
 4 7078 when
 5 3315 i
 6  415 am
 7 4436 not
 8  239 acquainted
 9 7148 with
10 3215 him


torch.Size([11, 7261])

## Exercise

In [89]:
# exercise 1.a
img_np = imageio.imread('data/ch4/bobby.jpg')
img_t = torch.from_numpy(img_np).float()
img_t = img_t.permute(2, 0, 1)
img_t.shape

torch.Size([3, 180, 254])

In [91]:
# exercise 1.b
img_mean = torch.mean(img_t, dim=0)
img_mean.shape

torch.Size([180, 254])

In [93]:
# exercise 1.c
img_mean_channel = torch.mean(img_t.view(3, -1), dim=1)
img_mean_channel.shape, img_mean_channel

(torch.Size([3]), tensor([149.9068, 112.7613,  71.0230]))

In [94]:
# exercise 2.a
with open('data/ch4/bike-sharing-dataset/fix_missing_hours.py') as f:
    code_text = f.read()

len(code_text)

1152

In [109]:
code_words = code_text.lower().replace(r"[a-z]", ' ').split()
code_word_list = sorted(set(code_words))
len(code_words), len(code_word_list)

(87, 57)

In [110]:
code_word2index_dict = {code_word:i for i, code_word in enumerate(code_word_list)}

code_word_onehot = torch.zeros(len(code_words), len(code_word_list))
for i, code_word in enumerate(code_words):
    code_word_index = code_word2index_dict[code_word]
    code_word_onehot[i][code_word_index] = 1

    print('{:2} {:2} {}'.format(i, code_word_index, code_word))

 0 29 import
 1 12 copy
 2 29 import
 3 15 csv
 4  0 #
 5 31 instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
 6 56 with
 7 48 open('hour.csv',
 8 44 newline='')
 9 11 as
10 27 hour_file,
11 47 open('hour-fixed.csv',
12  3 'w',
13 44 newline='')
14 11 as
15 23 fixed_file:
16 25 hour_csv
17  9 =
18 16 csv.reader(hour_file)
19 20 fixed_csv
20  9 =
21 17 csv.writer(fixed_file)
22 37 last_row
23  9 =
24 45 none
25 24 for
26 55 this_row
27 30 in
28 26 hour_csv:
29 28 if
30 37 last_row
31 34 is
32 46 none:
33 49 pass
34 18 elif
35 38 last_row[0]
36 10 ==
37  2 'instant':
38 49 pass
39 19 else:
40 35 last_hour
41  9 =
42 32 int(last_row[5])
43 52 this_hour
44  9 =
45 33 int(this_row[5])
46 28 if
47 52 this_hour
48  8 <
49 36 last_hour:
50 52 this_hour
51  4 +=
52  6 24
53 40 missing_row
54  9 =
55 13 copy.deepcopy(last_row)
56 42 missing_row[-1]
57  9 =
58  5 0
59 24 for
60 39 missing_hour
61 30 in
62 51 range(last_hour+1,
6