In [1]:
import csv
import os

import imageio
import numpy as np
import torch

In [2]:
# Relative locations of the image folder and the dataset folder used by the cells below.
IMG, DATA = '../../../../img', '../../../../data'

In [3]:
# Read the sample photo from disk; the result is handed to torch.from_numpy in the next cell.
bobby_path = f'{IMG}/bobby.jpg'
img_arr = imageio.imread(bobby_path)
img_arr.shape

(720, 1280, 3)

In [4]:
# Wrap the NumPy image as a tensor, then reorder its axes from
# height x width x channel to channel x height x width (torch format).
img = torch.from_numpy(img_arr)
channels_first = (2, 0, 1)
out = img.permute(*channels_first)
out.size()

torch.Size([3, 720, 1280])

In [5]:
# Pre-allocate an all-zero uint8 buffer for the mini-batch,
# laid out as batch x channel x height x width.
batch_size = 3
batch = torch.zeros((batch_size, 3, 256, 256), dtype=torch.uint8)

In [6]:
# Folder (inside IMG) whose .png files are listed and loaded into `batch` below.
CAT_DIR = f'{IMG}/cats'

In [7]:
# Collect the PNG files in CAT_DIR.
# os.listdir() yields entries in arbitrary, filesystem-dependent order, so sort them:
# the loading loop assigns image i to batch[i], and the slot order should be
# the same on every run and every machine.
filenames = sorted(f for f in os.listdir(CAT_DIR) if f.endswith('.png'))
filenames

['cat1.png', 'cat2.png', 'cat3.png']

In [8]:
# Fill the pre-allocated batch: image number i goes into slot batch[i].
for i, f in enumerate(filenames):
    img_arr = imageio.imread(os.path.join(CAT_DIR, f))
    # Move the channel axis to the front (HxWxC -> CxHxW) and keep only the
    # first three channels, which drops an alpha channel if there is one.
    img_t = torch.from_numpy(img_arr).permute(2, 0, 1)[:3]
    batch[i] = img_t

In [9]:
# Convert to floating point and divide by 255.
# NOTE: this overwrites `batch`, so running the cell a second time divides by 255 again.
batch = batch.float().div(255.)

In [10]:
# Standardize each channel of `batch` in place: subtract the channel's mean and divide
# by its standard deviation, both computed over every image and pixel of that channel.
n_channels = batch.size(1)
for c in range(n_channels):
    channel = batch[:, c]
    mean = channel.mean()
    sd = channel.std()
    batch[:, c] = (channel - mean) / sd

In [11]:
# Read the whole DICOM series stored in this directory as one volume array.
dir_path = f'{DATA}/p1ch4/volumetric-dicom/2-LUNG 3.0  B70f-04083'
vol_arr = imageio.volread(dir_path, format='DICOM')
vol_arr.shape

Reading DICOM (examining files): 1/99 files (1.0%99/99 files (100.0%)
  Found 1 correct series.
Reading DICOM (loading data): 99/99  (100.0%)


(99, 512, 512)

In [12]:
# Turn the volume into a float tensor and prepend a size-1 leading dimension.
vol = torch.from_numpy(vol_arr).float().unsqueeze(0)
vol.shape

torch.Size([1, 99, 512, 512])

In [13]:
# Load the semicolon-separated wine table as float32, skipping the header row.
path = f'{DATA}/p1ch4/tabular-wine/winequality-white.csv'
wine_np = np.loadtxt(
    path,
    delimiter=';',
    skiprows=1,
    dtype=np.float32,
)
wine_np

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32)

In [14]:
# Read only the first row of the CSV to recover the column names.
# The file is opened in a `with` block so the handle is closed as soon as the header
# has been read; newline='' is what the csv module documents for files passed to csv.reader.
with open(path, newline='') as csv_file:
    col_list = next(csv.reader(csv_file, delimiter=';'))
wine_np.shape, col_list

((4898, 12),
 ['fixed acidity',
  'volatile acidity',
  'citric acid',
  'residual sugar',
  'chlorides',
  'free sulfur dioxide',
  'total sulfur dioxide',
  'density',
  'pH',
  'sulphates',
  'alcohol',
  'quality'])

In [15]:
# Wrap the NumPy table as a torch tensor and check its size and element type.
wine = torch.from_numpy(wine_np)
wine.size(), wine.dtype

(torch.Size([4898, 12]), torch.float32)

In [17]:
# Input features: every row, every column except the last one.
data = wine[..., :-1]
data.size()

torch.Size([4898, 11])

In [18]:
# Target values: the last column of every row.
target = wine[..., -1]
target.size()

torch.Size([4898])

In [21]:
# Re-extract the last column, this time as 64-bit integers so the values can
# serve as indices for scatter_ in the one-hot encoding that follows.
target = wine[:, -1].to(torch.long)
target

tensor([6, 6, 6,  ..., 6, 7, 6])

In [22]:
# One-hot encode the targets: start from an all-zero (rows x 10) matrix, then write 1.0
# into the column given by each row's target value. scatter_ works in place and
# returns the tensor, which is what the cell displays.
# NOTE(review): 10 columns only accommodates target values 0..9 -- confirm the label range.
target_onehot = torch.zeros(len(target), 10)
target_onehot.scatter_(1, target[:, None], 1.)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [23]:
# Show the index tensor handed to scatter_: `target` with an extra size-1 dimension at position 1.
torch.unsqueeze(target, 1)

tensor([[6],
        [6],
        [6],
        ...,
        [6],
        [7],
        [6]])

In [24]:
# Mean of each feature column (reduction over dimension 0, the rows).
data_mean = data.mean(dim=0)
data_mean

tensor([6.8548e+00, 2.7824e-01, 3.3419e-01, 6.3914e+00, 4.5772e-02, 3.5308e+01,
        1.3836e+02, 9.9403e-01, 3.1883e+00, 4.8985e-01, 1.0514e+01])

In [25]:
# Variance of each feature column (reduction over dimension 0, the rows).
data_var = data.var(dim=0)
data_var

tensor([7.1211e-01, 1.0160e-02, 1.4646e-02, 2.5726e+01, 4.7733e-04, 2.8924e+02,
        1.8061e+03, 8.9455e-06, 2.2801e-02, 1.3025e-02, 1.5144e+00])

In [26]:
# Standardize each column: subtract its mean, then divide by its standard deviation
# (the square root of the variance computed above).
data_centered = data - data_mean
data_normalized = data_centered / data_var.sqrt()
data_normalized

tensor([[ 1.7208e-01, -8.1761e-02,  2.1326e-01,  ..., -1.2468e+00,
         -3.4915e-01, -1.3930e+00],
        [-6.5743e-01,  2.1587e-01,  4.7996e-02,  ...,  7.3995e-01,
          1.3422e-03, -8.2419e-01],
        [ 1.4756e+00,  1.7450e-02,  5.4378e-01,  ...,  4.7505e-01,
         -4.3677e-01, -3.3663e-01],
        ...,
        [-4.2043e-01, -3.7940e-01, -1.1915e+00,  ..., -1.3130e+00,
         -2.6153e-01, -9.0545e-01],
        [-1.6054e+00,  1.1666e-01, -2.8253e-01,  ...,  1.0049e+00,
         -9.6251e-01,  1.8574e+00],
        [-1.0129e+00, -6.7703e-01,  3.7852e-01,  ...,  4.7505e-01,
         -1.4882e+00,  1.0448e+00]])