# Preliminaries before diving

- basic mathematical tools and functions
- basic data processing
- basic probability theory

In [13]:
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from d2l import torch as d2l
# Show which PyTorch build the printed results below were produced with.
torch_version_msg = f'Import PyTorch V{torch.__version__}'
print(torch_version_msg)

Import PyTorch V1.12.1


## Creation of tensors

Major creations:

1. arange: generate an ascending sequence
1. zeros: generate a tensor whose elements are initialized as 0
1. ones: generate a tensor whose elements are initialized as 1
1. rand: generate a tensor whose elements follow a uniform distribution on [0, 1)
1. randn: generate a tensor whose elements follow N(0, 1)
1. tensor: convert a numerical array into a tensor

In [3]:
# A 1-D tensor holding the values 0..7 in double precision.
t0 = torch.arange(0, 8, dtype=torch.float64)
# Both report lines go out through one call, separated by a newline.
print(
    f'Create a tensor: {t0}',
    f'Its shape is {t0.shape}, and it contains {t0.numel()} elements',
    sep='\n',
)

Create a tensor: tensor([0., 1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)
Its shape is torch.Size([8]), and it contains 8 elements


In [5]:
# -1 lets PyTorch infer the column count from the number of elements in t0.
t0_two_rows = t0.reshape(2, -1)
print(f'Reshape tensor to a matrix with two rows:\n{t0_two_rows}')

Reshape tensor to a matrix with two rows:
tensor([[0., 1., 2., 3.],
        [4., 5., 6., 7.]], dtype=torch.float64)


In [8]:
# Factory functions that each build a tensor from nothing but a shape.
shape = (2, 3, 2)
creations = [torch.zeros, torch.ones, torch.rand, torch.randn]
# The extra newline in `end` produces the blank separator line before the listing.
print(f'Create tensor with shape {shape} by several common creations', end='\n\n')
for func in creations:
    # Function name on one line, the tensor it creates on the following lines.
    print(func.__name__, func(shape), sep='\n')

Create tensor with shape (2, 3, 2) by several common creations

zeros
tensor([[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]])
ones
tensor([[[1., 1.],
         [1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.],
         [1., 1.]]])
rand
tensor([[[0.6927, 0.4803],
         [0.4247, 0.9278],
         [0.8447, 0.5555]],

        [[0.1668, 0.4057],
         [0.9301, 0.7155],
         [0.9404, 0.2962]]])
randn
tensor([[[-0.4372,  0.4320],
         [ 0.0272, -2.0172],
         [ 0.0704, -1.6593]],

        [[ 1.1760,  0.7541],
         [ 1.2423,  0.0792],
         [-0.0781,  0.0023]]])


In [10]:
# Build a 2x3 tensor from a nested Python list of integers.
t2 = torch.tensor([[2, 1, 3], [1, 2, 1]])
# Contents first, then shape, element type and element count, one per line.
print(
    f'Create an integer tensor from an array:\n{t2}',
    f'Shape: {t2.shape}',
    f'DType: {t2.dtype}',
    f'Size: {t2.numel()}',
    sep='\n',
)

Create an integer tensor from an array:
tensor([[2, 1, 3],
        [1, 2, 1]])
Shape: torch.Size([2, 3])
DType: torch.int64
Size: 6


In [11]:
# Report how many CUDA devices PyTorch can see.
print(f'There are {torch.cuda.device_count()} CUDA devices')
if torch.cuda.is_available():
    print('Try create a tensor in GPU')
    # Random tensor with t0's shape, but stored as float32 on a CUDA device.
    t3 = torch.rand_like(t0, device='cuda', dtype=torch.float32)
    print(t3)

There are 1 CUDA devices
Try create a tensor in GPU
tensor([0.3036, 0.0079, 0.3854, 0.9890, 0.0779, 0.8209, 0.2009, 0.2005],
       device='cuda:0')


## Load data from csv files

Use pandas to load the data and convert it into tensors.

In [22]:
print('Prepare csv data file')
os.makedirs('data', exist_ok=True)
data_file = os.path.join('data', 'house_tiny.csv')
# Header plus four sample rows. 'NA' marks a missing value; each comma is
# followed by a blank, which the loading cell strips via skipinitialspace=True.
csv_lines = [
    'room_count, alley, price\n',
    'NA, Pave, 127500\n',
    '2, NA, 106000\n',
    '4, NA, 178100\n',
    'NA, NA, 140000\n',
]
# Leaving the `with` block flushes and closes the file, so no explicit flush is needed.
with open(data_file, 'w') as f:
    f.writelines(csv_lines)

Prepare csv data file


In [26]:
# The f prefix is required for the pandas version placeholder to be interpolated.
print(f'Load data by using Pandas V{pd.__version__}')
# skipinitialspace drops the blank that follows each comma in the file,
# so column names and the 'NA' markers are read without leading spaces.
houses = pd.read_csv(data_file, skipinitialspace=True)
print(houses)

Load data by using Pandas V{pd.__version__}
   room_count alley   price
0         NaN  Pave  127500
1         2.0   NaN  106000
2         4.0   NaN  178100
3         NaN   NaN  140000


In [29]:
print('Consider price as function of room count and alley')
inputs, outputs = houses.loc[:, ('room_count', 'alley')], houses.loc[:, 'price']
print(f'inputs: \n{inputs}')
print(f'outputs: \n{outputs}')

Consider price as function of room count and alley
inputs: 
   room_count alley
0         NaN  Pave
1         2.0   NaN
2         4.0   NaN
3         NaN   NaN
outputs: 
0    127500
1    106000
2    178100
3    140000
Name: price, dtype: int64


In [30]:
print('Fill inputs by using mean values')
inputs = inputs.fillna(inputs.mean())
print(inputs)

Fill inputs by using mean values
   room_count alley
0         3.0  Pave
1         2.0   NaN
2         4.0   NaN
3         3.0   NaN


  inputs = inputs.fillna(inputs.mean())


In [31]:
print('Use integers to represent category-like attributes')
inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)

Use integers to represent category-like attributes
   room_count  alley_Pave  alley_nan
0         3.0           1          0
1         2.0           0          1
2         4.0           0          1
3         3.0           0          1


In [32]:
print('Convert Pandas DataFrame into PyTorch tensor')
# Request a float array explicitly: mixed float and bool/integer columns would
# otherwise be able to produce an object array, which torch.tensor rejects.
inputs = torch.tensor(inputs.to_numpy(dtype=float))
outputs = torch.tensor(outputs.to_numpy())
# Bare last expression so the notebook displays both tensors.
inputs, outputs

Convert Pandas DataFrame into PyTorch tensor


(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([127500, 106000, 178100, 140000]))