# Introduction

Tensors are the fundamental data structure in PyTorch. A tensor is an n-dimensional array.

# Basics

In [1]:
# A tensor can be filled with a range of consecutive values
import torch

# arange(10) produces the values 0 through 9 as a 1-D tensor
x = torch.arange(10, dtype=torch.float64)
print(
    x,
    f"x has {x.numel()} elements",
    f"x has a shape of {x.shape}",
    sep="\n",
)

tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=torch.float64)
x has 10 elements
x has a shape of torch.Size([10])


In [2]:
# Reshaping changes only the layout (here 10 values -> 2 rows of 5); the values stay the same

X = x.reshape((2, 5))
print(
    f"We reshaped x into X, which has a shape of {X.shape}",
    X,
    sep="\n",
)

We reshaped x into X, which has a shape of torch.Size([2, 5])
tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]], dtype=torch.float64)


In [3]:
# We can create tensors filled with constants or with random values

# Seed the random number generator so the random tensor below is identical
# on every fresh run of the notebook (Restart Kernel -> Run All).
torch.manual_seed(0)

# All zeros, all ones, and uniform random samples from [0, 1)
tensor_0 = torch.zeros((3,3))
tensor_1 = torch.ones((3,3))
tensor_random = torch.rand((3,3))

print(tensor_0)
print(tensor_1)
print(tensor_random)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.4341, 0.6158, 0.8276],
        [0.3254, 0.4183, 0.9883],
        [0.9799, 0.5746, 0.7240]])


# Accessing Tensor Elements

Tensors follow the same indexing conventions as Python lists. Indexes are zero-based and count up from the front of the collection. Negative indexes are allowed and count back from the end of the collection. Slices select a range of indexes, written as a start and a stop value separated by `:`; the stop index itself is excluded.

In [4]:
# Indexing a 2-D tensor along its first dimension: one row, the last row, then a slice of rows
print(
    f"Tensor at index 1: {tensor_random[1]}",
    f"The tensor at the last index is: {tensor_random[-1]}",
    f"The first two elements of the tensor are: {tensor_random[:2]}",
    sep="\n",
)

Tensor at index 1: tensor([0.3254, 0.4183, 0.9883])
The tensor at the last index is: tensor([0.9799, 0.5746, 0.7240])
The first two elements of the tensor are: tensor([[0.4341, 0.6158, 0.8276],
        [0.3254, 0.4183, 0.9883]])


# Elementwise Operations

Elementwise operations apply a standard scalar operation to each element of a tensor independently.

In [5]:
# Draw 10 samples from a standard normal distribution, then limit each value
# to the interval [-0.5, 0.5]; clamp returns a new tensor and leaves `a` unchanged.
a = torch.randn(10)
a_clamp = a.clamp(min=-0.5, max=0.5)
print(
    f"Original value of a: {a}",
    f"Results of clamping to the range (-0.5, 0.5): {a_clamp}",
    sep="\n",
)

Original value of a: tensor([ 0.1598, -1.0111, -1.3132,  0.6465,  0.7383, -0.3146,  0.4081,  0.9147,
        -1.4091, -1.2341])
Results of clamping to the range (-0.5, 0.5): tensor([ 0.1598, -0.5000, -0.5000,  0.5000,  0.5000, -0.3146,  0.4081,  0.5000,
        -0.5000, -0.5000])


In [6]:
# Arithmetic operators also work elementwise between tensors of the same shape

b, c = torch.tensor([1, 10, 100]), torch.tensor([5, 50, 500])
print(
    f"b: {b}",
    f"c: {c}",
    f"b + c: {b + c}",
    sep="\n",
)

b: tensor([  1,  10, 100])
c: tensor([  5,  50, 500])
b + c: tensor([  6,  60, 600])


# Concatenate tensors

We can join several tensors along a chosen dimension to make a larger tensor.

In [7]:
# Join b and c end to end along dimension 0 (the only dimension of a 1-D tensor)
e = torch.cat([b, c], dim=0)
print(e)

tensor([  1,  10, 100,   5,  50, 500])


# Comparison

We can compare each element of one tensor with the corresponding element of another tensor using comparison operators. The result is a boolean tensor of the same shape.

In [8]:
# Elementwise equality: each position is True where f and g hold the same value
f, g = torch.tensor([1, 2, 3]), torch.tensor([1, 8, 9])
print(torch.eq(f, g))

tensor([ True, False, False])


# Reduction Operations

We can also perform operations that reduce a tensor to a single value.

In [9]:
f.sum()  # add up every element of f; the result is a zero-dimensional tensor

tensor(6)

# In Place Updates

We can conserve memory by updating tensors in place instead of binding the result to a new tensor.

In [10]:
# Out-of-place: the sum is a brand-new tensor and the name j is rebound to it,
# so id(j) (the identity of the object j refers to) differs before and after.
j, k = torch.tensor([1, 10, 100]), torch.tensor([5, 50, 500])
print(f"ID of j before the operation: {id(j)}")
j = torch.add(j, k)
print(f"ID of j after the operation: {id(j)}")

# In-place: slice assignment copies the sum into the tensor j already refers to,
# so j keeps the same identity.
j, k = torch.tensor([1, 10, 100]), torch.tensor([5, 50, 500])
print(f"ID of j before the operation: {id(j)}")
j[:] = torch.add(j, k)
print(f"ID of j after the operation: {id(j)}")

ID of j before the operation: 2213680139376
ID of j after the operation: 2213680145696
ID of j before the operation: 2213680145776
ID of j after the operation: 2213680145776


# From file to tensor

We need to chain a few steps using pandas, NumPy, and PyTorch to load data from a file into a tensor.

In [11]:
# Create a small CSV file to use as example input

import os

os.makedirs(os.path.join('.', 'data'), exist_ok=True)
data_file = os.path.join('.', 'data', 'house_tiny.csv')
# The handle gets its own name: `f` already names a tensor in the comparison
# example, and rebinding it to a file handle here would break re-running
# the cells that use that tensor.
with open(data_file, 'w') as csv_file:
    csv_file.write('''NumRooms,RoofType,Price
NA,NA,127500
2,NA,106000
4,Slate,178100
NA,NA,140000''')

In [12]:
from IPython.display import display
import pandas as pd

# Load the CSV into a DataFrame and show its first rows
data = pd.read_csv(data_file)
print("Original data file")
display(data.head())

# Feature columns go to `inputs`, the column to predict goes to `targets`
inputs = data.loc[:, ["NumRooms", "RoofType"]]
targets = data.loc[:, ["Price"]]
# One-hot encode the categorical column; dummy_na=True adds an indicator
# column for missing values instead of dropping them
inputs = pd.get_dummies(inputs, dummy_na=True)
# Replace missing numeric values with the mean of the observed ones
inputs = inputs.assign(
    NumRooms=inputs["NumRooms"].fillna(inputs["NumRooms"].mean())
)
print("Transformed data")
display(inputs)

Original data file


Unnamed: 0,NumRooms,RoofType,Price
0,,,127500
1,2.0,,106000
2,4.0,Slate,178100
3,,,140000


Transformed data


Unnamed: 0,NumRooms,RoofType_Slate,RoofType_nan
0,3.0,False,True
1,2.0,False,True
2,4.0,True,False
3,3.0,False,True


In [13]:
import torch

# Each DataFrame is converted to a float NumPy array and then to a tensor;
# the same conversion is applied to the features and to the targets.
X, y = (
    torch.tensor(frame.to_numpy(dtype=float))
    for frame in (inputs, targets)
)
X, y

(tensor([[3., 0., 1.],
         [2., 0., 1.],
         [4., 1., 0.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([[127500.],
         [106000.],
         [178100.],
         [140000.]], dtype=torch.float64))