In [1]:
# Let's create the simplest neural network, consisting of just one neuron!
# It will predict the mean value of four numbers.
# For example, if the input is [5, 3, 2, 6],
# it will output (5 + 3 + 2 + 6) / 4 = 4.0

# Make all imports.
# You need to install pandas, numpy, matplotlib and pytorch.
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader

# Create datasets

In [2]:
# Fix the random seed so that "Restart & Run All" regenerates the same data.
np.random.seed(42)

# Total number of samples (rows) in the dataset.
data_len = 1024 * 16

# Number of training samples: 1024 * 16 - 1024 = 15360 rows.
# The remaining 1024 rows are held out for testing accuracy.
data_split = data_len - 1024

# Create a dictionary of random numbers (standard normal distribution).
# It has four columns, 'x0' to 'x3'. These are our features.
# Each row is called a sample. There are `data_len` samples in the dataset.
#
# Read about dictionary comprehensions and f-strings in Python
# if you don't understand this line.
d = {f'x{i}': np.random.randn(data_len) for i in range(4)}

# Create a Pandas dataframe from the dictionary.
df = pd.DataFrame(data=d)

# Add a 'y' column holding the mean of the feature columns of each row
# (axis=1 averages across columns). The feature columns are selected
# explicitly so that 'y' can never be averaged into itself.
df['y'] = df[list(d)].mean(axis=1)

In [3]:
# Preview the first five samples of the dataframe.
first_rows = df.head()
print(first_rows.to_string())

         x0        x1        x2        x3         y
0 -0.372170 -1.217678 -1.274363 -0.187191 -0.762851
1 -0.189180  0.730292 -0.078773 -0.502313 -0.009994
2 -2.208660  1.066618 -1.023454  0.976092 -0.297351
3 -0.790448 -1.836837  2.279384 -1.823200 -0.542776
4 -1.058742 -1.661404 -1.234470 -0.117421 -1.018009


In [4]:
# You can verify that the 'y' column really is the mean of the four features in each row.
# The accumulator is deliberately not called `sum`: that would shadow
# Python's built-in sum() for every cell executed afterwards.
row_total = 0
for i in range(4):
    # iloc[0] extracts the value stored in the first row of the column.
    row_total += df[f'x{i}'].iloc[0]
print('-' * 20)

# This must be equal to the 'y' value in the first row.
print(row_total / 4)

--------------------
-0.7628506576331334


In [5]:
# Create a `MeanDataset` class inherited from `Dataset` class.
# This is an abstract class representing a Dataset in pytorch.
# All custom datasets should inherit Dataset and override the following methods:
# __len__ so that len(dataset) returns the size of the dataset;
# __getitem__ to support the indexing such that dataset[i] can be used to get ith sample.

class MeanDataset(Dataset):
    '''
    Mean value dataset.

    Wraps one dataframe and exposes either its first `data_split` rows
    (training part) or the remaining rows (testing part). Every column
    except the last one is treated as a feature; the last column is the target.
    '''
    def __init__(self, df, data_split, test):
        '''
        df - dataframe;
        data_split - how many rows are in the training data (remaining are in the testing dataset);
        test = {True, False} - test or train dataset.
        '''
        self.df = df
        self.test = test
        self.data_split = data_split

    def __getitem__(self, index):
        '''
        Returns the sample `(X, y)` with the specified index, where `X` is a
        numpy array of features and `y` is the target value.

        Valid indexes:
        0 ... (data_split-1) - for training dataset;
        0 ... (len(df)-data_split-1) - for testing dataset.
        Negative indexes count from the end of the selected part.

        Raises IndexError when the index is outside the selected part. That is
        the exception the sequence protocol expects, so `for sample in dataset`
        terminates cleanly at the end of the data.
        '''
        size = len(self)
        if index < 0:
            # Python-style negative indexing, relative to this part only.
            index += size
        if not 0 <= index < size:
            raise IndexError('MeanDataset index out of range')

        # Rows of the testing part are stored after the training rows.
        row = index + self.data_split if self.test else index

        # Take row `row`, all columns except the last, as a numpy array.
        # (`Series.as_matrix()` was removed from pandas; `to_numpy()` replaces it.)
        X = self.df.iloc[row, :-1].to_numpy()

        # The last column of the same row holds the target value.
        y = self.df.iloc[row, -1]
        return X, y

    def __len__(self):
        '''
        Number of samples in the selected (train or test) part.
        '''
        if self.test:
            return len(self.df) - self.data_split
        return self.data_split

In [6]:
# Build two views over the same dataframe: one serving the training rows
# and one serving the testing rows.
train_dataset = MeanDataset(df=df, data_split=data_split, test=False)
test_dataset = MeanDataset(df=df, data_split=data_split, test=True)

# Check dataset objects (optional)

In [7]:
# Iterating over the train dataset must yield exactly `data_split` samples.
count = 0
for count, _sample in enumerate(train_dataset, start=1):
    pass
print(count)
print(data_split)

15360
15360


In [8]:
# Iterating over the test dataset must yield exactly (data_len-data_split) samples.
count = 0
for count, _sample in enumerate(test_dataset, start=1):
    pass
print(count)
print(data_len-data_split)

1024
1024


In [9]:
# Every sample is a pair: the features array and the target value.
first_sample = train_dataset[0]
print(len(first_sample))

2


In [10]:
def show_samples(data, index, n):
    '''
    Auxiliary function: just displays `n` samples from the dataset object starting from `index`.

    Input:
    data - MeanDataset object (anything where `data[i]` returns a
           `(features, target)` pair works);
    index - starting point for displaying;
    n - how many samples to display.

    The number of feature columns is taken from the samples themselves,
    so the function is not tied to four features.
    '''
    rows = list(range(index, index + n))

    # Fetch every sample exactly once and flatten it to [x0, x1, ..., y].
    records = []
    for i in rows:
        features, target = data[i]
        records.append([float(value) for value in features] + [float(target)])

    n_features = len(records[0]) - 1 if records else 0
    columns = [f"x{j}" for j in range(n_features)] + ["y"]

    # Build the frame in one go; assigning cell by cell through chained
    # indexing (`result.loc[i][j] = ...`) is not guaranteed to write into
    # the frame and does nothing under pandas copy-on-write.
    result = pd.DataFrame(records, index=rows, columns=columns)

    print(result.to_string(float_format=lambda x: f"{x:.6f}"))

In [11]:
# Display the first five samples served by the train dataset.
show_samples(data=train_dataset, index=0, n=5)

         x0        x1        x2        x3         y
0 -0.372170 -1.217678 -1.274363 -0.187191 -0.762851
1 -0.189180  0.730292 -0.078773 -0.502313 -0.009994
2 -2.208660  1.066618 -1.023454  0.976092 -0.297351
3 -0.790448 -1.836837  2.279384 -1.823200 -0.542776
4 -1.058742 -1.661404 -1.234470 -0.117421 -1.018009


In [12]:
# Display the first five rows of the original dataframe.
# They must match the samples served by the train dataset.
first_five = df.head(5)
print(first_five.to_string())

         x0        x1        x2        x3         y
0 -0.372170 -1.217678 -1.274363 -0.187191 -0.762851
1 -0.189180  0.730292 -0.078773 -0.502313 -0.009994
2 -2.208660  1.066618 -1.023454  0.976092 -0.297351
3 -0.790448 -1.836837  2.279384 -1.823200 -0.542776
4 -1.058742 -1.661404 -1.234470 -0.117421 -1.018009


In [13]:
# Display the six rows around the `data_split` boundary and compare them with
# the last three samples of the train dataset and the first three samples of
# the test dataset.
# Notice that indexes in the test dataset start from zero.
boundary_rows = df.iloc[data_split-3:data_split+3]
print('-' * 18 + 'Original dataframe' + '-' * 19)
print(boundary_rows.to_string())
print()
print('-' * 12 + 'Last samples from train dataset' + '-' * 12)
show_samples(data=train_dataset, index=data_split-3, n=3)
print()
print('-' * 12 + 'First samples from test dataset' + '-' * 12)
show_samples(data=test_dataset, index=0, n=3)

------------------Original dataframe-------------------
             x0        x1        x2        x3         y
15357 -0.729790 -0.048423 -1.513434  1.022374 -0.317318
15358 -0.393401  0.664073 -0.742509 -1.810689 -0.570631
15359 -0.838864  0.533188 -2.193586  0.289318 -0.552486
15360 -1.168087  2.449318  1.086021 -0.675482  0.422943
15361 -0.112024 -1.075465 -3.167638  0.687802 -0.916831
15362  0.364389  0.884602 -0.970820  0.465550  0.185930

------------Last samples from train dataset------------
             x0        x1        x2        x3         y
15357 -0.729790 -0.048423 -1.513434  1.022374 -0.317318
15358 -0.393401  0.664073 -0.742509 -1.810689 -0.570631
15359 -0.838864  0.533188 -2.193586  0.289318 -0.552486

------------First samples from test dataset------------
         x0        x1        x2        x3         y
0 -1.168087  2.449318  1.086021 -0.675482  0.422943
1 -0.112024 -1.075465 -3.167638  0.687802 -0.916831
2  0.364389  0.884602 -0.970820  0.465550  0.185930


In [14]:
# Display the last five rows of the original dataframe.
last_five = df.tail(5)
print(last_five.to_string())

             x0        x1        x2        x3         y
16379  1.125337  0.855660  0.726153 -1.119461  0.396922
16380 -0.610176  0.498182 -0.934309  0.286530 -0.189943
16381  0.590183  0.542074  0.486010 -1.133434  0.121208
16382 -1.375578  0.650248 -1.541667 -1.166834 -0.858458
16383  0.023187 -0.597918 -0.644188 -0.716463 -0.483845


In [15]:
# The last five samples served by the test dataset must match those rows.
test_len = data_len-data_split
show_samples(data=test_dataset, index=test_len-5, n=5)

            x0        x1        x2        x3         y
1019  1.125337  0.855660  0.726153 -1.119461  0.396922
1020 -0.610176  0.498182 -0.934309  0.286530 -0.189943
1021  0.590183  0.542074  0.486010 -1.133434  0.121208
1022 -1.375578  0.650248 -1.541667 -1.166834 -0.858458
1023  0.023187 -0.597918 -0.644188 -0.716463 -0.483845


# Creating dataloaders

In [16]:
# A dataloader is an iterator that is used for loading data in batches.
# You can feed the neural network only one sample at a time, but this is not very wise because:
# 1. Gradients from each sample are more stochastic (we will show this later);
# 2. GPUs are good at parallel tasks and can process many samples simultaneously.
# Slicing the dataset into batches uses the GPU more efficiently (although we don't use a GPU
# here, you will definitely use one for larger datasets).
#
# Why not pass all the data at once? You can do it, but you will get an error from the GPU
# if it does not have enough memory.

In [17]:
# Wrap both datasets in dataloaders that serve 512 samples per batch.
batch_size = 512
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size)

# Analyzing dataloaders (optional)

In [18]:
# Count the batches produced by one pass over train_loader,
# keeping hold of the last batch for inspection.
count = 0
for count, last_batch in enumerate(train_loader, start=1):
    pass

# These two numbers must be equal.
print(count)
print(len(train_dataset) / 512)
print('---')

# Each batch has 2 elements (features X and output y),
# each of them holding 512 samples.
print(f"{len(last_batch)} x {len(last_batch[0])}")

30
30.0
---
2 x 512


In [19]:
# Run the same quick check on test_loader:
count = 0
for count, last_batch in enumerate(test_loader, start=1):
    pass

print(count)
print(len(test_dataset) / 512)
print('---')
print(f"{len(last_batch)} x {len(last_batch[0])}")

2
2.0
---
2 x 512


# Create neural network

In [20]:
# nn.Module is the base class for all neural networks.
# You must subclass this class.
# At a minimum you must override the `forward` method, which is called
# each time the model is called. It performs one forward propagation through
# the network.

class SimplestNN(nn.Module):
    '''
    A neural network made of a single linear neuron.
    '''
    def __init__(self, in_features=4, out_features=1):
        '''
        in_features - number of input values per sample (4 by default).
        out_features - number of output values per sample (1 by default).
        '''
        super(SimplestNN, self).__init__()
        # The only layer: a linear (fully connected) layer of the requested
        # size, built with nn.Linear's default settings.
        self.linear = nn.Linear(in_features=in_features, out_features=out_features)

    def forward(self, inp):
        '''
        Passes `inp` through the linear layer and returns the result.
        '''
        return self.linear(inp)

In [21]:
# Instantiate the network with its default sizes, spelled out explicitly.
model = SimplestNN(in_features=4, out_features=1)

# Printing the model shows its layers: 4 inputs, one output,
# and a bias term.
print(model)

SimplestNN(
  (linear): Linear(in_features=4, out_features=1, bias=True)
)


# Analyze neural network (optional)

In [22]:
# Inspect the parameters of the linear layer: the weights and the bias.
# Both start out randomly initialized.
linear_layer = model.linear
print(linear_layer.weight)
print(linear_layer.bias)

Parameter containing:
 0.2601 -0.2865 -0.3699  0.0884
[torch.FloatTensor of size 1x4]

Parameter containing:
 0.3372
[torch.FloatTensor of size 1]



In [23]:
# Ask the untrained network for the mean of 4 numbers.
sample = Variable(torch.FloatTensor([1, 2, 3, 4]))
y_pred = model(sample)
y_actual = (1 + 2 + 3 + 4) / 4
# The prediction is very poor: the network has not yet learned what
# to do with these four numbers.
print(f"Prediction: {float(y_pred)}")
print(f"Actual: {y_actual}")

Prediction: -0.7316495180130005
Actual: 2.5


In [24]:
# How was this prediction made? It's simple.

# The layer stores its weights as a 1x4 matrix. Flatten them into a 1-D
# vector of four weights so they can be indexed one by one and passed to
# torch.dot, which only accepts 1-D tensors.
w = model.linear.weight.view(-1)
# This is the bias.
b = model.linear.bias
# These are the inputs.
X = Variable(torch.FloatTensor([1, 2, 3, 4]))

# Weighted sum of the inputs plus the bias.
y_pred = w[0] * X[0] + w[1] * X[1] + w[2] * X[2] + w[3] * X[3] + b
print(f"Prediction: {float(y_pred)}")

# Or more compactly, as a dot product:
y_pred = torch.dot(w, X) + b
print(f"Prediction: {float(y_pred)}")

Prediction: -0.7316495180130005
Prediction: -0.7316495180130005


# Train neural network

In [25]:
# Adam optimizer with learning rate 1e-2.
# It is handed model.parameters(), i.e. the tensors it will update.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

# The loss function is the mean squared error.
criterion = nn.MSELoss()

In [26]:
# The next several cells only explain what is going on.
# You can skip straight to the for loop.

In [27]:
# Get the first batch from train_loader.
batch = next(iter(train_loader))

# Get predictions. The model returns one value per sample,
# as a two-dimensional tensor of shape (batch size, 1).
y_pred = model(Variable(batch[0].float()))

# Get the real values: a one-dimensional tensor, one value per sample.
y = batch[1].float()

# Flatten the predictions to one dimension so they line up element by
# element with `y`. Without this, current PyTorch broadcasts (N, 1) against
# (N,) to an N x N matrix and the loss compares every prediction with every
# target.
y_pred_flat = y_pred.view(-1)

# model() returns an object which carries the history of operations
# performed on it. We are not interested in that here, so we take only
# the `data` property. The batch size is read from `y` instead of being
# hard-coded.
loss = torch.sum((y_pred_flat.data - y)**2) / len(y)
print(f"{float(loss)}")

# This is equal to the previous calculation. Our loss function is just
# the sum of squared differences between predicted and actual values
# divided by the number of samples in the batch.
loss = criterion(y_pred_flat, Variable(y))
print(f"{float(loss)}")

0.6940860463436538
0.6940858960151672


In [28]:
# Backpropagate: compute the gradient of the loss with respect to the model
# parameters, then let the optimizer make one step that updates the
# parameters in the direction where the loss decreases the most.
loss.backward()
optimizer.step()

In [29]:
# The loss has gone down.
y_pred = model(Variable(batch[0].float()))
# Flatten the (N, 1) predictions so they match the one-dimensional targets;
# otherwise current PyTorch broadcasts the two to N x N and reports a wrong loss.
loss = criterion(y_pred.view(-1), Variable(y))
print(f"{float(loss)}")

0.6666650176048279


In [30]:
# You can now repeat these actions again and again
# for every batch in the train dataset.
# Updating the weights batch by batch like this is called
# mini-batch stochastic gradient descent.
# One complete pass over the train dataset is called
# an epoch. We are doing 3 epochs.
for epoch in range(3):
    print('-' * 5 + f"{epoch} epoch" + '-' * 5)
    for batch in train_loader:
        y_pred = model(Variable(batch[0].float()))
        y_true = Variable(batch[1].float())
        # Flatten the (N, 1) predictions so they match the one-dimensional
        # targets. Without this, current PyTorch broadcasts the two to
        # N x N and the network is trained against the wrong loss.
        loss = criterion(y_pred.view(-1), y_true)
        # float() works on a scalar loss; `loss.data[0]` raises an error
        # on zero-dimensional tensors.
        print(float(loss))
        # We must clear the gradients because pytorch by default accumulates
        # them across backward() calls.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

-----0 epoch-----
0.6666650176048279
0.6775162220001221
0.667523205280304
0.7440598607063293
0.6509069800376892
0.6184183359146118
0.6012232899665833
0.6109827160835266
0.5884798169136047
0.5065759420394897
0.5219050645828247
0.5143373608589172
0.42852547764778137
0.37453868985176086
0.4224548935890198
0.38718080520629883
0.34943997859954834
0.35851937532424927
0.3115392029285431
0.313249409198761
0.314829558134079
0.2815018594264984
0.24275627732276917
0.26506757736206055
0.2513918876647949
0.21543729305267334
0.22695203125476837
0.19017481803894043
0.18936774134635925
0.2031327486038208
-----1 epoch-----
0.15000402927398682
0.14392782747745514
0.13950906693935394
0.1527668982744217
0.1278299242258072
0.12499474734067917
0.12116208672523499
0.11674601584672928
0.11002453416585922
0.09352713823318481
0.09382862597703934
0.09117377549409866
0.07610521465539932
0.06466460973024368
0.07039481401443481
0.06305839866399765
0.0533873587846756
0.056814491748809814
0.047644611448049545
0.04769

In [31]:
# The weights are now close to 0.25. That makes sense because
# the mean of four numbers is the sum of each number multiplied by 0.25.
model.linear.weight

Parameter containing:
 0.2497  0.2461  0.2228  0.2493
[torch.FloatTensor of size 1x4]

# Test model accuracy

In [32]:
losses = [] # List of loss values for each batch in the test dataset.
for batch in test_loader:
    y_pred = model(Variable(batch[0].float()))
    y_true = Variable(batch[1].float())
    # Flatten the (N, 1) predictions so they match the one-dimensional
    # targets; otherwise the loss is computed over an N x N broadcast.
    # float() replaces `.data[0]`, which fails on zero-dimensional tensors.
    losses.append(float(criterion(y_pred.view(-1), y_true)))
print(np.mean(losses)) # Compute the mean value.

0.0007288581400644034


# Make prediction for arbitrary input (optional)

In [33]:
# Feed one sample (a batch of size 1) through the trained network.
sample = Variable(torch.FloatTensor([[1, 2, 3, 4]]))
y_pred = model(sample)
print(y_pred)

Variable containing:
 2.4077
[torch.FloatTensor of size 1x1]



In [34]:
# The true mean of the four inputs, for comparison.
y_actual = (1 + 2 + 3 + 4) / 4
y_pred_value = float(y_pred)
# After training, the prediction is much closer to the true value.
print(f"Prediction: {y_pred_value}")
print(f"Actual: {y_actual}")

Prediction: 2.4076766967773438
Actual: 2.5
