In [1]:
import os
import numpy as np

def load_labeled_data(path):
    states = []
    actions = []
    for f in os.listdir(path):
        e_c = np.load(f'{path}/{f}')
        states.append(e_c['states'])
        actions.append(e_c['actions'])
    return states, actions


In [2]:
states, actions = load_labeled_data('human_db')

In [3]:
print(states[0].shape)
print(actions[0].shape)

(24, 2, 300, 200)
(24,)


In [4]:
def create_dataset(states, actions):
    def count_steps(actions):
        s = 0
        for a in actions:
            s += a.size
        return s
    num_steps = count_steps(actions)
    X_states = np.empty((num_steps, 4, 300, 200), dtype=np.single)
    y_actions = np.empty((num_steps))
    step = 0
    for s,a in zip(states, actions):
        length_try = a.size

        X_states[step:step+length_try,0] = s[:,0]
        X_states[step:step+length_try,1] = s[:,0]
        X_states[step:step+length_try,2] = s[:,1]
        X_states[step:step+length_try,3] = s[:,1]
        y_actions[step:step+length_try] = a

        step += length_try

    return X_states, y_actions


In [5]:
X_states, y_actions = create_dataset(states, actions)

In [6]:
import gc

del states
del actions
gc.collect()

0

In [7]:
print(X_states.shape)
print(y_actions.shape)

(3727, 4, 300, 200)
(3727,)


In [8]:
print(X_states.size * 4)

3577920000


In [9]:
from PIL import Image

for i in range(1):
    im = Image.fromarray(X_states[0,i]*255)
    im.show()

In [10]:
for i in range(1):
    im = Image.fromarray(X_states[0,2+i]*255)
    im.show()

In [11]:
import torch.nn as nn

model = nn.Sequential(
    nn.Conv2d(in_channels=4, out_channels=32, kernel_size=8, stride=1),
    nn.ReLU(),
    nn.MaxPool2d(5, stride=3),
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=1),
    nn.ReLU(),
    nn.MaxPool2d(5, stride=3),
    nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1),
    nn.ReLU(),
    nn.MaxPool2d(5, stride=3),
    nn.Flatten(start_dim=1),
    nn.Linear(2560, 512),
    nn.ReLU(),
    nn.Linear(512, 4),
    nn.Softmax(dim=1)
)

In [12]:
X_train = np.empty((int(X_states.shape[0]*0.7),4,300,200), dtype=np.single)

In [13]:
indices = np.random.permutation(X_states.shape[0])
train_idx, test_idx = indices[:X_train.shape[0]], indices[X_train.shape[0]:]

In [14]:
X_train = X_states[train_idx]

In [15]:
del X_states
gc.collect()

0

In [16]:
from sklearn.preprocessing import OneHotEncoder

# enc = OneHotEncoder().fit(y_actions[train_idx].reshape(-1,1))
# y_train = enc.transform(y_actions[train_idx].reshape(-1,1)).toarray()
# print(y_train.shape)

# print(y_train[0])
# print(y_actions[train_idx][0])

# print(y_train[1])
# print(y_actions[train_idx][1])
y_train = y_actions[train_idx].copy()
y_train[y_train == -1] = 2 # ignore unlabeled data


In [17]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [18]:
import torch

batch_size = 16

for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0

    indices = np.random.permutation(X_train.shape[0])

    for i in range(int(X_train.shape[0] / batch_size)):
        # get the inputs; data is a list of [inputs, labels]
        inputs_batch = torch.from_numpy(X_train[batch_size * i: batch_size * (i+1)])
        labels_batch = torch.from_numpy(y_train[batch_size * i: batch_size * (i+1)]).long()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs_batch).reshape((batch_size,4))
        loss = criterion(outputs, labels_batch)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 32 == 31:    # print every 200 mini-batches
            print(outputs)
            print(labels_batch)
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 32:.3f}')
            running_loss = 0.0

tensor([[0.2537, 0.2431, 0.2647, 0.2385],
        [0.2532, 0.2425, 0.2649, 0.2393],
        [0.2536, 0.2439, 0.2643, 0.2383],
        [0.2531, 0.2435, 0.2631, 0.2402],
        [0.2533, 0.2446, 0.2628, 0.2392],
        [0.2531, 0.2431, 0.2642, 0.2396],
        [0.2536, 0.2427, 0.2644, 0.2394],
        [0.2537, 0.2436, 0.2644, 0.2383],
        [0.2526, 0.2434, 0.2635, 0.2405],
        [0.2532, 0.2426, 0.2643, 0.2399],
        [0.2530, 0.2437, 0.2639, 0.2393],
        [0.2528, 0.2431, 0.2647, 0.2394],
        [0.2529, 0.2425, 0.2652, 0.2394],
        [0.2540, 0.2419, 0.2664, 0.2377],
        [0.2522, 0.2428, 0.2639, 0.2411],
        [0.2524, 0.2448, 0.2634, 0.2393]], grad_fn=<ReshapeAliasBackward0>)
tensor([2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 0, 3])
[1,    32] loss: 1.384
tensor([[0.2581, 0.2356, 0.2952, 0.2111],
        [0.2559, 0.2385, 0.2903, 0.2153],
        [0.2553, 0.2373, 0.2901, 0.2172],
        [0.2570, 0.2383, 0.2902, 0.2145],
        [0.2556, 0.2372, 0.2915, 0.2157],
    