In [1]:
import json
import os
import random

import cv2
import numpy as np
import torch
import torch.utils.data
from tqdm import tqdm

#custom file
from scratch_data import *

import torch.nn as nn
import torch.nn.functional as F

This is a general Pipeline of how surprise scores are generated as they are in the paper. <br>
The pipeline covers:
- Reading in training data, and test-expected data and test-unexpected data. (All normal train data is expected)
- Using a simple CNN model to do behavioral cloning (predicting actions just given states -- not state, prev_actions)
- Calculating Surprise Scores
<br>
This code used a lot of the existing code from the og BiB repo. However some things have been changed, and the class structure of how things were processed in the original 
repo has been disassembled to run each function discretely to make it clear how the operation worked. All code used from the scratch_data file is understood well.

In [2]:
type = 'preference'
path = r'/home/mike/Desktop/Codec_research/bib_evaluation_v1.1/bib_evaluation/'
expected_data_tuples, unexpected_data_tuples = get_data_tuples(path,type,False) #mp4 path, frame number, action (-1 to 1)
#False because the data was processed already (huge time save)
total_episodes = len(expected_data_tuples)/9

reading files of type preference


In [3]:
train_path = r'/home/mike/Desktop/Codec_research/bib_train'
train_tuples = get_train_tuples(train_path,type,False)
train_episodes = len(train_tuples)/9

reading files of type preference


In [None]:
print(len(expected_data_tuples), len(unexpected_data_tuples)) #total eval frames
print(f'Total Evaluation Episodes: {total_episodes}')
print(f'Total Training Episodes: {train_episodes}')

9000 9000


In [6]:
# train_results = []
# for i in range(int(total_episodes)):
#     trial_indices = get_episode_trial_indices(episode_index=i)
#     trial_result = get_trial(data_tuples, trial_indices, num_transitions=30,action_range = 10, step=1)
#     train_results.append(trial_result)
#Causes crash because to much memory

In [31]:
#From here,
#Loop through a bunch of episodes 
#Get train data, val, test
#Use a simple CNN model to predict actions?
#Compare with prev one that used the prev action in the prediction -- or did it? (Doble check)

In [7]:
#check if gpu is being used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [8]:
#Predictive Engine here: 
#One they use is more involved and also includes actions in the prediction
#Paper encodes states first and concatinate with actions
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        
        self.fc1 = nn.Linear(64 * 10 * 10, 128)  # After pooling, the feature map size is 10x10
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        
        x = x.view(-1, 64 * 10 * 10)  # Flatten the feature map
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        x = torch.tanh(x)  # Ensure output is between -1 and 1 for predicting actions
        
        return x

In [31]:
trial_result = get_trial(train_tuples, get_episode_trial_indices(episode_index=0), num_transitions=30,action_range = 10, step=1)[1]
#trial_result to play and see what a trial looks like

In [17]:
model = SimpleCNN()

#loss function
criterion = nn.MSELoss()
#optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

#train the model
def train(model, data,criterion, optimizer,epochs):
    train_length = int(len(data)/9) #9 trials per episode
    for epoch in range(epochs):
        random_order = random.sample(list(range(train_length)), train_length)
        for i in random_order: #processes loss 1 episode at a time, doing loss in random order
            #get data
            trial_indices = get_episode_trial_indices(episode_index=i)
            trial_result = get_trial(data, trial_indices, num_transitions=30,action_range = 10, step=1)
            states = trial_result[0]
            actions = trial_result[1]
            #convert to tensor
            states = torch.tensor(states).float()
            actions = torch.tensor(actions).float()
            #forward pass
            outputs = model(states)
            loss = criterion(outputs, actions)
            #backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')    


In [18]:
#train(model, train_tuples,criterion, optimizer, 3)

  states = torch.tensor(states).float()
  actions = torch.tensor(actions).float()


Epoch 1, Loss: 0.3422279357910156
Epoch 2, Loss: 0.3297484815120697
Epoch 3, Loss: 0.3267993628978729


In [19]:
#save model
#torch.save(model.state_dict(), 'simple_cnn.pth')

In [None]:
#load model
model = SimpleCNN()
model.load_state_dict(torch.load('simple_cnn.pth'))
model.eval()

In [25]:
#test model
def get_surprise_score(model, data):
    surprise_scores = []
    for i in range(int(len(data)/9)):
        trial_indices = get_episode_trial_indices(episode_index=i)
        trial_result = get_trial(data, trial_indices, num_transitions=30,action_range = 10, step=1)
        states = trial_result[0]
        actions = trial_result[1]
        states = torch.tensor(states).float()
        actions = torch.tensor(actions).float()
        outputs = model(states)
        surprise_score = criterion(outputs, actions).item()
        surprise_scores.append(surprise_score)
    max_surprise_score = max(surprise_scores)
    mean_surprise_score = np.mean(surprise_scores)
    return max_surprise_score, mean_surprise_score

In [26]:
print(get_surprise_score(model, train_tuples))
print(get_surprise_score(model, expected_data_tuples))
print(get_surprise_score(model, unexpected_data_tuples))

  states = torch.tensor(states).float()
  actions = torch.tensor(actions).float()


(0.5188149809837341, 0.3084702689496961)
(0.8545089960098267, 0.5518209140896797)
(0.880501925945282, 0.5486014810800552)
