# Predictive coding framework 
##Including sequence modeling, action environment interaction and mutiagent language learning


In [1]:
import torch
import numpy as np
from torch.functional import F
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import transforms
import torchvision.datasets as dst
from torchvision.utils import save_image
import pandas as pd

img_x=240
img_y=320

def conv2D_output_size(img_size, padding, kernel_size, stride):
    # compute output shape of conv2D
    outshape = (np.floor((img_size[0] + 2 * padding[0] - (kernel_size[0] - 1) - 1) / stride[0] + 1).astype(int),
                np.floor((img_size[1] + 2 * padding[1] - (kernel_size[1] - 1) - 1) / stride[1] + 1).astype(int))
    return outshape

#二维卷积
class EncoderCNN(nn.Module):
    def __init__(self, img_x=img_x, img_y=img_y, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=300):
        super(EncoderCNN, self).__init__()

        self.img_x = img_x
        self.img_y = img_y
        self.CNN_embed_dim = CNN_embed_dim

        # CNN architechtures
        self.ch1, self.ch2, self.ch3, self.ch4 = 32, 64, 128, 256
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)      # 2d kernal size
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)      # 2d strides
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)  # 2d padding

        # conv2D output shapes
        self.conv1_outshape = conv2D_output_size((self.img_x, self.img_y), self.pd1, self.k1, self.s1)  # Conv1 output shape
        self.conv2_outshape = conv2D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)
        self.conv3_outshape = conv2D_output_size(self.conv2_outshape, self.pd3, self.k3, self.s3)
        self.conv4_outshape = conv2D_output_size(self.conv3_outshape, self.pd4, self.k4, self.s4)

        # fully connected layer hidden nodes
        self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
        self.drop_p = drop_p

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=self.ch1, kernel_size=self.k1, stride=self.s1, padding=self.pd1),
            nn.BatchNorm2d(self.ch1, momentum=0.01),
            nn.ReLU(inplace=True),                      
            # nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, stride=self.s2, padding=self.pd2),
            nn.BatchNorm2d(self.ch2, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )

        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3, stride=self.s3, padding=self.pd3),
            nn.BatchNorm2d(self.ch3, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )

        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch3, out_channels=self.ch4, kernel_size=self.k4, stride=self.s4, padding=self.pd4),
            nn.BatchNorm2d(self.ch4, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )

        self.drop = nn.Dropout2d(self.drop_p)
        self.pool = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.fc_hidden1)   # fully connected layer, output k classes
        self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        self.fc3 = nn.Linear(self.fc_hidden2, self.CNN_embed_dim)   # output = CNN embedding latent variables

    def forward(self, x):
        
            # CNNs
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = x.view(x.size(0), -1)           # flatten the output of conv

        # FC layers
        x = F.relu(self.fc1(x))
        # x = F.dropout(x, p=self.drop_p, training=self.training)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=self.drop_p, training=self.training)
        out = self.fc3(x)
            
        # swap time and sample dim such that (sample dim, time dim, CNN latent dim)
        # cnn_embed_seq: shape=(batch, time_step, input_size)

        return out

#class: sequence coding module
#惰性编码单元
class inertiaCodingCell(nn.Module):
    '''
    input:(batch,seq_len,channel,witdth,height)
    output:(batch,code_len,code_dim)
    
    '''
    def __init__(self,n_channel=1,width=img_x,height=img_y):
        super(inertiaCodingCell,self).__init__()
        self.encoder_l=EncoderCNN(CNN_embed_dim=1024)
        self.d_f1=nn.Linear(in_features=1024,out_features=1024)
        self.d_fE=nn.Linear(in_features=1024,out_features=256)
        self.d_fS=nn.Linear(in_features=1024,out_features=256)
        
        self.pred_f1=nn.Linear(in_features=512,out_features=512)
        self.pred_f2=nn.Linear(in_features=512,out_features=256)
        
        self.decoder_fl=nn.Linear(in_features=512,out_features=56*75*3)
        self.decoder_f2=nn.ConvTranspose2d(in_channels=3,out_channels=64,kernel_size=(7,7),stride=2)
        self.decoder_f3=nn.ConvTranspose2d(in_channels=64,out_channels=3,kernel_size=(8,12),stride=2)

                
    def encoder(self,x):
        h=self.encoder_l(x)
        h=self.d_f1(h)
        h=F.relu(h)
        E=self.d_fE(h)
        E=F.relu(E)
        S=self.d_fS(h)
        S=F.relu(S)
        return E,S
    
    def predictor(self,E,S):
        temp=torch.cat([E,S],1)
        temp=self.pred_f1(temp)
        temp=F.relu(temp)
        out=self.pred_f2(temp)
        out=F.relu(out)
        return(out)

    def decoder(self,E,S):
        temp=torch.cat([E,S],1)
        temp=self.decoder_fl(temp)
        temp=temp.view(-1,3,56,75)
        temp=self.decoder_f2(temp)
        x_rec=self.decoder_f3(temp)
        return(x_rec)
        
    def forward(self,x,pre_E=None,pred_S=None):
        
        if (pre_E is None) or (pred_S is None):
            
            x_in=x
            E_plus,S_plus=self.encoder(x_in)
            E=E_plus
            S=S_plus
            x_rec=self.decoder(E,S)
            S_pred=self.predictor(E,S)
            
        else:
            
            x_pred_rec=self.decoder(pre_E,pred_S)
            x_in=x-x_pred_rec
            E_plus,S_plus=self.encoder(x_in)
            E=pre_E+E_plus
            S=pred_S+S_plus
            x_rec=self.decoder(E,S)
            S_pred=self.predictor(E,S)
        
        return(E,S,S_pred,E_plus,x_rec)
    
#惰性编码预测网络
class inertiaCodingNet(nn.Module):
    def __init__(self,seq_len=10,n_channel=3,w=img_x,h=img_y):
        super(inertiaCodingNet,self).__init__()
        self.seq_len=seq_len
        self.n_channel=n_channel
        self.w=w
        self.h=h
        self.inertiaCell=inertiaCodingCell()
        
        
    
    #input (batch_size,seq_len,n_channel,w,h)
    def forward(self,x):
        b,s_len,c_len,w,h=x.shape
        E=[]
        S=[]
        S_pred=[]
        E_plus=[]
        x_rec=[]
        for i in range(s_len):
            x_t=x[:,i,:,:,:]
            
            if i==0:
                E_t,S_t,S_pred_t,E_plus_t,x_rec_t=self.inertiaCell(x_t)
            else:
                E_t,S_t,S_pred_t,E_plus_t,x_rec_t=self.inertiaCell(x_t,pre_E=E_t,pred_S=S_pred_t)
            
            E.append(E_t)
            S.append(S_t)
            S_pred.append(S_pred_t)
            E_plus.append(E_plus_t)
            x_rec.append(x_rec_t)
            
        E=torch.stack(E).transpose(0,1)
        S=torch.stack(S).transpose(0,1)
        E_plus=torch.stack(E_plus).transpose(0,1)
        x_rec=torch.stack(x_rec).transpose(0,1)
            
        #(batch_size,seq_len,hidden_dim)
        return(E,S,E_plus,x_rec)
        
        
#方差损失

def var_loss(tensor):
    temp=tensor
    means=torch.mean(temp,axis=1).view(tensor.shape[0],1,-1)
    return torch.sum(torch.pow(temp-means,2)) 
        
        
        
        
        
        

        
        
        
        
        
        

In [4]:
#dataset
import os
import numpy as np
from PIL import Image
from torch.utils import data
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from tqdm import tqdm

## ------------------- label conversion tools ------------------ ##
def labels2cat(label_encoder, list):
    return label_encoder.transform(list)

def labels2onehot(OneHotEncoder, label_encoder, list):
    return OneHotEncoder.transform(label_encoder.transform(list).reshape(-1, 1)).toarray()

def onehot2labels(label_encoder, y_onehot):
    return label_encoder.inverse_transform(np.where(y_onehot == 1)[1]).tolist()

def cat2labels(label_encoder, y_cat):
    return label_encoder.inverse_transform(y_cat).tolist()


class Dataset_CRNN(data.Dataset):
    "Characterizes a dataset for PyTorch"
    def __init__(self, data_path, frames,labels=None,transform=None,load_all=False):
        "Initialization"
        self.data_path = data_path
        #self.labels = labels
        folders=[]
        for f in os.listdir(data_path):
            if f!='.DS_Store':
                folders.extend(list(map(lambda x:f+'/'+x,os.listdir(data_path+f))))
                
        for i in range(len(folders)-1,-1,-1):
            if '.DS_Store' in folders[i]:
                folders.pop(i)
        
        self.folders = folders
        self.load_all=load_all
        self.transform = transform
        self.frames = frames
        
        if self.load_all:
            k=0
            temp=[]
            print('loading all images')
            for f in self.folders:
                k+=1
                temp.append(self.read_images(self.data_path, f, self.transform) )
                if (k)%1000==0:
                    print(float(k)/len(self.folders))
            self.dataset=temp
        
        

    def __len__(self):
        "Denotes the total number of samples"
        return len(self.folders)

    def read_images(self, path, selected_folder, use_transform):
        
        X = []
        for i in self.frames:
            image = Image.open(os.path.join(path, selected_folder, 'frame'+str(i)+'.jpg'))

            if use_transform is not None:
                image1 = use_transform(image)

            X.append(image1)
            image.close()
        X = torch.stack(X, dim=0)

        return X


    def __getitem__(self, index):
        "Generates one sample of data"
        # Select sample
        if not self.load_all:
            folder = self.folders[index]

        # Load data
            X = self.read_images(self.data_path, folder, self.transform)     # (input) spatial images
        #y = torch.LongTensor([self.labels[index]])                  # (labels) LongTensor are for int64 instead of FloatTensor
        else:
            X=self.dataset[index]
        return X

    


In [9]:
%%time
d_path='/Users/lekang/anaconda/tests/Review/Torch/predictiveCoding/ucf101-jpg/'

transform = transforms.Compose([transforms.Resize([240, 320]),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])


datatest=Dataset_CRNN(data_path=d_path,
                      frames=list(range(1,21)),
                      transform=transform,
                      load_all=False
                     )

use_cuda=False
batch_size=20

all_data_params = {'batch_size': batch_size, 'shuffle': False, 'num_workers': 15}#, 'pin_memory': True}# if use_cuda else {}
all_data_loader = data.DataLoader(datatest, 
                                  **all_data_params)


CPU times: user 8.5 ms, sys: 18.9 ms, total: 27.4 ms
Wall time: 34.8 ms


In [10]:
%%time
for i,datai in enumerate(all_data_loader):
    print(i,datai.shape)


0 torch.Size([20, 20, 3, 240, 320])
1 torch.Size([20, 20, 3, 240, 320])
2 torch.Size([20, 20, 3, 240, 320])
3 torch.Size([20, 20, 3, 240, 320])
4 torch.Size([20, 20, 3, 240, 320])
5 torch.Size([20, 20, 3, 240, 320])
6 torch.Size([20, 20, 3, 240, 320])
7 torch.Size([20, 20, 3, 240, 320])
8 torch.Size([20, 20, 3, 240, 320])
9 torch.Size([20, 20, 3, 240, 320])
10 torch.Size([20, 20, 3, 240, 320])
11 torch.Size([20, 20, 3, 240, 320])
12 torch.Size([20, 20, 3, 240, 320])
13 torch.Size([20, 20, 3, 240, 320])
14 torch.Size([20, 20, 3, 240, 320])
15 torch.Size([20, 20, 3, 240, 320])
16 torch.Size([20, 20, 3, 240, 320])
17 torch.Size([20, 20, 3, 240, 320])
18 torch.Size([20, 20, 3, 240, 320])
19 torch.Size([20, 20, 3, 240, 320])
20 torch.Size([20, 20, 3, 240, 320])
21 torch.Size([20, 20, 3, 240, 320])
22 torch.Size([20, 20, 3, 240, 320])
23 torch.Size([20, 20, 3, 240, 320])
24 torch.Size([20, 20, 3, 240, 320])
25 torch.Size([20, 20, 3, 240, 320])
26 torch.Size([20, 20, 3, 240, 320])
27 torch.Si

225 torch.Size([20, 20, 3, 240, 320])
226 torch.Size([20, 20, 3, 240, 320])
227 torch.Size([20, 20, 3, 240, 320])
228 torch.Size([20, 20, 3, 240, 320])
229 torch.Size([20, 20, 3, 240, 320])
230 torch.Size([20, 20, 3, 240, 320])
231 torch.Size([20, 20, 3, 240, 320])
232 torch.Size([20, 20, 3, 240, 320])
233 torch.Size([20, 20, 3, 240, 320])
234 torch.Size([20, 20, 3, 240, 320])
235 torch.Size([20, 20, 3, 240, 320])
236 torch.Size([20, 20, 3, 240, 320])
237 torch.Size([20, 20, 3, 240, 320])
238 torch.Size([20, 20, 3, 240, 320])
239 torch.Size([20, 20, 3, 240, 320])
240 torch.Size([20, 20, 3, 240, 320])
241 torch.Size([20, 20, 3, 240, 320])
242 torch.Size([20, 20, 3, 240, 320])
243 torch.Size([20, 20, 3, 240, 320])
244 torch.Size([20, 20, 3, 240, 320])
245 torch.Size([20, 20, 3, 240, 320])
246 torch.Size([20, 20, 3, 240, 320])
247 torch.Size([20, 20, 3, 240, 320])
248 torch.Size([20, 20, 3, 240, 320])
249 torch.Size([20, 20, 3, 240, 320])
250 torch.Size([20, 20, 3, 240, 320])
251 torch.Si

450 torch.Size([20, 20, 3, 240, 320])
451 torch.Size([20, 20, 3, 240, 320])
452 torch.Size([20, 20, 3, 240, 320])
453 torch.Size([20, 20, 3, 240, 320])
454 torch.Size([20, 20, 3, 240, 320])
455 torch.Size([20, 20, 3, 240, 320])
456 torch.Size([20, 20, 3, 240, 320])
457 torch.Size([20, 20, 3, 240, 320])
458 torch.Size([20, 20, 3, 240, 320])
459 torch.Size([20, 20, 3, 240, 320])
460 torch.Size([20, 20, 3, 240, 320])
461 torch.Size([20, 20, 3, 240, 320])
462 torch.Size([20, 20, 3, 240, 320])
463 torch.Size([20, 20, 3, 240, 320])
464 torch.Size([20, 20, 3, 240, 320])
465 torch.Size([20, 20, 3, 240, 320])
466 torch.Size([20, 20, 3, 240, 320])
467 torch.Size([20, 20, 3, 240, 320])
468 torch.Size([20, 20, 3, 240, 320])
469 torch.Size([20, 20, 3, 240, 320])
470 torch.Size([20, 20, 3, 240, 320])
471 torch.Size([20, 20, 3, 240, 320])
472 torch.Size([20, 20, 3, 240, 320])
473 torch.Size([20, 20, 3, 240, 320])
474 torch.Size([20, 20, 3, 240, 320])
475 torch.Size([20, 20, 3, 240, 320])
476 torch.Si

In [17]:
%%time

code_net=inertiaCodingNet()#.to_device(device)

a=torch.tensor(np.random.rand(10*240*320*3*10).reshape(10,10,3,240,320),dtype=torch.float32)#.to_device(device)
E,H,E_plus,x_rec=code_net(a)
print(E.shape)
print(H.shape)
print(E_plus.shape)
print(x_rec.shape)

<built-in method size of Tensor object at 0x7f8c68849f30>
<built-in method size of Tensor object at 0x7f8c79c999d8>
<built-in method size of Tensor object at 0x7f8c68852dc8>
<built-in method size of Tensor object at 0x7f8c688523f0>
<built-in method size of Tensor object at 0x7f8c6886ccf0>
<built-in method size of Tensor object at 0x7f8c6886c708>
<built-in method size of Tensor object at 0x7f8c79c076c0>
<built-in method size of Tensor object at 0x7f8c6884c6c0>
<built-in method size of Tensor object at 0x7f8c6884c5a0>
<built-in method size of Tensor object at 0x7f8c6884cf30>
torch.Size([10, 10, 256])
torch.Size([10, 10, 256])
torch.Size([10, 10, 256])
torch.Size([10, 10, 3, 240, 320])
CPU times: user 41.2 s, sys: 784 ms, total: 42 s
Wall time: 7.59 s


In [29]:
import os
import numpy as np
import torch
import torchvision.transforms as transforms
import torch.utils.data as data
import matplotlib.pyplot as plt
from functions import *
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score
import pandas as pd
import pickle

# set path
action_name_path = "./UCF101actions.pkl"
save_model_path = "./checkpoints/"

# use same encoder CNN saved!
CNN_fc_hidden1, CNN_fc_hidden2 = 1024, 768
CNN_embed_dim = 512   # latent dim extracted by 2D CNN
img_x, img_y = 256, 342  # resize video 2d frame size
dropout_p = 0.0       # dropout probability

# use same decoder RNN saved!
RNN_hidden_layers = 3
RNN_hidden_nodes = 512
RNN_FC_dim = 256

# training parameters
k = 101             # number of target category
batch_size = 40
# Select which frame to begin & end in videos
begin_frame, end_frame, skip_frame = 1, 29, 1


with open(action_name_path, 'rb') as f:
    action_names = pickle.load(f)   # load UCF101 actions names

# convert labels -> category
le = LabelEncoder()
le.fit(action_names)

# show how many classes there are
list(le.classes_)

# convert category -> 1-hot
action_category = le.transform(action_names).reshape(-1, 1)
enc = OneHotEncoder()
enc.fit(action_category)

# # example
# y = ['HorseRace', 'YoYo', 'WalkingWithDog']
# y_onehot = labels2onehot(enc, le, y)
# y2 = onehot2labels(le, y_onehot)

actions = []
fnames = os.listdir(data_path)

all_names = []
for f in fnames:
    loc1 = f.find('v_')
    loc2 = f.find('_g')
    actions.append(f[(loc1 + 2): loc2])

    all_names.append(f)


# list all data files
all_X_list = all_names              # all video file names
all_y_list = labels2cat(le, actions)    # all video labels

# data loading parameters
use_cuda = torch.cuda.is_available()                   # check if GPU exists
device = torch.device("cuda" if use_cuda else "cpu")   # use CPU or GPU
params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 4, 'pin_memory': True} if use_cuda else {}

transform = transforms.Compose([transforms.Resize([img_x, img_y]),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

selected_frames = np.arange(begin_frame, end_frame, skip_frame).tolist()

# reset data loader
all_data_params = {'batch_size': batch_size, 'shuffle': False, 'num_workers': 4, 'pin_memory': True} if use_cuda else {}
all_data_loader = data.DataLoader(Dataset_CRNN(data_path, all_X_list, all_y_list, selected_frames, transform=transform), **all_data_params)


tensor(434.7973, grad_fn=<SumBackward0>)

In [None]:
import imageio
import torchvision.transforms as transforms
from torchvision.utils import save_image
from pathos.multiprocessing import ProcessingPool as P
import os
import scipy
import scipy.misc


#Image.open(os.path.join(path, selected_folder, 'frame{:06d}.jpg'.format(i))).convert('L')

def convert_v2imag(inpath,outpath):
    files=os.listdir(inpath)
    for f in files:
        
        if not os.path.isdir(outpath):
            os.mkdir(outpath)
        
        outfolder=outpath+'/'+f.split('.')[0]
        os.chdir(outpath)
        
        os.mkdir(outfolder)
        file=imageio.get_reader(inpath+'/'+f, "ffmpeg")
        print(f)
        for i in range(len(list(file))):
            im_array=list(file)[i]
            scipy.misc.imsave(outfolder+'/'+'frame'+str(i)+'.jpg', im_array)

inpath_header='/Users/lekang/anaconda/tests/Review/Torch/predictiveCoding/UCF-101/'
outpath_header='/Users/lekang/anaconda/tests/Review/Torch/predictiveCoding/ucf101-jpg/'

def convert_folder(inpath_h,outpath_h,threads=6):
    
    p=P(threads)
    
    folders=os.listdir(inpath_h)
    
    p.map(lambda x:convert_v2imag(inpath_h+'/'+x,outpath_h+'/'+x),folders)
    
    


convert_folder(inpath_h='/Users/lekang/anaconda/tests/Review/Torch/predictiveCoding/UCF-101/',
               outpath_h='/Users/lekang/anaconda/tests/Review/Torch/predictiveCoding/ucf101-jpg/',threads=12)
        


In [16]:
from PIL import Image
import numpy as np
import torch
#x=np.stack(list(vd)[:100],0).transpose(0,3,1,2).reshape(10,10,3,240,-1)
#x=torch.tensor(x,dtype=torch.float32)
im1=Image.open('/Users/lekang/anaconda/tests/Review/Torch/predictiveCoding/ucf101-jpg/BlowDryHair/v_BlowDryHair_g01_c03/frame3.jpg')
transform = transforms.Compose([transforms.Resize([240, 320]),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
#transforms.ToTensor()ba
transform(im1).shape
save_image(transform(im1),'/Users/lekang/Desktop/tewsting.jpg')

In [135]:
from torchvision.utils import save_image
import scipy.misc

scipy.misc.imsave('/Users/lekang/Desktop/testingsave2.jpg', np.array(list(vd)[60]),)

image_output = Image.fromarray(np.array(list(vd)[60].transpose(2,0,1)[0,:,:]))
image_output.save('/Users/lekang/Desktop/testingsave1.jpg')

save_image(torch.tensor(list(vd)[60].transpose(2,0,1),dtype=torch.float32),'/Users/lekang/Desktop/testingsave.jpg')

`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
  after removing the cwd from sys.path.


In [94]:
%%time
E,H,E_plus,x_rec=code_net(x)


CPU times: user 39.6 s, sys: 673 ms, total: 40.3 s
Wall time: 6.86 s


In [14]:
%%time
from torchvision.utils import save_image
import scipy.misc
import imageio
import torchvision.transforms as transforms
x1=imageio.get_reader('/Users/lekang/anaconda/tests/Review/Torch/predictiveCoding/UCF-101/ApplyLipstick/v_ApplyLipstick_g01_c02.avi',
                   "ffmpeg")
totensor=transforms.ToTensor()

print(totensor((list(x1)[0])).shape)

torch.Size([3, 240, 320])
CPU times: user 16.2 ms, sys: 24.4 ms, total: 40.6 ms
Wall time: 109 ms


In [25]:

class Dataset_Video(data.Dataset):
    "Characterizes a dataset for PyTorch"
    def __init__(self, data_path, frames,labels=None,transform=None,load_all=False):
        "Initialization"
        self.data_path = data_path
        #self.labels = labels
        files=[]
        for f in os.listdir(data_path):
            if f!='.DS_Store':
                files.extend(list(map(lambda x:f+'/'+x,os.listdir(data_path+f))))
                
        for i in range(len(files)-1,-1,-1):
            if '.DS_Store' in files[i]:
                files.pop(i)
        
        self.files = files
        self.load_all=load_all
        self.transform = transform
        self.frames = frames
        '''
        if self.load_all:
            k=0
            temp=[]
            print('loading all images')
            for f in self.folders:
                k+=1
                temp.append(self.read_images(self.data_path, f, self.transform) )
                if (k)%1000==0:
                    print(float(k)/len(self.folders))
            self.dataset=temp
        '''
        

    def __len__(self):
        "Denotes the total number of samples"
        return len(self.files)

    def read_images(self, path, selected_file, use_transform):
        
        X = []
        temp=list(imageio.get_reader(os.path.join(path, selected_file),"ffmpeg"))

        for i in self.frames:

            if use_transform is not None:
                image1 = use_transform(temp[i])
            else:
                image1 = temp[i]
            X.append(image1)
        X = torch.stack(X, dim=0)

        return X


    def __getitem__(self, index):
        "Generates one sample of data"
        # Select sample
        if not self.load_all:
            file1 = self.files[index]

        # Load data
            X = self.read_images(self.data_path, file1, self.transform)     # (input) spatial images
        #y = torch.LongTensor([self.labels[index]])                  # (labels) LongTensor are for int64 instead of FloatTensor
        else:
            X=self.dataset[index]
        return X
    
d1=Dataset_Video(data_path='/Users/lekang/anaconda/tests/Review/Torch/predictiveCoding/UCF-101/',
                frames=range(20),
                transform=totensor
               )
d1[3535]

tensor([[[[0.1333, 0.1333, 0.1333,  ..., 0.0824, 0.0824, 0.0824],
          [0.1333, 0.1333, 0.1333,  ..., 0.0824, 0.0824, 0.0824],
          [0.1333, 0.1333, 0.1333,  ..., 0.0824, 0.0824, 0.0824],
          ...,
          [0.1333, 0.1333, 0.1333,  ..., 0.6118, 0.6118, 0.6118],
          [0.1333, 0.1333, 0.1333,  ..., 0.6118, 0.6118, 0.6118],
          [0.1333, 0.1333, 0.1333,  ..., 0.6118, 0.6118, 0.6118]],

         [[0.1451, 0.1451, 0.1451,  ..., 0.0667, 0.0667, 0.0667],
          [0.1451, 0.1451, 0.1451,  ..., 0.0667, 0.0667, 0.0667],
          [0.1451, 0.1451, 0.1451,  ..., 0.0667, 0.0667, 0.0667],
          ...,
          [0.1490, 0.1490, 0.1490,  ..., 0.5490, 0.5490, 0.5490],
          [0.1490, 0.1490, 0.1490,  ..., 0.5490, 0.5490, 0.5490],
          [0.1490, 0.1490, 0.1490,  ..., 0.5490, 0.5490, 0.5490]],

         [[0.1804, 0.1804, 0.1804,  ..., 0.1176, 0.1176, 0.1176],
          [0.1804, 0.1804, 0.1804,  ..., 0.1176, 0.1176, 0.1176],
          [0.1804, 0.1804, 0.1804,  ..., 0

In [3]:
#from InertiaNet import *
from data import *
import torch
import os
import numpy as np
import torch
import torchvision.transforms as transforms
import torch.utils.data as data
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score
import pandas as pd
import pickle


d_path='/Users/lekang/anaconda/tests/Review/Torch/predictiveCoding/ucf101-jpg/'
action_name_path = "inertialCodingNet.pkl"
save_model_path = "checkpoints/"

transform = transforms.Compose([transforms.Resize([240, 320]),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])


datatest=Dataset_CRNN(data_path=d_path,
                      frames=list(range(1,21)),
                      transform=transform,
                      load_all=False
                     )

use_cuda=False
batch_size=10
lr=1e-3
epoch=3

all_data_params = {'batch_size': batch_size, 'shuffle': False, 'num_workers': 6}#, 'pin_memory': True}# if use_cuda else {}
all_data_loader = data.DataLoader(datatest,**all_data_params)


code_net=inertiaCodingNet()

optimizer=optim.Adam(code_net.parameters(),lr=1e-3,betas=(0.9, 0.999), eps=1e-08, weight_decay=0)

for ep in range(epoch):
    Loss_rec_list=[]
    Loss_E_list=[]
    Loss_list=[]
    time1=pd.datetime.now()
    for i,x in enumerate(all_data_loader):
        print(i)
        optimizer.zero_grad()
        E,H,E_plus,x_rec=code_net(x)
        
        Loss_rec=torch.sum(torch.pow(x-x_rec,2))
        Loss_E=torch.sum(E_plus)
        Loss=Loss_rec+100*Loss_E
        optimizer.step()
        
        Loss_rec_list.append(Loss_rec.detach())
        Loss_E_list.append(Loss_E.detach())
        Loss_list.append(Loss.detach())
        

        if i%50==0:
            print(pd.datetime.now()-time1)
            time1=pd.datetime.now()
            print(i,' Loss_rec:',float(Loss_rec.data),' Loss_E:',float(100*Loss_E.data),' Loss:',float(Loss.data))
            save_image(x_rec[0],'/Users/lekang/anaconda/tests/Review/Torch/predictiveCoding/image_rec/'+str(epoch)+'_'+str(i) +'.jpg')

        
    print('Epoch:',ep,'  Loss_rec:',float(sum(Loss_rec_list)),' Loss_E:',float(100*sum(Loss_E_list)),
          ' Loss:',float(sum(Loss_list)))

                



0
0:00:16.183830
0  Loss_rec: 92132464.0  Loss_E: 58874.67578125  Loss: 92191336.0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
0:11:49.720535
50  Loss_rec: 80014480.0  Loss_E: 59580.55859375  Loss: 80074064.0
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
0:11:56.591478
100  Loss_rec: 63410736.0  Loss_E: 57915.5625  Loss: 63468652.0
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
0:12:18.865637
150  Loss_rec: 56617000.0  Loss_E: 58726.66796875  Loss: 56675728.0
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
1

81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
0:12:12.365799
100  Loss_rec: 63410656.0  Loss_E: 57371.03125  Loss: 63468028.0
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
0:12:12.463823
150  Loss_rec: 56616904.0  Loss_E: 59386.82421875  Loss: 56676292.0
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
0:12:12.249085
200  Loss_rec: 100730160.0  Loss_E: 58663.10546875  Loss: 100788824.0
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
0:12:14.028634
250  Loss_rec: 56122528.0  Loss_E: 58165.8671875  Loss: 56180692.0
251
252
2

152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
0:11:36.750357
200  Loss_rec: 100730216.0  Loss_E: 58315.22265625  Loss: 100788528.0
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
0:11:36.551364
250  Loss_rec: 56122460.0  Loss_E: 57944.3125  Loss: 56180404.0
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
0:11:36.994114
300  Loss_rec: 66929640.0  Loss_E: 57605.921875  Loss: 66987244.0
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340

KeyboardInterrupt: 

In [9]:
float(Loss_rec.data)

-93887216.0

In [13]:
x_rec[0].shape

torch.Size([20, 3, 240, 320])