In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
import pickle

# Number of consecutive movie/rating pairs fed to the state encoder.
frame_size = 10
# only 1 for testing
batch_size = 1

# NOTE: despite its name this handle points at the CPU; the name `cuda` is
# kept because other cells pass it to `.to(...)`.
cuda = torch.device('cpu')

In [3]:
import json
# Movie embeddings, keyed by movie id.
# NOTE(security): unpickling can execute arbitrary code -- only load this file
# from a source you trust.
with open('../data/infos_pca128.pytorch', 'rb') as movies_file:
    movies = pickle.load(movies_file)

# Raw movie metadata; `with` guarantees the file handle is closed after reading.
with open('../data/infos.json') as infos_file:
    infos_web = json.load(infos_file)

In [4]:
# Move every stored embedding onto the device selected in the config cell.
# Only existing keys are reassigned, so updating while iterating is safe.
for movie_id, embedding in movies.items():
    movies[movie_id] = embedding.to(cuda)

In [5]:
class StateRepresentation(nn.Module):
    """Encode a window of movie embeddings plus their ratings into one state vector.

    The embeddings of all frames are flattened, concatenated with the ratings
    and passed through a single ``Linear -> Tanh`` layer.

    Args:
        num_frames: number of movies in one window. ``None`` (the default)
            falls back to the notebook-level ``frame_size``.
        embed_size: width of a single movie embedding (default 128).
        state_size: width of the produced state vector (default 256).
    """

    def __init__(self, num_frames=None, embed_size=128, state_size=256):
        super(StateRepresentation, self).__init__()
        # Resolve the window length once so forward() does not depend on
        # notebook globals that may change after construction.
        self.num_frames = frame_size if num_frames is None else num_frames
        self.embed_size = embed_size
        # Kept as `self.lin = Sequential(Linear, Tanh)` so the parameter names
        # (`lin.0.weight`, `lin.0.bias`) stay loadable from existing checkpoints.
        self.lin = nn.Sequential(
            # embed_size values per movie + 1 rating per movie
            nn.Linear(self.num_frames * (embed_size + 1), state_size),
            nn.Tanh(),
        )

    def forward(self, info, ratings):
        """Build the state for a batch of windows.

        Args:
            info: movie embeddings holding ``num_frames * embed_size`` values
                per sample (any layout that flattens to that width).
            ratings: tensor of shape ``(batch, num_frames)``.

        Returns:
            Tensor of shape ``(batch, state_size)`` with values in ``[-1, 1]``.
        """
        # step 1: flatten to batch x (num_frames * embed_size). The batch
        # dimension is inferred from the data rather than read from the global
        # `batch_size`, so any batch size works.
        info = info.reshape(-1, self.num_frames * self.embed_size)
        # step 2: append the ratings -> batch x (num_frames * (embed_size + 1))
        stacked = torch.cat([info, ratings], 1)
        # step 3: apply the state representation layer
        return self.lin(stacked)

In [6]:
class Actor(nn.Module):
    """Policy network: encodes the inputs into a state and maps it to an action.

    Args:
        num_inputs: width of the encoded state fed to the first linear layer.
        num_actions: width of the produced action vector.
        hidden_size: width of the two hidden layers.
        init_w: the output layer's weights and biases are drawn uniformly
            from ``[-init_w, init_w]``.
    """

    def __init__(self, num_inputs, num_actions, hidden_size, init_w=3e-3):
        super(Actor, self).__init__()

        # Sub-modules are created in a fixed order: it determines both the
        # parameter names and the sequence of random initialisation draws.
        self.state_rep = StateRepresentation()

        self.linear1 = nn.Linear(num_inputs, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, num_actions)

        # Re-draw the output layer from a narrow symmetric range
        # (weight first, then bias).
        for output_param in (self.linear3.weight, self.linear3.bias):
            nn.init.uniform_(output_param, -init_w, init_w)

    def forward(self, info, rewards):
        """Return ``(state, action)``; the action is squashed by ``tanh``."""
        state = self.state_rep(info, rewards)
        hidden = F.relu(self.linear1(state))
        hidden = F.relu(self.linear2(hidden))
        return state, torch.tanh(self.linear3(hidden))

    def get_action(self, info, rewards):
        """Alias of :meth:`forward`; returns the same ``(state, action)`` pair."""
        return self.forward(info, rewards)

In [7]:
class Critic(nn.Module):
    """Value network: scores a (state, action) pair with one scalar per sample.

    Args:
        num_inputs: width of the state vector.
        num_actions: width of the action vector.
        hidden_size: width of the two hidden layers.
        init_w: the output layer's weights and biases are drawn uniformly
            from ``[-init_w, init_w]``.
    """

    def __init__(self, num_inputs, num_actions, hidden_size, init_w=3e-3):
        super(Critic, self).__init__()

        self.linear1 = nn.Linear(num_inputs + num_actions, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, 1)

        # Re-draw the output layer from a narrow symmetric range
        # (weight first, then bias).
        nn.init.uniform_(self.linear3.weight, -init_w, init_w)
        nn.init.uniform_(self.linear3.bias, -init_w, init_w)

    def forward(self, state, action):
        """Return a ``(batch, 1)`` tensor of values.

        Args:
            state: tensor of shape ``(batch, num_inputs)``.
            action: tensor holding ``num_actions`` values per sample; extra
                singleton axes such as ``(batch, 1, num_actions)`` are accepted.
        """
        # Flatten the action to one row per sample. A bare torch.squeeze()
        # would also drop the batch axis when batch == 1, leaving a 1-D tensor
        # that cannot be concatenated with the 2-D state.
        action = action.reshape(state.size(0), -1)
        x = torch.cat([state, action], 1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x

In [8]:
# Rebuild both networks; the layer sizes have to match the tensors stored in
# the checkpoints loaded below, otherwise load_state_dict raises.
value_net = Critic(256, 128, 320).to(cuda)
policy_net = Actor(256, 128, 192).to(cuda)

# Restore the saved weights onto the CPU and switch each network to eval mode.
for net, weights_path in (
    (value_net, "../models/value.pt"),
    (policy_net, "../models/policy.pt"),
):
    net.load_state_dict(torch.load(weights_path, map_location='cpu'))
    net.eval()
print()




In [9]:
# Hand-picked viewing history: movie ids and the rating given to each of them.
watched_ids = [1732, 172, 370, 1639, 1380, 2054, 471, 2502, 1625, 2001]
# Ratings as a float tensor with a leading batch axis of size 1.
watched_ratings = torch.tensor([4.0, -3.0, -3.0, 2.0, 3.0, -2.0, 3.0, 1.0, 0.0, -1.0]).to(cuda).unsqueeze(0).float()
# Concatenate the stored embeddings of the watched movies and add the batch
# axis in one step, so `watched_infos` is only ever bound to a tensor.
watched_infos = torch.cat([movies[i] for i in watched_ids]).unsqueeze(0)
# Inference only: disable autograd so no graph is built and the outputs do
# not carry a grad_fn.
with torch.no_grad():
    enc_state, action = policy_net(watched_infos, watched_ratings)

In [10]:
# Display the action vector the policy produced for the history above.
action

tensor([[-0.9985, -0.9969,  0.9973, -0.9970, -0.9986, -0.9968, -0.9970, -0.9993,
         -0.9966, -0.9953, -0.9994, -0.9990, -0.9992, -0.9925, -0.9973, -0.9991,
         -0.9986, -0.9967, -0.9962, -0.9967, -0.9971, -0.9966,  0.7486, -0.9983,
         -0.9938, -0.9993, -0.9943, -0.9988, -0.9951,  0.9997, -0.9969, -0.9994,
          0.9977, -0.9935, -0.9993,  0.9988, -0.9990, -0.9986,  0.9983,  0.5878,
         -0.9795, -0.9975,  0.0671, -0.9976, -0.9993, -0.9992, -0.9970, -0.9964,
          0.8853, -0.9967, -0.9966, -0.9948, -0.9954,  0.9823, -0.9990,  0.9873,
         -0.9997, -0.9993, -0.9930, -0.9982, -0.3362, -0.9938, -0.9922, -0.9955,
         -0.9971,  0.9997,  0.9988, -0.9993, -0.9989, -0.9710,  0.9949, -0.9990,
         -0.9954,  0.9978, -0.9994, -0.9944, -0.9970, -0.9937, -0.9984,  0.9967,
         -0.9955, -0.9990, -0.9992, -0.9989, -0.9993, -0.9974, -0.9963, -0.9993,
         -0.9911, -0.9990,  0.9983,  0.9981, -0.9991, -0.9993, -0.9993, -0.9991,
         -0.9981, -0.9991, -

In [11]:
# Variance taken over all elements of `action`.
action.var()

tensor(0.4915, grad_fn=<VarBackward0>)

In [52]:
# A 128-element vector sampled uniformly between -1 and 1.
random_action = torch.empty(128)
random_action.uniform_(-1, 1)  # fills the tensor in place
random_action

tensor([-0.7103, -0.3453, -0.3574,  0.6183,  0.7056,  0.1067, -0.4534,  0.5810,
         0.9717,  0.1858, -0.3109, -0.9241, -0.6142, -0.6273, -0.9086,  0.2766,
        -0.7913,  0.7446,  0.0666,  0.9429,  0.8772,  0.2217, -0.4526,  0.0536,
         0.2869,  0.2191, -0.1877,  0.2326, -0.7874,  0.9069, -0.8697, -0.0737,
        -0.0590,  0.4975, -0.0768, -0.4887, -0.9594, -0.9082,  0.3597, -0.1192,
         0.9349,  0.5017,  0.5420, -0.2545,  0.2974, -0.1908,  0.5963, -0.4214,
        -0.6902,  0.2583,  0.5453,  0.4400,  0.2924, -0.2870, -0.0389,  0.0935,
         0.3440, -0.0054,  0.9166,  0.4394, -0.1058, -0.1101,  0.7794,  0.6250,
         0.9588,  0.8355, -0.9057,  0.4041,  0.0464,  0.7430, -0.5783, -0.1685,
         0.2270,  0.1166,  0.9292,  0.4437, -0.5728, -0.6607,  0.4373, -0.4984,
         0.2863, -0.4826, -0.2174,  0.0786,  0.3916, -0.1597, -0.2943, -0.1576,
         0.0875,  0.4856,  0.1884,  0.1208, -0.7054, -0.7903, -0.0713,  0.5164,
        -0.6003, -0.9040,  0.4272,  0.77

In [41]:
# Stack every stored movie embedding along a new leading axis
# (one row per entry of `movies`, in the mapping's iteration order).
m_tensor = torch.stack(list(movies.values()))

In [50]:
# Per-dimension summary statistics of the stacked movie embeddings.
# Each reduction runs once along dim 0 (over movies) instead of once per
# column in a Python loop, and the index is derived from the tensor's actual
# width rather than a hard-coded 128, so the two can never disagree.
n_dims = m_tensor.size(1)
describe = pd.DataFrame({
    # min/max with `dim` return (values, indices); keep the values only
    'min': m_tensor.min(dim=0)[0].tolist(),
    'max': m_tensor.max(dim=0)[0].tolist(),
    'mean': m_tensor.mean(dim=0).tolist(),
    'var': m_tensor.var(dim=0).tolist(),
    'std': m_tensor.std(dim=0).tolist(),
}, index=[str(i) for i in range(n_dims)])

In [51]:
# Render the per-dimension statistics table built in the previous cell.
describe

Unnamed: 0,min,max,mean,var,std
0,-1.0,1.0,-0.972694,0.002249,0.047424
1,-1.0,1.0,-0.177242,0.087576,0.295933
2,-1.0,1.0,-0.038695,0.309991,0.556769
3,-1.0,1.0,-0.436343,0.053681,0.231692
4,-1.0,1.0,-0.875045,0.005066,0.071175
5,-1.0,1.0,-0.608863,0.017863,0.133653
6,-1.0,1.0,-0.123819,0.025534,0.159793
7,-1.0,1.0,-0.205939,0.031093,0.176333
8,-1.0,1.0,-0.748352,0.003157,0.056188
9,-1.0,1.0,-0.953102,0.000331,0.018182
