In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Colab/17645/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab/17645


In [2]:
# torchserve installation
!pip install torch torchvision
!pip install torchserve
!pip install torch-model-archiver



In [3]:
# prepare dataset
import pandas as pd
import numpy as np

filepath = 'data.csv'
# load dataframe with column (user, movie, rating)
data = pd.read_csv(filepath, index_col=0)
data

Unnamed: 0,user,movie,rating
0,99774,1-900+1994,4
1,116122,luxo+jr.+1986,3
2,117254,knick+knack+1989,4
3,15031,the+kennedys+2011,3
4,122854,fiddle-de-dee+1947,4
...,...,...,...
49995,126636,leon+the+professional+1994,5
49996,9110,the+cabin+in+the+woods+2012,4
49997,53481,a+good+night+to+die+2003,4
49998,63151,interstellar+2014,4


In [4]:
# get users
users = np.array(data['user'].unique())
n_users = len(users)
user_map = {user:idx for idx,user in enumerate(users)}
# get movies
movies = np.array(data['movie'].unique())
n_movies = len(movies)
movie_map = {movie:idx for idx,movie in enumerate(movies)}

In [5]:
# refactor data to idx

# map user id to user idx
user_ids = [user_map[user] for user in data['user']]
# map movie name to movie idx
movie_ids = [movie_map[movie] for movie in data['movie']]

movie_data = pd.DataFrame({'user_idx':user_ids, 'movie_idx':movie_ids, 'rating':data['rating']})
movie_data
# movie_data.to_csv('movie_data.csv')

Unnamed: 0,user_idx,movie_idx,rating
0,0,0,4
1,1,1,3
2,2,2,4
3,3,3,3
4,4,4,4
...,...,...,...
49995,40116,909,5
49996,40117,13571,4
49997,40118,17283,4
49998,40119,1556,4


In [13]:
import pickle
with open('mapping.pkl', 'wb') as f:
    pickle.dump({'users':users, 'movies':movies, 'user_map':user_map, 'movie_map':movie_map}, f)

In [6]:
# split to training and testing data
n = len(data)
mask = np.random.rand(n) < 0.8
train_data, test_data = movie_data[mask], movie_data[~mask]

In [7]:
import torch
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
# create dataloader

class MovieDataset(Dataset):
  def __init__(self, df):
    self.x = torch.tensor(df[['user_idx','movie_idx']].values)
    self.y = torch.tensor(df['rating'].values, dtype=torch.float)

  def __len__(self):
    return len(self.x)
  
  def __getitem__(self,idx):
    return self.x[idx], self.y[idx]

train_set = MovieDataset(train_data)
test_set = MovieDataset(test_data)

train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=len(test_data), shuffle=True)

In [14]:
# model definition
class MatrixFactorization(torch.nn.Module):
  def __init__(self, n_users, n_movies, n_factors=20):
    super().__init__()
    # create user embeddings
    self.user_factors = torch.nn.Embedding(n_users, n_factors,
                                            sparse=True)
    # create item embeddings
    self.movie_factors = torch.nn.Embedding(n_movies, n_factors,
                                            sparse=True)

  def forward(self, data):
    user, movie = data[:,0], data[:,1]
    # matrix multiplication
    return (self.user_factors(user)*self.movie_factors(movie)).sum(1)

  def predict(self, user, movie):
    return self.forward(user, movie)

In [15]:
# model training
model = MatrixFactorization(n_users, n_movies, n_factors=20)
loss_fn = torch.nn.MSELoss() 
optimizer = torch.optim.SparseAdam(model.parameters(), lr=1e-4)

for idx, (data, target) in enumerate(train_loader):
    # predict
    prediction = model(data)
    loss = loss_fn(prediction, target)

    # backpropagate
    loss.backward()

    # update weights
    optimizer.step()

    # log
    if idx % 100 == 0:
        print("Batch {}: training loss {}".format(idx, loss))

Batch 0: training loss 32.640621185302734
Batch 100: training loss 35.88711929321289
Batch 200: training loss 43.51371383666992
Batch 300: training loss 21.398447036743164
Batch 400: training loss 28.933731079101562
Batch 500: training loss 39.547447204589844
Batch 600: training loss 35.53773880004883


In [16]:
# calculate test accuracy
model.eval()
for idx, (data, target) in enumerate(test_loader):
    # predict
    prediction = model(data)
    loss = loss_fn(prediction, target)
print("Loss on test set: {}".format(float(loss / len(test_data))))

Loss on test set: 0.003317886032164097


In [17]:
torch.save(model.state_dict(), 'model_dic.pth')
# torch.save(model, 'model.pt')

In [48]:
!torch-model-archiver --model-name movie_model --version 1.0 --serialized-file model.pt --handle handle.py

In [49]:
!torch-model-archiver -h

usage: torch-model-archiver [-h] --model-name MODEL_NAME
                            [--serialized-file SERIALIZED_FILE]
                            [--model-file MODEL_FILE] --handler HANDLER
                            [--extra-files EXTRA_FILES]
                            [--runtime {python,python2,python3}]
                            [--export-path EXPORT_PATH]
                            [--archive-format {tgz,no-archive,default}] [-f]
                            -v VERSION [-r REQUIREMENTS_FILE]

Torch Model Archiver Tool

optional arguments:
  -h, --help            show this help message and exit
  --model-name MODEL_NAME
                        Exported model name. Exported file will be named as
                        model-name.mar and saved in current working directory if no --export-path is
                        specified, else it will be saved under the export path
  --serialized-file SERIALIZED_FILE
                        Path to .pt or .pth file containing state_dic

In [None]:
!mkdir model-store && cd model-store

In [41]:
!torchserve --start --model-store model_store --models movie_model.mar

2022-03-15T01:55:25,076 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager -  Loading snapshot serializer plugin...
2022-03-15T01:55:25,201 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Initializing plugins manager...
2022-03-15T01:55:25,409 [INFO ] main org.pytorch.serve.ModelServer - 
Torchserve version: 0.5.3
TS Home: /usr/local/lib/python3.7/dist-packages
Current directory: /content/drive/MyDrive/Colab/17645
Temp directory: /tmp
Number of GPUs: 0
Number of CPUs: 2
Max heap size: 3248 M
Python executable: /usr/bin/python3
Config file: N/A
Inference address: http://127.0.0.1:8080
Management address: http://127.0.0.1:8081
Metrics address: http://127.0.0.1:8082
Model Store: /content/drive/MyDrive/Colab/17645/model_store
Initial Models: movie_model.mar
Log dir: /content/drive/MyDrive/Colab/17645/logs
Metrics dir: /content/drive/MyDrive/Colab/17645/logs
Netty threads: 0
Netty client threads: 0
Default workers per model: 2
Blacklist Regex: N/A
Maximum Response