# Imports

In [2]:
import numpy as np, pandas as pd
import os, shutil
from distutils.dir_util import copy_tree

## Define source path for train and test data

In [3]:
# Define the path to the train and test data files on your local computer.
#
# src_path should be a directory which has two folders: 1) train, and 2) test
#   - the 'train' folder must contain a file called: ratings_train.csv
#   - the 'test' folder must contain a file called:  ratings_test.csv
#
# Both 'ratings_train.csv' and 'ratings_test.csv' should have the following
# four comma-separated fields:
#   rating_id (string type)
#   user_id   (string type)
#   item_id   (string type)
#   rating    (float)


# Name of the dataset to use; it selects the sub-folder under ./data/.
dataset = 'jester'      # jester, movielens-10m, movielens-20m, book-crossing

# Folder expected to hold the 'train' and 'test' sub-folders described above.
src_path = f'./data/{dataset}/processed/'

## Make Volume to Mount with appropriate folders
This folder is created in the current working directory. <br>
We will copy the data files into this folder and then bind-mount it into the container. <br>
When the container runs, model artifacts and other outputs are also saved to this same folder. <br>
Files on this shared volume persist after the container is shut down.

In [5]:
# Run this cell unchanged -- the folder names below must not be altered!

mounted_volume = 'ml_vol'

# Always start from an empty volume: anything left over from a previous run
# (data, logs, model artifacts, outputs) is deleted here.
if os.path.exists(mounted_volume):
    shutil.rmtree(mounted_volume)

# Root of the volume, followed by its four top-level folders.
os.mkdir(mounted_volume)
for top_level in ('data', 'logs', 'model', 'output'):
    os.mkdir(os.path.join(mounted_volume, top_level))

# The data folder holds one sub-folder per input split.
for split in ('train', 'test'):
    os.mkdir(os.path.join(mounted_volume, 'data', split))

## Copy Data from Source Path Into Mounted Volume

In [6]:
# Copy the train and test folders from src_path into the data folder of the
# mounted volume, so the container sees them through the bind mount.
dest_path = os.path.join(mounted_volume, 'data')
for input_type in ['train', 'test']:
    full_src = os.path.join(src_path, input_type)
    full_dest = os.path.join(dest_path, input_type)

    # Report a missing source folder explicitly instead of skipping it
    # silently; otherwise the later container steps fail with no hint why.
    if not os.path.isdir(full_src):
        print(f"WARNING: source folder '{full_src}' not found; no {input_type} data was copied.")
        continue

    # shutil.copytree replaces distutils' copy_tree: distutils is deprecated
    # (PEP 632) and removed in Python 3.12, and copy_tree caches the
    # directories it created, which can break re-runs after the volume has
    # been deleted. dirs_exist_ok=True (Python 3.8+) lets us copy into the
    # destination folder that already exists.
    shutil.copytree(full_src, full_dest, dirs_exist_ok=True)

# Build Image

In [13]:
%%bash 
# Build the image from the ./mf_res build context and tag it abudesai/rec_base_mf:1.
docker build -t abudesai/rec_base_mf:1 ./mf_res

#1 [internal] load build definition from Dockerfile
#1 sha256:0c9c7b267063808c0008323d31af185ea3c5d359441f8fc90d54b394570a03bb
#1 transferring dockerfile: 38B 0.0s done
#1 DONE 0.0s

#2 [internal] load .dockerignore
#2 sha256:e47969be58e26a961d0d1ceed55a8499806033a522914ac7edec300110e31e1d
#2 transferring context: 34B done
#2 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.8.0-slim
#3 sha256:4f0c597550e30aa54f707f0017cf64d137017976c13b147baa6fd4ad0c55c91e
#3 ...

#4 [auth] library/python:pull token for registry-1.docker.io
#4 sha256:7a1518bdb5466a1be4a0d129c88e7b814f729397fce85abba89bdfdf4642246f
#4 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.8.0-slim
#3 sha256:4f0c597550e30aa54f707f0017cf64d137017976c13b147baa6fd4ad0c55c91e
#3 DONE 6.5s

#5 [1/5] FROM docker.io/library/python:3.8.0-slim@sha256:8e243f41e500238f78f7a29a81656114d3fe603d5c34079a462d090f71c4b225
#5 sha256:f2202870b184ece5b9a09c9b777f938cf0be25287ffe019e2c50e60191382ede
#5 DON

# Create Container From Image

In [15]:
%%bash 
# Absolute host path of the shared volume folder, so that -v treats it as a
# bind mount of that folder.
declare vol_path="$(pwd)/ml_vol"
# Start the container detached as 'mfc', publish port 3000 and mount the
# volume at /app/ml_vol. The mount spec is quoted so that a working directory
# containing spaces is still passed to docker as a single argument.
docker run -d -p 3000:3000 -v "$vol_path:/app/ml_vol" --name mfc abudesai/rec_base_mf:1

01d7d039df9ab8aa97bfa52d9479e342e062ba62332d802e11c570a6541e564d


# Check Container and Image

In [16]:
%%bash 
# List the running containers to confirm that 'mfc' is up.
docker ps

CONTAINER ID   IMAGE                    COMMAND                  CREATED          STATUS          PORTS                    NAMES
01d7d039df9a   abudesai/rec_base_mf:1   "/bin/sh -c 'python …"   19 seconds ago   Up 17 seconds   0.0.0.0:3000->3000/tcp   mfc


In [37]:
#%%bash 
#docker inspect mfc

# Run Training

In [10]:
%%bash 
# Run train.py inside the running 'mfc' container.
# NOTE(review): in the recorded run this produced enough output to trigger
# Jupyter's "IOPub data rate exceeded" limit, so part of the log was dropped.
docker exec mfc python train.py

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

2021-12-06 23:35:28.620173: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-06 23:35:28.620221: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-12-06 23:35:30.260466: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-12-06 23:35:30.260507: W tensorflow/stream_executor/cuda/cu

# Run Test Data Predictions

In [11]:
%%bash 
# Run predict.py inside the running 'mfc' container.
docker exec mfc python predict.py

test data shape:  (57489, 3)
proc_test_data shape:  (44729, 6)
preds shape:  (44729, 1)


2021-12-06 23:52:25.106476: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-06 23:52:25.106516: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-12-06 23:52:28.533509: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-12-06 23:52:28.533551: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-12-06 23:52:28.533562: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (3f983bdb18de): /proc/driver/nvidia/version does not exist
2021-12-06 23:52:28.533680: I tensorflow/core/platform/cpu_featu

# Score Test Data Predictions

In [12]:
%%bash 
# Run score.py inside the running 'mfc' container; in the recorded run it
# printed a dict of metrics (mse, rmse, mae, nmae, smape, r2).
docker exec mfc python score.py

score {'mse': 11.984519340055028, 'rmse': 3.4618664532380548, 'mae': 2.7503276394158536, 'nmae': 1.0240949426300248, 'smape': 156.6801513808124, 'r2': 0.17162261001815127}


2021-12-06 23:52:59.839827: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-06 23:52:59.839867: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Check Outputs In mounted volume
- data: this is where we mounted training and test data
- logs: contains logged model training output
- model: trained model artifacts
- output: contains predictions. Also contains output from hyper-parameter tuning, if run. 

In [1]:
%%bash
# Recursively list everything currently in the shared volume folder.
ls -R ml_vol

ml_vol:
data
output
score

ml_vol/data:
test
train

ml_vol/data/test:
ratings_test.csv

ml_vol/data/train:
attribute_defn.csv
ratings_train.csv
user_attributes.csv

ml_vol/output:
scores.csv

ml_vol/score:


# Push to docker hub 
You need push permission for this repository on Docker Hub.

In [17]:
%%bash
# Push the tagged image abudesai/rec_base_mf:1 to its Docker Hub repository.
docker push abudesai/rec_base_mf:1

The push refers to repository [docker.io/abudesai/rec_base_mf]
5f70bf18a086: Preparing
2bcb74ebc7e3: Preparing
34541a4460b5: Preparing
6463cb47ff88: Preparing
d82a4c4e92ff: Preparing
155411760e3a: Preparing
2cbb114c7605: Preparing
459d9d53a256: Preparing
831c5620387f: Preparing
155411760e3a: Waiting
459d9d53a256: Waiting
831c5620387f: Waiting
2cbb114c7605: Waiting
5f70bf18a086: Layer already exists
6463cb47ff88: Layer already exists
d82a4c4e92ff: Layer already exists
459d9d53a256: Layer already exists
2cbb114c7605: Layer already exists
155411760e3a: Layer already exists
831c5620387f: Layer already exists
2bcb74ebc7e3: Pushed
34541a4460b5: Pushed
1: digest: sha256:8da6cd2a74f5b5542bb1f50ac19abfc3ba880ab9ef03d53d0e63968dcf4b35da size: 2208


# Stop Container, Remove Container and Image

In [None]:
%%bash
# Stop the running 'mfc' container.
docker stop mfc

In [None]:
%%bash
# Remove the 'mfc' container.
docker rm mfc

In [None]:
%%bash
# Remove the local image abudesai/rec_base_mf:1.
docker rmi abudesai/rec_base_mf:1