# Imports

In [18]:
import numpy as np, pandas as pd
import os, shutil
from distutils.dir_util import copy_tree

## Define source path for train and test data

In [33]:
'''
Define the path to the train and test data files on your local computer. 

src_path should be a directory which has two folders: 1) train, and 2) test
'train' folder must have a file called: ratings_train.csv
'test' folder must have a file called : ratings_test.csv 

Both 'ratings_train.csv' and 'ratings_test.csv' should have the following four fields (comma-separated): 
rating_id (string type)
user_id (string type)
item_id (string type)
rating (float)

You may use the test file purely for predictions, i.e. without doing scoring. In that case, the file may exclude the 'rating'
column. 
'''


# Dataset to run. Listed options: jester, movielens-10m, movielens-20m, book-crossing
dataset = 'jester'

# Folder expected to hold this dataset's 'train' and 'test' sub-folders.
src_path = './data/{}/processed/'.format(dataset)

## Make Volume to Mount with appropriate folders
This folder is created in the current working directory. <br>
We will copy the data files into this folder and then bind mount this folder into the container. <br>
When the container is run, model artifacts and other outputs will also be saved to this same folder. <br>
These files on the shared volume will persist when the container is shut down. 

In [20]:
# Run this as-is. Do not change any folder names!!! 

# Name of the host folder that is later bind-mounted into the container.
mounted_volume = 'ml_vol'

# Start from a clean slate: discard any volume folder left over from an earlier run.
if os.path.exists(mounted_volume):
    shutil.rmtree(mounted_volume)

# Folder layout relative to the volume root. Parents are listed before their
# children because os.mkdir creates exactly one level at a time.
volume_layout = [
    (),                  # the volume root itself
    ('data',),
    ('logs',),
    ('model',),
    ('output',),
    ('data', 'train'),
    ('data', 'test'),
]
for path_parts in volume_layout:
    os.mkdir(os.path.join(mounted_volume, *path_parts))

## Copy Data from Source Path Into Mounted Volume

In [21]:
# Copy the 'train' and 'test' input folders from `src_path` into the 'data'
# folder of the mounted volume.
#
# shutil.copytree(..., dirs_exist_ok=True) (Python 3.8+) is used instead of
# distutils.dir_util.copy_tree: distutils is deprecated (PEP 632), and
# copy_tree remembers every directory it has created for the lifetime of the
# process. Because the volume folder is deleted and rebuilt on each run, that
# stale cache can make a re-run in the same kernel fail when a source folder
# contains nested directories.
dest_path = os.path.join(mounted_volume, 'data')
for input_type in ['train', 'test']:
    full_src = os.path.join(src_path, input_type)
    full_dest = os.path.join(dest_path, input_type)

    if os.path.exists(full_src):
        # The destination folder already exists, hence dirs_exist_ok=True.
        shutil.copytree(full_src, full_dest, dirs_exist_ok=True)
    else:
        # Report the skipped folder here rather than leaving it to be
        # discovered by a later step.
        print(f"WARNING: source folder not found, nothing copied: {full_src}")

# Build Image

In [69]:
%%bash 
# Build the Docker image from the ./mf_gd build context and tag it
# abudesai/rec_base_mf:latest.
docker build -t abudesai/rec_base_mf:latest ./mf_gd

#1 [internal] load build definition from Dockerfile
#1 sha256:0e64079d6a4de6de8a4dd5cb3f748919bdba76c975b309b289e3dc9519977512
#1 transferring dockerfile: 38B 0.0s done
#1 DONE 0.0s

#2 [internal] load .dockerignore
#2 sha256:543414db0c54ed2c0ff1716c9b1e6f3615c1ee23a93199f391289d68d9b3d3cd
#2 transferring context: 34B 0.0s done
#2 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.8.0-slim
#3 sha256:4f0c597550e30aa54f707f0017cf64d137017976c13b147baa6fd4ad0c55c91e
#3 ...

#4 [auth] library/python:pull token for registry-1.docker.io
#4 sha256:5050d93228b82dd142f137a4710be50b6aa1a99f5f3264a22e478a6aafc8740d
#4 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.8.0-slim
#3 sha256:4f0c597550e30aa54f707f0017cf64d137017976c13b147baa6fd4ad0c55c91e
#3 DONE 12.0s

#10 [1/5] FROM docker.io/library/python:3.8.0-slim@sha256:8e243f41e500238f78f7a29a81656114d3fe603d5c34079a462d090f71c4b225
#10 sha256:f2202870b184ece5b9a09c9b777f938cf0be25287ffe019e2c50e60191382ed

# Create Container From Image

In [70]:
%%bash 
# Start a detached container named 'mf_gd' from the image, publish port 3000,
# and bind-mount the local ml_vol folder at /app/ml_vol inside the container.
declare vol_path="$(pwd)/ml_vol"
# The mount argument is quoted so that a working directory containing spaces
# is passed to docker as a single word instead of being split.
docker run -d -p 3000:3000 -v "$vol_path:/app/ml_vol" --name mf_gd abudesai/rec_base_mf:latest

c433fbff4a2aa09e9f02b9da7151cc2eaed2f2cfd3210339326a34cd56d92d1a


# Check Container and Image

In [83]:
# List the running containers to confirm that 'mf_gd' is up.
!docker ps

CONTAINER ID   IMAGE                         COMMAND                  CREATED             STATUS             PORTS                    NAMES
c433fbff4a2a   abudesai/rec_base_mf:latest   "/bin/sh -c 'python …"   About an hour ago   Up About an hour   0.0.0.0:3000->3000/tcp   mf_gd
31582b63b4a2   myapp                         "python first-python…"   2 hours ago         Up 2 hours         0.0.0.0:8080->8080/tcp   myappc


In [72]:
#%%bash 
#docker inspect mf_gd

# Run Training

In [73]:
%%bash 
# Run the training script inside the 'mf_gd' container.
# NOTE(review): the saved output of this cell shows the notebook's IOPub
# data-rate limit being exceeded, so some of the training output was not
# delivered to the notebook. Consider redirecting the output to a file.
docker exec mf_gd python train.py

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

2021-12-14 23:37:33.260042: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-14 23:37:33.260081: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-12-14 23:37:35.138594: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-12-14 23:37:35.138636: W tensorflow/stream_executor/cuda/cu

# Run Test Data Predictions

In [76]:
# Run the prediction script inside the 'mf_gd' container.
!docker exec mf_gd python predict.py

Reading prediction data... 
test data shape:  (206818, 4)
Loading trained Matrix_Factorizer... 
Making predictions... 
proc_test_data shape:  (206818, 7)
preds shape:  (206818, 1)
Saving predictions... 
Done with predictions.


2021-12-15 00:28:22.120251: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-15 00:28:22.120348: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-12-15 00:28:24.157762: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-12-15 00:28:24.157885: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-12-15 00:28:24.157917: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (c433fbff4a2a): /proc/driver/nvidia/version does not exist
2021-12-15 00:28:24.158293: I tensorflow/core/platform/cpu_featu

In [77]:
# Run the prediction script inside the 'mf_gd' container.
# NOTE(review): this repeats the command from the preceding cell — confirm
# whether the second run is intentional or the cell can be dropped.
!docker exec mf_gd python predict.py

Reading prediction data... 
test data shape:  (206818, 4)
Loading trained Matrix_Factorizer... 
Making predictions... 
proc_test_data shape:  (206818, 7)
preds shape:  (206818, 1)
Saving predictions... 
Done with predictions.


2021-12-15 00:28:47.754337: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-15 00:28:47.754437: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-12-15 00:28:49.514068: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-12-15 00:28:49.514180: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-12-15 00:28:49.514209: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (c433fbff4a2a): /proc/driver/nvidia/version does not exist
2021-12-15 00:28:49.514686: I tensorflow/core/platform/cpu_featu

# Score Test Data Predictions

In [78]:
# Run the scoring script inside the 'mf_gd' container.
!docker exec mf_gd python score.py

scores: {'mse': 17.750801283164236, 'rmse': 4.2131699803312275, 'mae': 3.230549243527804, 'nmae': 4.309184466430007, 'smape': 103.95697621569137, 'r2': 0.3667788764852091}


2021-12-15 00:29:17.131255: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-15 00:29:17.131323: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Check Outputs In mounted volume
- data: this is where we mounted training and test data
- logs: contains logged model training output
- model: trained model artifacts
- output: contains predictions. Also contains output from hyper-parameter tuning, if run. 

In [81]:
%%bash
# Recursively list everything under the ml_vol folder.
ls -R ml_vol

ml_vol:
data
logs
model
output

ml_vol/data:
test
train

ml_vol/data/test:
ratings_test.csv

ml_vol/data/train:
attribute_defn.csv
ratings_train.csv
user_attributes.csv

ml_vol/logs:

ml_vol/model:
checkpoint
model_params
model_weights.data-00000-of-00001
model_weights.index
preprocess_pipe.save

ml_vol/output:
predictions.csv
scores.csv


# Push to Docker Hub
You need push permission for this repository on Docker Hub.

In [82]:
# Push the abudesai/rec_base_mf:latest image to its registry repository.
!docker push abudesai/rec_base_mf:latest

The push refers to repository [docker.io/abudesai/rec_base_mf]
5f70bf18a086: Preparing
346efa7f5836: Preparing
3c8526b9c9f7: Preparing
6463cb47ff88: Preparing
d82a4c4e92ff: Preparing
155411760e3a: Preparing
2cbb114c7605: Preparing
459d9d53a256: Preparing
831c5620387f: Preparing
155411760e3a: Waiting
459d9d53a256: Waiting
2cbb114c7605: Waiting
831c5620387f: Waiting
5f70bf18a086: Layer already exists
d82a4c4e92ff: Layer already exists
6463cb47ff88: Layer already exists
2cbb114c7605: Layer already exists
459d9d53a256: Layer already exists
155411760e3a: Layer already exists
831c5620387f: Layer already exists
346efa7f5836: Pushed
3c8526b9c9f7: Pushed
latest: digest: sha256:9c3fe7d38de8be4a52b6f06981cc3ca59ce057c04523c696f1d25f6b8a68d363 size: 2206


In [57]:
%%bash
# Push the abudesai/rec_base_mf:latest image to its registry repository.
# NOTE(review): this is the same push command as the preceding cell — confirm
# whether both cells are needed.
docker push abudesai/rec_base_mf:latest

The push refers to repository [docker.io/abudesai/rec_base_mf]
5f70bf18a086: Preparing
9e7efba54c94: Preparing
ee11680e9331: Preparing
6463cb47ff88: Preparing
d82a4c4e92ff: Preparing
155411760e3a: Preparing
2cbb114c7605: Preparing
459d9d53a256: Preparing
831c5620387f: Preparing
155411760e3a: Waiting
2cbb114c7605: Waiting
459d9d53a256: Waiting
831c5620387f: Waiting
d82a4c4e92ff: Layer already exists
5f70bf18a086: Layer already exists
6463cb47ff88: Layer already exists
ee11680e9331: Layer already exists
155411760e3a: Layer already exists
459d9d53a256: Layer already exists
2cbb114c7605: Layer already exists
831c5620387f: Layer already exists
9e7efba54c94: Pushed
latest: digest: sha256:2a683aff7de64bfc29eca126ca9e297b2a323ac8053cbe7d524a3161dc0ef8b2 size: 2206


# Stop Container and Remove Image

In [58]:
%%bash
# Stop the 'mf_gd' container.
docker stop mf_gd

mf_gd


In [59]:
%%bash
# Remove the 'mf_gd' container.
docker rm mf_gd

mf_gd


In [60]:
%%bash
# Remove the abudesai/rec_base_mf:latest image from the local Docker host.
docker rmi abudesai/rec_base_mf:latest

Untagged: abudesai/rec_base_mf:latest
Untagged: abudesai/rec_base_mf@sha256:2a683aff7de64bfc29eca126ca9e297b2a323ac8053cbe7d524a3161dc0ef8b2
Deleted: sha256:19d78cc6643cbc7bb56dbccc57c7b0c4945212be6b8277fc604cb95e71c352ec
