# Imports

In [1]:
import numpy as np, pandas as pd
import os, shutil
from distutils.dir_util import copy_tree

# Define source path for train and test data

In [2]:
'''
Define the path to the train and test data files on your local computer. 

src_path should be a directory which has two folders: 1) train, and 2) test
'train' folder must have a file called: ratings_train.csv
'test' folder must have a file called : ratings_test.csv 

Both 'ratings_train.csv' and 'ratings_test.csv' should have the following four fields (comma-separated): 
rating_id (string type)
user_id (string type)
item_id (string type)
rating (float)
'''

# Path to your folder which has the train and test folders in it.
# The generic template location is './local_data/'. It used to be assigned here
# and then immediately overwritten, so it is kept only as a reference; the
# assignment below is the single value that takes effect.
src_path = './source_data/celal_data/processed/'

# Make Volume to Mount with appropriate folders
This is a folder created in the current working directory. <br>
We will copy the data files into this folder and then bind mount this folder into the container. <br>
When the container is run, model artifacts and other outputs will also be saved to this same folder.<br>
These files on the shared volume will persist when the container is shut down.<br>

In [3]:
# Run this as-is. Do not change any folder names!!!
mounted_volume = 'ml_vol'

# Always start from an empty volume: anything left by a previous run is deleted.
if os.path.exists(mounted_volume):
    shutil.rmtree(mounted_volume)
os.mkdir(mounted_volume)

# Top-level folders of the volume.
for top_level in ('data', 'logs', 'model', 'output'):
    os.mkdir(os.path.join(mounted_volume, top_level))

# The 'data' folder gets one sub-folder per dataset.
for split in ('train', 'test'):
    os.mkdir(os.path.join(mounted_volume, 'data', split))

# Copy Data from Source Path Into Mounted Volume

In [4]:
# Copy the contents of <src_path>/train and <src_path>/test into the matching
# folders under <mounted_volume>/data.
# NOTE(review): copy_tree comes from distutils, which is deprecated (PEP 632)
# and no longer shipped with Python 3.12+; consider migrating when the
# notebook's minimum Python version is known.
dest_path = os.path.join(mounted_volume, 'data')
for input_type in ['train', 'test']:
    full_src = os.path.join(src_path, input_type)
    full_dest = os.path.join(dest_path, input_type)

    if os.path.exists(full_src):
        copy_tree(full_src, full_dest)
    else:
        # A missing folder used to be skipped without any message, which hid a
        # wrong src_path until a later step failed. Keep going (the folder may
        # be intentionally absent) but make the skip visible.
        print(f"WARNING: source folder '{full_src}' not found; no {input_type} data was copied.")

# Pull Image from Docker Hub
Images are usually large (> 2GB) so they take time (minutes) to download. 

In [4]:
%%bash
# Download the abudesai/rec_base_mf image (tag: latest) from Docker Hub.
docker pull abudesai/rec_base_mf:latest

latest: Pulling from abudesai/rec_base_mf
000eee12ec04: Already exists
ddc2d83f8229: Already exists
3ae1660fa0d9: Already exists
ef709117d3d3: Already exists
487a0421e8fa: Already exists
20b7e9b6e75d: Pulling fs layer
a0b8b9e23e81: Pulling fs layer
331a746aaeef: Pulling fs layer
4f4fb700ef54: Pulling fs layer
4f4fb700ef54: Waiting
20b7e9b6e75d: Download complete
331a746aaeef: Verifying Checksum
331a746aaeef: Download complete
20b7e9b6e75d: Pull complete
4f4fb700ef54: Download complete
a0b8b9e23e81: Verifying Checksum
a0b8b9e23e81: Download complete
a0b8b9e23e81: Pull complete
331a746aaeef: Pull complete
4f4fb700ef54: Pull complete
Digest: sha256:2a683aff7de64bfc29eca126ca9e297b2a323ac8053cbe7d524a3161dc0ef8b2
Status: Downloaded newer image for abudesai/rec_base_mf:latest
docker.io/abudesai/rec_base_mf:latest


# Create Container From Image

In [5]:
%%bash 
# Start a container named 'mfc' from the image in the background (-d),
# publish container port 3000 on host port 3000, and bind-mount the local
# ml_vol folder at /app/ml_vol inside the container.
declare vol_path="$(pwd)/ml_vol"
# The expansion is quoted so that a working directory whose path contains
# spaces is passed to -v as a single argument instead of being word-split.
docker run -d -p 3000:3000 -v "$vol_path:/app/ml_vol" --name mfc abudesai/rec_base_mf:latest

9b2ec5ece800ef66387d2c2413ae9c1dcee48c2c6d18de4ef4ecebe5ffbd05b6


# Check Container and Image
Check that it's working. 

In [6]:
%%bash 
# List the running containers; the 'mfc' container should appear with status "Up".
docker ps

CONTAINER ID   IMAGE                         COMMAND                  CREATED         STATUS         PORTS                    NAMES
9b2ec5ece800   abudesai/rec_base_mf:latest   "/bin/sh -c 'python …"   9 seconds ago   Up 8 seconds   0.0.0.0:3000->3000/tcp   mfc


# Check the app is running. 

http://localhost:3000/

# Run Training
Training jobs can take a few minutes to a few hours, depending on the size of the data and your computer. 
A training dataset with 10 million rows may take 10 to 20 minutes to train the algorithm. 

In [7]:
%%bash 
# Run train.py with the Python interpreter inside the running 'mfc' container.
docker exec mfc python train.py

Starting the training process...
orig_train_data shape: (3929542, 4)
Training Natrix_Factorizer ...
Pre-processing data...
Preprocessing train_data ...
Training model ...
Finished training autorec ...
Saving model ...
Total training time: 0.94 minutes


2021-12-14 19:35:40.556088: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-14 19:35:40.556131: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-12-14 19:35:42.447843: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-12-14 19:35:42.447891: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-12-14 19:35:42.447907: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (9b2ec5ece800): /proc/driver/nvidia/version does not exist
2021-12-14 19:35:42.448053: I tensorflow/core/platform/cpu_featu

### The trained model artifact should be in the ml_vol/model dir.

# Run Test Data Predictions

In [11]:
%%bash 
# Run predict.py with the Python interpreter inside the running 'mfc' container.
docker exec mfc python predict.py

test data shape:  (206818, 4)
proc_test_data shape:  (206818, 7)
preds shape:  (206818, 1)


2021-12-07 21:00:19.591987: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-07 21:00:19.592030: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-12-07 21:00:20.593932: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-12-07 21:00:20.593993: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-12-07 21:00:20.594016: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (130091659d26): /proc/driver/nvidia/version does not exist
2021-12-07 21:00:20.594201: I tensorflow/core/platform/cpu_featu

# Score Test Data Predictions

In [12]:
%%bash 
# Run score.py with the Python interpreter inside the running 'mfc' container.
docker exec mfc python score.py

score {'mse': 17.75095366726145, 'rmse': 4.213188064549391, 'mae': 3.230556936789733, 'nmae': 4.309194728364527, 'smape': 103.95677659618579, 'r2': 0.3667734405148716}


2021-12-07 21:00:34.259473: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-07 21:00:34.259512: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Check Outputs In mounted volume
- data: this is where we mounted training and test data
- logs: contains logged model training output
- model: trained model artifacts
- output: contains predictions. Also contains output from hyper-parameter tuning, if run. 

In [16]:
%%bash
# Recursively list everything under the local ml_vol folder.
ls -R ml_vol

ml_vol:
data
logs
model
output

ml_vol/data:
test
train

ml_vol/data/test:
ratings_test.csv

ml_vol/data/train:
attribute_defn.csv
ratings_train.csv
user_attributes.csv

ml_vol/logs:

ml_vol/model:
checkpoint
model_params
model_weights.data-00000-of-00001
model_weights.index
preprocess_pipe.save

ml_vol/output:
predictions.csv
scores.csv


# Stop Container and Remove Image

In [17]:
%%bash
# Stop the running container named 'mfc'.
docker stop mfc

mfc


In [21]:
%%bash
# Remove the 'mfc' container.
docker rm mfc

mfc


In [22]:
%%bash
# Remove the local abudesai/rec_base_mf:latest image.
docker rmi abudesai/rec_base_mf:latest

Untagged: abudesai/rec_base_mf:1
Untagged: abudesai/rec_base_mf@sha256:3d2da2fe759aa941cb92cf45689a43c8431a9d610d5ffd9eb5a3d85beace0d8e
Deleted: sha256:cb26ffb01ab8b1ed4edbafa5cc0f81378b9f6293cec74c39e7fda43dc9f9d7b4
Deleted: sha256:579ba10b36934596136ff8cea888cfbc28116210e52bf7a0628a6cbf06e594c2
Deleted: sha256:d6df3f47491dd7a2e7f0d85eb433fe19bfb2d6f90e70b196698dfa087219e216
Deleted: sha256:aa01a591e4b196378f52d48b3434332c68aeb9998a1b15e2f40463f9878fbdcd
